linux/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23#include <linux/device.h>
  24#include <linux/export.h>
  25#include <linux/err.h>
  26#include <linux/fs.h>
  27#include <linux/file.h>
  28#include <linux/sched.h>
  29#include <linux/slab.h>
  30#include <linux/uaccess.h>
  31#include <linux/compat.h>
  32#include <uapi/linux/kfd_ioctl.h>
  33#include <linux/time.h>
  34#include <linux/mm.h>
  35#include <linux/mman.h>
  36#include <asm/processor.h>
  37#include "kfd_priv.h"
  38#include "kfd_device_queue_manager.h"
  39#include "kfd_dbgmgr.h"
  40
  41static long kfd_ioctl(struct file *, unsigned int, unsigned long);
  42static int kfd_open(struct inode *, struct file *);
  43static int kfd_mmap(struct file *, struct vm_area_struct *);
  44
  45static const char kfd_dev_name[] = "kfd";
  46
  47static const struct file_operations kfd_fops = {
  48        .owner = THIS_MODULE,
  49        .unlocked_ioctl = kfd_ioctl,
  50        .compat_ioctl = kfd_ioctl,
  51        .open = kfd_open,
  52        .mmap = kfd_mmap,
  53};
  54
  55static int kfd_char_dev_major = -1;
  56static struct class *kfd_class;
  57struct device *kfd_device;
  58
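/*
 * Register the /dev/kfd character device: allocate a dynamic major,
 * create the "kfd" class and the single device node (minor 0).
 * Returns 0 on success or a negative error code, unwinding any
 * partially completed registration steps on failure.
 */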
  59int kfd_chardev_init(void)
  60{
  61        int err = 0;
  62
  63        kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
  64        err = kfd_char_dev_major;
  65        if (err < 0)
  66                goto err_register_chrdev;
  67
  68        kfd_class = class_create(THIS_MODULE, kfd_dev_name);
  69        err = PTR_ERR(kfd_class);
  70        if (IS_ERR(kfd_class))
  71                goto err_class_create;
  72
  73        kfd_device = device_create(kfd_class, NULL,
  74                                        MKDEV(kfd_char_dev_major, 0),
  75                                        NULL, kfd_dev_name);
  76        err = PTR_ERR(kfd_device);
  77        if (IS_ERR(kfd_device))
  78                goto err_device_create;
  79
  80        return 0;
  81
  82err_device_create:
  83        class_destroy(kfd_class);
  84err_class_create:
  85        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
  86err_register_chrdev:
  87        return err;
  88}
  89
  90void kfd_chardev_exit(void)
  91{
  92        device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
  93        class_destroy(kfd_class);
  94        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
  95}
  96
  97struct device *kfd_chardev(void)
  98{
  99        return kfd_device;
 100}
 101
 102
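/*
 * Open handler for /dev/kfd: only minor 0 exists, and 32-bit (compat)
 * callers are rejected because amdkfd does not support them. A
 * successful open creates (or looks up) the per-process kfd_process.
 */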
 103static int kfd_open(struct inode *inode, struct file *filep)
 104{
 105        struct kfd_process *process;
 106        bool is_32bit_user_mode;
 107
 108        if (iminor(inode) != 0)
 109                return -ENODEV;
 110
 111        is_32bit_user_mode = in_compat_syscall();
 112
 113        if (is_32bit_user_mode) {
 114                dev_warn(kfd_device,
 115                        "Process %d (32-bit) failed to open /dev/kfd\n"
 116                        "32-bit processes are not supported by amdkfd\n",
 117                        current->pid);
 118                return -EPERM;
 119        }
 120
 121        process = kfd_create_process(filep);
 122        if (IS_ERR(process))
 123                return PTR_ERR(process);
 124
 125        dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
 126                process->pasid, process->is_32bit_user_mode);
 127
 128        return 0;
 129}
 130
 131static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
 132                                        void *data)
 133{
 134        struct kfd_ioctl_get_version_args *args = data;
 135
 136        args->major_version = KFD_IOCTL_MAJOR_VERSION;
 137        args->minor_version = KFD_IOCTL_MINOR_VERSION;
 138
 139        return 0;
 140}
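/*
 * Illustrative only, not part of the driver: user space (e.g. the HSA
 * runtime) would query the interface version roughly as follows,
 * assuming kfd_fd is an already-open /dev/kfd file descriptor:
 *
 *   struct kfd_ioctl_get_version_args args = {0};
 *
 *   if (ioctl(kfd_fd, AMDKFD_IOC_GET_VERSION, &args) == 0)
 *           printf("KFD ioctl interface %u.%u\n",
 *                  args.major_version, args.minor_version);
 */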
 141
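/*
 * Validate the user-supplied create-queue arguments and translate them
 * into a struct queue_properties. Returns 0 on success, or a negative
 * error code if an argument is out of range or a user pointer cannot
 * be accessed.
 */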
 142static int set_queue_properties_from_user(struct queue_properties *q_properties,
 143                                struct kfd_ioctl_create_queue_args *args)
 144{
 145        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
  146                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
 147                return -EINVAL;
 148        }
 149
 150        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
  151                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
 152                return -EINVAL;
 153        }
 154
 155        if ((args->ring_base_address) &&
 156                (!access_ok(VERIFY_WRITE,
 157                        (const void __user *) args->ring_base_address,
 158                        sizeof(uint64_t)))) {
 159                pr_err("Can't access ring base address\n");
 160                return -EFAULT;
 161        }
 162
 163        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 164                pr_err("Ring size must be a power of 2 or 0\n");
 165                return -EINVAL;
 166        }
 167
 168        if (!access_ok(VERIFY_WRITE,
 169                        (const void __user *) args->read_pointer_address,
 170                        sizeof(uint32_t))) {
 171                pr_err("Can't access read pointer\n");
 172                return -EFAULT;
 173        }
 174
 175        if (!access_ok(VERIFY_WRITE,
 176                        (const void __user *) args->write_pointer_address,
 177                        sizeof(uint32_t))) {
 178                pr_err("Can't access write pointer\n");
 179                return -EFAULT;
 180        }
 181
 182        if (args->eop_buffer_address &&
 183                !access_ok(VERIFY_WRITE,
 184                        (const void __user *) args->eop_buffer_address,
 185                        sizeof(uint32_t))) {
 186                pr_debug("Can't access eop buffer");
 187                return -EFAULT;
 188        }
 189
 190        if (args->ctx_save_restore_address &&
 191                !access_ok(VERIFY_WRITE,
 192                        (const void __user *) args->ctx_save_restore_address,
 193                        sizeof(uint32_t))) {
 194                pr_debug("Can't access ctx save restore buffer");
 195                return -EFAULT;
 196        }
 197
 198        q_properties->is_interop = false;
 199        q_properties->queue_percent = args->queue_percentage;
 200        q_properties->priority = args->queue_priority;
 201        q_properties->queue_address = args->ring_base_address;
 202        q_properties->queue_size = args->ring_size;
 203        q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
 204        q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
 205        q_properties->eop_ring_buffer_address = args->eop_buffer_address;
 206        q_properties->eop_ring_buffer_size = args->eop_buffer_size;
 207        q_properties->ctx_save_restore_area_address =
 208                        args->ctx_save_restore_address;
 209        q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
 210        q_properties->ctl_stack_size = args->ctl_stack_size;
 211        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
 212                args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 213                q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
 214        else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
 215                q_properties->type = KFD_QUEUE_TYPE_SDMA;
 216        else
 217                return -ENOTSUPP;
 218
 219        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 220                q_properties->format = KFD_QUEUE_FORMAT_AQL;
 221        else
 222                q_properties->format = KFD_QUEUE_FORMAT_PM4;
 223
 224        pr_debug("Queue Percentage: %d, %d\n",
 225                        q_properties->queue_percent, args->queue_percentage);
 226
 227        pr_debug("Queue Priority: %d, %d\n",
 228                        q_properties->priority, args->queue_priority);
 229
 230        pr_debug("Queue Address: 0x%llX, 0x%llX\n",
 231                        q_properties->queue_address, args->ring_base_address);
 232
 233        pr_debug("Queue Size: 0x%llX, %u\n",
 234                        q_properties->queue_size, args->ring_size);
 235
 236        pr_debug("Queue r/w Pointers: %px, %px\n",
 237                        q_properties->read_ptr,
 238                        q_properties->write_ptr);
 239
 240        pr_debug("Queue Format: %d\n", q_properties->format);
 241
 242        pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
 243
 244        pr_debug("Queue CTX save area: 0x%llX\n",
 245                        q_properties->ctx_save_restore_area_address);
 246
 247        return 0;
 248}
 249
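/*
 * Create a user-mode queue: validate the arguments, look up the target
 * device, bind the process to it if necessary, create the queue through
 * the process queue manager, and return the queue id plus the doorbell
 * mmap offset to user space.
 */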
 250static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 251                                        void *data)
 252{
 253        struct kfd_ioctl_create_queue_args *args = data;
 254        struct kfd_dev *dev;
 255        int err = 0;
 256        unsigned int queue_id;
 257        struct kfd_process_device *pdd;
 258        struct queue_properties q_properties;
 259
 260        memset(&q_properties, 0, sizeof(struct queue_properties));
 261
 262        pr_debug("Creating queue ioctl\n");
 263
 264        err = set_queue_properties_from_user(&q_properties, args);
 265        if (err)
 266                return err;
 267
 268        pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
 269        dev = kfd_device_by_id(args->gpu_id);
 270        if (!dev) {
 271                pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
 272                return -EINVAL;
 273        }
 274
 275        mutex_lock(&p->mutex);
 276
 277        pdd = kfd_bind_process_to_device(dev, p);
 278        if (IS_ERR(pdd)) {
 279                err = -ESRCH;
 280                goto err_bind_process;
 281        }
 282
 283        pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
 284                        p->pasid,
 285                        dev->id);
 286
 287        err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
 288        if (err != 0)
 289                goto err_create_queue;
 290
 291        args->queue_id = queue_id;
 292
 293
 294        /* Return gpu_id as doorbell offset for mmap usage */
 295        args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
 296        args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
 297        args->doorbell_offset <<= PAGE_SHIFT;
 298        if (KFD_IS_SOC15(dev->device_info->asic_family))
 299                /* On SOC15 ASICs, doorbell allocation must be
 300                 * per-device, and independent from the per-process
 301                 * queue_id. Return the doorbell offset within the
 302                 * doorbell aperture to user mode.
 303                 */
 304                args->doorbell_offset |= q_properties.doorbell_off;
 305
 306        mutex_unlock(&p->mutex);
 307
 308        pr_debug("Queue id %d was created successfully\n", args->queue_id);
 309
 310        pr_debug("Ring buffer address == 0x%016llX\n",
 311                        args->ring_base_address);
 312
 313        pr_debug("Read ptr address    == 0x%016llX\n",
 314                        args->read_pointer_address);
 315
 316        pr_debug("Write ptr address   == 0x%016llX\n",
 317                        args->write_pointer_address);
 318
 319        return 0;
 320
 321err_create_queue:
 322err_bind_process:
 323        mutex_unlock(&p->mutex);
 324        return err;
 325}
 326
 327static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 328                                        void *data)
 329{
 330        int retval;
 331        struct kfd_ioctl_destroy_queue_args *args = data;
 332
 333        pr_debug("Destroying queue id %d for pasid %d\n",
 334                                args->queue_id,
 335                                p->pasid);
 336
 337        mutex_lock(&p->mutex);
 338
 339        retval = pqm_destroy_queue(&p->pqm, args->queue_id);
 340
 341        mutex_unlock(&p->mutex);
 342        return retval;
 343}
 344
 345static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 346                                        void *data)
 347{
 348        int retval;
 349        struct kfd_ioctl_update_queue_args *args = data;
 350        struct queue_properties properties;
 351
 352        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
  353                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
 354                return -EINVAL;
 355        }
 356
 357        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
  358                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
 359                return -EINVAL;
 360        }
 361
 362        if ((args->ring_base_address) &&
 363                (!access_ok(VERIFY_WRITE,
 364                        (const void __user *) args->ring_base_address,
 365                        sizeof(uint64_t)))) {
 366                pr_err("Can't access ring base address\n");
 367                return -EFAULT;
 368        }
 369
 370        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 371                pr_err("Ring size must be a power of 2 or 0\n");
 372                return -EINVAL;
 373        }
 374
 375        properties.queue_address = args->ring_base_address;
 376        properties.queue_size = args->ring_size;
 377        properties.queue_percent = args->queue_percentage;
 378        properties.priority = args->queue_priority;
 379
 380        pr_debug("Updating queue id %d for pasid %d\n",
 381                        args->queue_id, p->pasid);
 382
 383        mutex_lock(&p->mutex);
 384
 385        retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
 386
 387        mutex_unlock(&p->mutex);
 388
 389        return retval;
 390}
 391
 392static int kfd_ioctl_set_memory_policy(struct file *filep,
 393                                        struct kfd_process *p, void *data)
 394{
 395        struct kfd_ioctl_set_memory_policy_args *args = data;
 396        struct kfd_dev *dev;
 397        int err = 0;
 398        struct kfd_process_device *pdd;
 399        enum cache_policy default_policy, alternate_policy;
 400
 401        if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
 402            && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 403                return -EINVAL;
 404        }
 405
 406        if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
 407            && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 408                return -EINVAL;
 409        }
 410
 411        dev = kfd_device_by_id(args->gpu_id);
 412        if (!dev)
 413                return -EINVAL;
 414
 415        mutex_lock(&p->mutex);
 416
 417        pdd = kfd_bind_process_to_device(dev, p);
 418        if (IS_ERR(pdd)) {
 419                err = -ESRCH;
 420                goto out;
 421        }
 422
 423        default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 424                         ? cache_policy_coherent : cache_policy_noncoherent;
 425
 426        alternate_policy =
 427                (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 428                   ? cache_policy_coherent : cache_policy_noncoherent;
 429
 430        if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
 431                                &pdd->qpd,
 432                                default_policy,
 433                                alternate_policy,
 434                                (void __user *)args->alternate_aperture_base,
 435                                args->alternate_aperture_size))
 436                err = -EINVAL;
 437
 438out:
 439        mutex_unlock(&p->mutex);
 440
 441        return err;
 442}
 443
 444static int kfd_ioctl_set_trap_handler(struct file *filep,
 445                                        struct kfd_process *p, void *data)
 446{
 447        struct kfd_ioctl_set_trap_handler_args *args = data;
 448        struct kfd_dev *dev;
 449        int err = 0;
 450        struct kfd_process_device *pdd;
 451
 452        dev = kfd_device_by_id(args->gpu_id);
 453        if (dev == NULL)
 454                return -EINVAL;
 455
 456        mutex_lock(&p->mutex);
 457
 458        pdd = kfd_bind_process_to_device(dev, p);
 459        if (IS_ERR(pdd)) {
 460                err = -ESRCH;
 461                goto out;
 462        }
 463
 464        if (dev->dqm->ops.set_trap_handler(dev->dqm,
 465                                        &pdd->qpd,
 466                                        args->tba_addr,
 467                                        args->tma_addr))
 468                err = -EINVAL;
 469
 470out:
 471        mutex_unlock(&p->mutex);
 472
 473        return err;
 474}
 475
 476static int kfd_ioctl_dbg_register(struct file *filep,
 477                                struct kfd_process *p, void *data)
 478{
 479        struct kfd_ioctl_dbg_register_args *args = data;
 480        struct kfd_dev *dev;
 481        struct kfd_dbgmgr *dbgmgr_ptr;
 482        struct kfd_process_device *pdd;
 483        bool create_ok;
 484        long status = 0;
 485
 486        dev = kfd_device_by_id(args->gpu_id);
 487        if (!dev)
 488                return -EINVAL;
 489
 490        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 491                pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
 492                return -EINVAL;
 493        }
 494
 495        mutex_lock(&p->mutex);
 496        mutex_lock(kfd_get_dbgmgr_mutex());
 497
  498        /*
  499         * Make sure that we have a pdd, in case this is the first queue
  500         * created for this process.
  501         */
 502        pdd = kfd_bind_process_to_device(dev, p);
 503        if (IS_ERR(pdd)) {
 504                status = PTR_ERR(pdd);
 505                goto out;
 506        }
 507
 508        if (!dev->dbgmgr) {
 509                /* In case of a legal call, we have no dbgmgr yet */
 510                create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
 511                if (create_ok) {
 512                        status = kfd_dbgmgr_register(dbgmgr_ptr, p);
 513                        if (status != 0)
 514                                kfd_dbgmgr_destroy(dbgmgr_ptr);
 515                        else
 516                                dev->dbgmgr = dbgmgr_ptr;
 517                }
 518        } else {
 519                pr_debug("debugger already registered\n");
 520                status = -EINVAL;
 521        }
 522
 523out:
 524        mutex_unlock(kfd_get_dbgmgr_mutex());
 525        mutex_unlock(&p->mutex);
 526
 527        return status;
 528}
 529
 530static int kfd_ioctl_dbg_unregister(struct file *filep,
 531                                struct kfd_process *p, void *data)
 532{
 533        struct kfd_ioctl_dbg_unregister_args *args = data;
 534        struct kfd_dev *dev;
 535        long status;
 536
 537        dev = kfd_device_by_id(args->gpu_id);
 538        if (!dev || !dev->dbgmgr)
 539                return -EINVAL;
 540
 541        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 542                pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
 543                return -EINVAL;
 544        }
 545
 546        mutex_lock(kfd_get_dbgmgr_mutex());
 547
 548        status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
 549        if (!status) {
 550                kfd_dbgmgr_destroy(dev->dbgmgr);
 551                dev->dbgmgr = NULL;
 552        }
 553
 554        mutex_unlock(kfd_get_dbgmgr_mutex());
 555
 556        return status;
 557}
 558
  559/*
  560 * Parse and generate a variable-size data structure for address watch.
  561 * The total buffer size and the number of watch points are limited in
  562 * order to prevent kernel abuse (this has no bearing on the much smaller
  563 * HW limitation, which is enforced by the dbgdev module).
  564 * Note that the watch addresses themselves are not copied from user,
  565 * since they are set into the HW as user-mode values.
  566 *
  567 */
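/*
 * Payload layout following struct kfd_ioctl_dbg_address_watch_args in
 * the user buffer, as parsed below:
 *   uint32_t                  num_watch_points;
 *   enum HSA_DBG_WATCH_MODE   watch_mode[num_watch_points];
 *   uint64_t                  watch_address[num_watch_points];
 *   uint64_t                  watch_mask[num_watch_points]; (or a single
 *                             zero entry when no masks are supplied)
 */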
 568static int kfd_ioctl_dbg_address_watch(struct file *filep,
 569                                        struct kfd_process *p, void *data)
 570{
 571        struct kfd_ioctl_dbg_address_watch_args *args = data;
 572        struct kfd_dev *dev;
 573        struct dbg_address_watch_info aw_info;
 574        unsigned char *args_buff;
 575        long status;
 576        void __user *cmd_from_user;
 577        uint64_t watch_mask_value = 0;
 578        unsigned int args_idx = 0;
 579
 580        memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
 581
 582        dev = kfd_device_by_id(args->gpu_id);
 583        if (!dev)
 584                return -EINVAL;
 585
 586        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 587                pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 588                return -EINVAL;
 589        }
 590
 591        cmd_from_user = (void __user *) args->content_ptr;
 592
 593        /* Validate arguments */
 594
 595        if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
 596                (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
 597                (cmd_from_user == NULL))
 598                return -EINVAL;
 599
 600        /* this is the actual buffer to work with */
 601        args_buff = memdup_user(cmd_from_user,
 602                                args->buf_size_in_bytes - sizeof(*args));
 603        if (IS_ERR(args_buff))
 604                return PTR_ERR(args_buff);
 605
 606        aw_info.process = p;
 607
 608        aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
 609        args_idx += sizeof(aw_info.num_watch_points);
 610
 611        aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
 612        args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
 613
  614        /*
  615         * Set the watch address base pointer to point at the array base
  616         * within args_buff.
  617         */
 618        aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
 619
 620        /* skip over the addresses buffer */
 621        args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
 622
 623        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
 624                status = -EINVAL;
 625                goto out;
 626        }
 627
 628        watch_mask_value = (uint64_t) args_buff[args_idx];
 629
 630        if (watch_mask_value > 0) {
  631                /*
  632                 * There is an array of masks.
  633                 * Set the watch mask base pointer to point at the array
  634                 * base within args_buff.
  635                 */
 636                aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
 637
 638                /* skip over the masks buffer */
 639                args_idx += sizeof(aw_info.watch_mask) *
 640                                aw_info.num_watch_points;
 641        } else {
 642                /* just the NULL mask, set to NULL and skip over it */
 643                aw_info.watch_mask = NULL;
 644                args_idx += sizeof(aw_info.watch_mask);
 645        }
 646
  647        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
 648                status = -EINVAL;
 649                goto out;
 650        }
 651
 652        /* Currently HSA Event is not supported for DBG */
 653        aw_info.watch_event = NULL;
 654
 655        mutex_lock(kfd_get_dbgmgr_mutex());
 656
 657        status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
 658
 659        mutex_unlock(kfd_get_dbgmgr_mutex());
 660
 661out:
 662        kfree(args_buff);
 663
 664        return status;
 665}
 666
 667/* Parse and generate fixed size data structure for wave control */
 668static int kfd_ioctl_dbg_wave_control(struct file *filep,
 669                                        struct kfd_process *p, void *data)
 670{
 671        struct kfd_ioctl_dbg_wave_control_args *args = data;
 672        struct kfd_dev *dev;
 673        struct dbg_wave_control_info wac_info;
 674        unsigned char *args_buff;
 675        uint32_t computed_buff_size;
 676        long status;
 677        void __user *cmd_from_user;
 678        unsigned int args_idx = 0;
 679
 680        memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
 681
 682        /* we use compact form, independent of the packing attribute value */
 683        computed_buff_size = sizeof(*args) +
 684                                sizeof(wac_info.mode) +
 685                                sizeof(wac_info.operand) +
 686                                sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
 687                                sizeof(wac_info.dbgWave_msg.MemoryVA) +
 688                                sizeof(wac_info.trapId);
 689
 690        dev = kfd_device_by_id(args->gpu_id);
 691        if (!dev)
 692                return -EINVAL;
 693
 694        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 695                pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 696                return -EINVAL;
 697        }
 698
 699        /* input size must match the computed "compact" size */
 700        if (args->buf_size_in_bytes != computed_buff_size) {
  701                pr_debug("size mismatch, computed : actual %u : %u\n",
  702                                computed_buff_size, args->buf_size_in_bytes);
 703                return -EINVAL;
 704        }
 705
 706        cmd_from_user = (void __user *) args->content_ptr;
 707
 708        if (cmd_from_user == NULL)
 709                return -EINVAL;
 710
 711        /* copy the entire buffer from user */
 712
 713        args_buff = memdup_user(cmd_from_user,
 714                                args->buf_size_in_bytes - sizeof(*args));
 715        if (IS_ERR(args_buff))
 716                return PTR_ERR(args_buff);
 717
 718        /* move ptr to the start of the "pay-load" area */
 719        wac_info.process = p;
 720
 721        wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
 722        args_idx += sizeof(wac_info.operand);
 723
 724        wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
 725        args_idx += sizeof(wac_info.mode);
 726
 727        wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
 728        args_idx += sizeof(wac_info.trapId);
 729
 730        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
 731                                        *((uint32_t *)(&args_buff[args_idx]));
 732        wac_info.dbgWave_msg.MemoryVA = NULL;
 733
 734        mutex_lock(kfd_get_dbgmgr_mutex());
 735
 736        pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
 737                        wac_info.process, wac_info.operand,
 738                        wac_info.mode, wac_info.trapId,
 739                        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 740
 741        status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
 742
 743        pr_debug("Returned status of dbg manager is %ld\n", status);
 744
 745        mutex_unlock(kfd_get_dbgmgr_mutex());
 746
 747        kfree(args_buff);
 748
 749        return status;
 750}
 751
 752static int kfd_ioctl_get_clock_counters(struct file *filep,
 753                                struct kfd_process *p, void *data)
 754{
 755        struct kfd_ioctl_get_clock_counters_args *args = data;
 756        struct kfd_dev *dev;
 757        struct timespec64 time;
 758
 759        dev = kfd_device_by_id(args->gpu_id);
 760        if (dev)
 761                /* Reading GPU clock counter from KGD */
 762                args->gpu_clock_counter =
 763                        dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
 764        else
 765                /* Node without GPU resource */
 766                args->gpu_clock_counter = 0;
 767
 768        /* No access to rdtsc. Using raw monotonic time */
 769        getrawmonotonic64(&time);
 770        args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);
 771
 772        get_monotonic_boottime64(&time);
 773        args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);
 774
 775        /* Since the counter is in nano-seconds we use 1GHz frequency */
 776        args->system_clock_freq = 1000000000;
 777
 778        return 0;
 779}
 780
 781
 782static int kfd_ioctl_get_process_apertures(struct file *filp,
 783                                struct kfd_process *p, void *data)
 784{
 785        struct kfd_ioctl_get_process_apertures_args *args = data;
 786        struct kfd_process_device_apertures *pAperture;
 787        struct kfd_process_device *pdd;
 788
 789        dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
 790
 791        args->num_of_nodes = 0;
 792
 793        mutex_lock(&p->mutex);
 794
  795        /* If the process-device list isn't empty */
 796        if (kfd_has_process_device_data(p)) {
 797                /* Run over all pdd of the process */
 798                pdd = kfd_get_first_process_device_data(p);
 799                do {
 800                        pAperture =
 801                                &args->process_apertures[args->num_of_nodes];
 802                        pAperture->gpu_id = pdd->dev->id;
 803                        pAperture->lds_base = pdd->lds_base;
 804                        pAperture->lds_limit = pdd->lds_limit;
 805                        pAperture->gpuvm_base = pdd->gpuvm_base;
 806                        pAperture->gpuvm_limit = pdd->gpuvm_limit;
 807                        pAperture->scratch_base = pdd->scratch_base;
 808                        pAperture->scratch_limit = pdd->scratch_limit;
 809
 810                        dev_dbg(kfd_device,
 811                                "node id %u\n", args->num_of_nodes);
 812                        dev_dbg(kfd_device,
 813                                "gpu id %u\n", pdd->dev->id);
 814                        dev_dbg(kfd_device,
 815                                "lds_base %llX\n", pdd->lds_base);
 816                        dev_dbg(kfd_device,
 817                                "lds_limit %llX\n", pdd->lds_limit);
 818                        dev_dbg(kfd_device,
 819                                "gpuvm_base %llX\n", pdd->gpuvm_base);
 820                        dev_dbg(kfd_device,
 821                                "gpuvm_limit %llX\n", pdd->gpuvm_limit);
 822                        dev_dbg(kfd_device,
 823                                "scratch_base %llX\n", pdd->scratch_base);
 824                        dev_dbg(kfd_device,
 825                                "scratch_limit %llX\n", pdd->scratch_limit);
 826
 827                        args->num_of_nodes++;
 828
 829                        pdd = kfd_get_next_process_device_data(p, pdd);
 830                } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
 831        }
 832
 833        mutex_unlock(&p->mutex);
 834
 835        return 0;
 836}
 837
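/*
 * Illustrative only, not part of the driver: the two-call pattern user
 * space is expected to follow for this ioctl, assuming kfd_fd is an
 * already-open /dev/kfd file descriptor:
 *
 *   struct kfd_ioctl_get_process_apertures_new_args a = {0};
 *
 *   ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &a);
 *   a.kfd_process_device_apertures_ptr = (uintptr_t)calloc(a.num_of_nodes,
 *                   sizeof(struct kfd_process_device_apertures));
 *   ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &a);
 */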
 838static int kfd_ioctl_get_process_apertures_new(struct file *filp,
 839                                struct kfd_process *p, void *data)
 840{
 841        struct kfd_ioctl_get_process_apertures_new_args *args = data;
 842        struct kfd_process_device_apertures *pa;
 843        struct kfd_process_device *pdd;
 844        uint32_t nodes = 0;
 845        int ret;
 846
 847        dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
 848
 849        if (args->num_of_nodes == 0) {
  850                /* Return the number of nodes, so that user space can
  851                 * allocate sufficient memory.
  852                 */
 853                mutex_lock(&p->mutex);
 854
 855                if (!kfd_has_process_device_data(p))
 856                        goto out_unlock;
 857
 858                /* Run over all pdd of the process */
 859                pdd = kfd_get_first_process_device_data(p);
 860                do {
 861                        args->num_of_nodes++;
 862                        pdd = kfd_get_next_process_device_data(p, pdd);
 863                } while (pdd);
 864
 865                goto out_unlock;
 866        }
 867
 868        /* Fill in process-aperture information for all available
 869         * nodes, but not more than args->num_of_nodes as that is
 870         * the amount of memory allocated by user
 871         */
 872        pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
 873                                args->num_of_nodes), GFP_KERNEL);
 874        if (!pa)
 875                return -ENOMEM;
 876
 877        mutex_lock(&p->mutex);
 878
 879        if (!kfd_has_process_device_data(p)) {
 880                args->num_of_nodes = 0;
 881                kfree(pa);
 882                goto out_unlock;
 883        }
 884
 885        /* Run over all pdd of the process */
 886        pdd = kfd_get_first_process_device_data(p);
 887        do {
 888                pa[nodes].gpu_id = pdd->dev->id;
 889                pa[nodes].lds_base = pdd->lds_base;
 890                pa[nodes].lds_limit = pdd->lds_limit;
 891                pa[nodes].gpuvm_base = pdd->gpuvm_base;
 892                pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
 893                pa[nodes].scratch_base = pdd->scratch_base;
 894                pa[nodes].scratch_limit = pdd->scratch_limit;
 895
 896                dev_dbg(kfd_device,
 897                        "gpu id %u\n", pdd->dev->id);
 898                dev_dbg(kfd_device,
 899                        "lds_base %llX\n", pdd->lds_base);
 900                dev_dbg(kfd_device,
 901                        "lds_limit %llX\n", pdd->lds_limit);
 902                dev_dbg(kfd_device,
 903                        "gpuvm_base %llX\n", pdd->gpuvm_base);
 904                dev_dbg(kfd_device,
 905                        "gpuvm_limit %llX\n", pdd->gpuvm_limit);
 906                dev_dbg(kfd_device,
 907                        "scratch_base %llX\n", pdd->scratch_base);
 908                dev_dbg(kfd_device,
 909                        "scratch_limit %llX\n", pdd->scratch_limit);
 910                nodes++;
 911
 912                pdd = kfd_get_next_process_device_data(p, pdd);
 913        } while (pdd && (nodes < args->num_of_nodes));
 914        mutex_unlock(&p->mutex);
 915
 916        args->num_of_nodes = nodes;
 917        ret = copy_to_user(
 918                        (void __user *)args->kfd_process_device_apertures_ptr,
 919                        pa,
 920                        (nodes * sizeof(struct kfd_process_device_apertures)));
 921        kfree(pa);
 922        return ret ? -EFAULT : 0;
 923
 924out_unlock:
 925        mutex_unlock(&p->mutex);
 926        return 0;
 927}
 928
 929static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 930                                        void *data)
 931{
 932        struct kfd_ioctl_create_event_args *args = data;
 933        int err;
 934
 935        /* For dGPUs the event page is allocated in user mode. The
 936         * handle is passed to KFD with the first call to this IOCTL
 937         * through the event_page_offset field.
 938         */
 939        if (args->event_page_offset) {
 940                struct kfd_dev *kfd;
 941                struct kfd_process_device *pdd;
 942                void *mem, *kern_addr;
 943                uint64_t size;
 944
 945                if (p->signal_page) {
 946                        pr_err("Event page is already set\n");
 947                        return -EINVAL;
 948                }
 949
 950                kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
 951                if (!kfd) {
 952                        pr_err("Getting device by id failed in %s\n", __func__);
 953                        return -EINVAL;
 954                }
 955
 956                mutex_lock(&p->mutex);
 957                pdd = kfd_bind_process_to_device(kfd, p);
 958                if (IS_ERR(pdd)) {
 959                        err = PTR_ERR(pdd);
 960                        goto out_unlock;
 961                }
 962
 963                mem = kfd_process_device_translate_handle(pdd,
 964                                GET_IDR_HANDLE(args->event_page_offset));
 965                if (!mem) {
 966                        pr_err("Can't find BO, offset is 0x%llx\n",
 967                               args->event_page_offset);
 968                        err = -EINVAL;
 969                        goto out_unlock;
 970                }
 971                mutex_unlock(&p->mutex);
 972
 973                err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
 974                                                mem, &kern_addr, &size);
 975                if (err) {
 976                        pr_err("Failed to map event page to kernel\n");
 977                        return err;
 978                }
 979
 980                err = kfd_event_page_set(p, kern_addr, size);
 981                if (err) {
 982                        pr_err("Failed to set event page\n");
 983                        return err;
 984                }
 985        }
 986
 987        err = kfd_event_create(filp, p, args->event_type,
 988                                args->auto_reset != 0, args->node_id,
 989                                &args->event_id, &args->event_trigger_data,
 990                                &args->event_page_offset,
 991                                &args->event_slot_index);
 992
 993        return err;
 994
 995out_unlock:
 996        mutex_unlock(&p->mutex);
 997        return err;
 998}
 999
1000static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1001                                        void *data)
1002{
1003        struct kfd_ioctl_destroy_event_args *args = data;
1004
1005        return kfd_event_destroy(p, args->event_id);
1006}
1007
1008static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1009                                void *data)
1010{
1011        struct kfd_ioctl_set_event_args *args = data;
1012
1013        return kfd_set_event(p, args->event_id);
1014}
1015
1016static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1017                                void *data)
1018{
1019        struct kfd_ioctl_reset_event_args *args = data;
1020
1021        return kfd_reset_event(p, args->event_id);
1022}
1023
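/*
 * Block until one of (or, if wait_for_all is set, all of) the events in
 * the user-supplied array are signaled, or until the timeout expires.
 * The outcome is reported through args->wait_result.
 */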
1024static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1025                                void *data)
1026{
1027        struct kfd_ioctl_wait_events_args *args = data;
1028        int err;
1029
1030        err = kfd_wait_on_events(p, args->num_events,
1031                        (void __user *)args->events_ptr,
1032                        (args->wait_for_all != 0),
1033                        args->timeout, &args->wait_result);
1034
1035        return err;
1036}
1037static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1038                                        struct kfd_process *p, void *data)
1039{
1040        struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1041        struct kfd_process_device *pdd;
1042        struct kfd_dev *dev;
1043        long err;
1044
1045        dev = kfd_device_by_id(args->gpu_id);
1046        if (!dev)
1047                return -EINVAL;
1048
1049        mutex_lock(&p->mutex);
1050
1051        pdd = kfd_bind_process_to_device(dev, p);
1052        if (IS_ERR(pdd)) {
1053                err = PTR_ERR(pdd);
1054                goto bind_process_to_device_fail;
1055        }
1056
1057        pdd->qpd.sh_hidden_private_base = args->va_addr;
1058
1059        mutex_unlock(&p->mutex);
1060
1061        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1062            pdd->qpd.vmid != 0)
1063                dev->kfd2kgd->set_scratch_backing_va(
1064                        dev->kgd, args->va_addr, pdd->qpd.vmid);
1065
1066        return 0;
1067
1068bind_process_to_device_fail:
1069        mutex_unlock(&p->mutex);
1070        return err;
1071}
1072
1073static int kfd_ioctl_get_tile_config(struct file *filep,
1074                struct kfd_process *p, void *data)
1075{
1076        struct kfd_ioctl_get_tile_config_args *args = data;
1077        struct kfd_dev *dev;
1078        struct tile_config config;
1079        int err = 0;
1080
1081        dev = kfd_device_by_id(args->gpu_id);
1082        if (!dev)
1083                return -EINVAL;
1084
1085        dev->kfd2kgd->get_tile_config(dev->kgd, &config);
1086
1087        args->gb_addr_config = config.gb_addr_config;
1088        args->num_banks = config.num_banks;
1089        args->num_ranks = config.num_ranks;
1090
1091        if (args->num_tile_configs > config.num_tile_configs)
1092                args->num_tile_configs = config.num_tile_configs;
1093        err = copy_to_user((void __user *)args->tile_config_ptr,
1094                        config.tile_config_ptr,
1095                        args->num_tile_configs * sizeof(uint32_t));
1096        if (err) {
1097                args->num_tile_configs = 0;
1098                return -EFAULT;
1099        }
1100
1101        if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1102                args->num_macro_tile_configs =
1103                                config.num_macro_tile_configs;
1104        err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1105                        config.macro_tile_config_ptr,
1106                        args->num_macro_tile_configs * sizeof(uint32_t));
1107        if (err) {
1108                args->num_macro_tile_configs = 0;
1109                return -EFAULT;
1110        }
1111
1112        return 0;
1113}
1114
1115static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1116                                void *data)
1117{
1118        struct kfd_ioctl_acquire_vm_args *args = data;
1119        struct kfd_process_device *pdd;
1120        struct kfd_dev *dev;
1121        struct file *drm_file;
1122        int ret;
1123
1124        dev = kfd_device_by_id(args->gpu_id);
1125        if (!dev)
1126                return -EINVAL;
1127
1128        drm_file = fget(args->drm_fd);
1129        if (!drm_file)
1130                return -EINVAL;
1131
1132        mutex_lock(&p->mutex);
1133
1134        pdd = kfd_get_process_device_data(dev, p);
1135        if (!pdd) {
1136                ret = -EINVAL;
1137                goto err_unlock;
1138        }
1139
1140        if (pdd->drm_file) {
1141                ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1142                goto err_unlock;
1143        }
1144
1145        ret = kfd_process_device_init_vm(pdd, drm_file);
1146        if (ret)
1147                goto err_unlock;
1148        /* On success, the PDD keeps the drm_file reference */
1149        mutex_unlock(&p->mutex);
1150
1151        return 0;
1152
1153err_unlock:
1154        mutex_unlock(&p->mutex);
1155        fput(drm_file);
1156        return ret;
1157}
1158
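/*
 * A device is treated as "large BAR" when all of its local memory is
 * host-accessible, i.e. the private VRAM size is zero and the public
 * (CPU-visible) size is non-zero. Devices that need the IOMMU (APUs)
 * never qualify, and the debug_largebar module option can force
 * large-BAR behaviour for testing on other systems.
 */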
1159static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1160{
1161        struct kfd_local_mem_info mem_info;
1162
1163        if (debug_largebar) {
1164                pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1165                return true;
1166        }
1167
1168        if (dev->device_info->needs_iommu_device)
1169                return false;
1170
1171        dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
1172        if (mem_info.local_mem_size_private == 0 &&
1173                        mem_info.local_mem_size_public > 0)
1174                return true;
1175        return false;
1176}
1177
1178static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1179                                        struct kfd_process *p, void *data)
1180{
1181        struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1182        struct kfd_process_device *pdd;
1183        void *mem;
1184        struct kfd_dev *dev;
1185        int idr_handle;
1186        long err;
1187        uint64_t offset = args->mmap_offset;
1188        uint32_t flags = args->flags;
1189
1190        if (args->size == 0)
1191                return -EINVAL;
1192
1193        dev = kfd_device_by_id(args->gpu_id);
1194        if (!dev)
1195                return -EINVAL;
1196
1197        if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1198                (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1199                !kfd_dev_is_large_bar(dev)) {
 1200                pr_err("Allocating host-visible VRAM on a small-BAR system is not allowed\n");
1201                return -EINVAL;
1202        }
1203
1204        mutex_lock(&p->mutex);
1205
1206        pdd = kfd_bind_process_to_device(dev, p);
1207        if (IS_ERR(pdd)) {
1208                err = PTR_ERR(pdd);
1209                goto err_unlock;
1210        }
1211
1212        err = dev->kfd2kgd->alloc_memory_of_gpu(
1213                dev->kgd, args->va_addr, args->size,
1214                pdd->vm, (struct kgd_mem **) &mem, &offset,
1215                flags);
1216
1217        if (err)
1218                goto err_unlock;
1219
1220        idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1221        if (idr_handle < 0) {
1222                err = -EFAULT;
1223                goto err_free;
1224        }
1225
1226        mutex_unlock(&p->mutex);
1227
1228        args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1229        args->mmap_offset = offset;
1230
1231        return 0;
1232
1233err_free:
1234        dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1235err_unlock:
1236        mutex_unlock(&p->mutex);
1237        return err;
1238}
1239
1240static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1241                                        struct kfd_process *p, void *data)
1242{
1243        struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1244        struct kfd_process_device *pdd;
1245        void *mem;
1246        struct kfd_dev *dev;
1247        int ret;
1248
1249        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1250        if (!dev)
1251                return -EINVAL;
1252
1253        mutex_lock(&p->mutex);
1254
1255        pdd = kfd_get_process_device_data(dev, p);
1256        if (!pdd) {
1257                pr_err("Process device data doesn't exist\n");
1258                ret = -EINVAL;
1259                goto err_unlock;
1260        }
1261
1262        mem = kfd_process_device_translate_handle(
1263                pdd, GET_IDR_HANDLE(args->handle));
1264        if (!mem) {
1265                ret = -EINVAL;
1266                goto err_unlock;
1267        }
1268
1269        ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1270
1271        /* If freeing the buffer failed, leave the handle in place for
1272         * clean-up during process tear-down.
1273         */
1274        if (!ret)
1275                kfd_process_device_remove_obj_handle(
1276                        pdd, GET_IDR_HANDLE(args->handle));
1277
1278err_unlock:
1279        mutex_unlock(&p->mutex);
1280        return ret;
1281}
1282
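/*
 * Map a previously allocated buffer into the GPU virtual memory of every
 * device in the user-supplied device list. args->n_success records how
 * many mappings have completed, so an interrupted call can be restarted
 * without redoing earlier mappings. TLBs are flushed once the page-table
 * updates have been confirmed.
 */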
1283static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1284                                        struct kfd_process *p, void *data)
1285{
1286        struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1287        struct kfd_process_device *pdd, *peer_pdd;
1288        void *mem;
1289        struct kfd_dev *dev, *peer;
1290        long err = 0;
1291        int i;
1292        uint32_t *devices_arr = NULL;
1293
1294        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1295        if (!dev)
1296                return -EINVAL;
1297
1298        if (!args->n_devices) {
1299                pr_debug("Device IDs array empty\n");
1300                return -EINVAL;
1301        }
1302        if (args->n_success > args->n_devices) {
1303                pr_debug("n_success exceeds n_devices\n");
1304                return -EINVAL;
1305        }
1306
1307        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1308                                    GFP_KERNEL);
1309        if (!devices_arr)
1310                return -ENOMEM;
1311
1312        err = copy_from_user(devices_arr,
1313                             (void __user *)args->device_ids_array_ptr,
1314                             args->n_devices * sizeof(*devices_arr));
1315        if (err != 0) {
1316                err = -EFAULT;
1317                goto copy_from_user_failed;
1318        }
1319
1320        mutex_lock(&p->mutex);
1321
1322        pdd = kfd_bind_process_to_device(dev, p);
1323        if (IS_ERR(pdd)) {
1324                err = PTR_ERR(pdd);
1325                goto bind_process_to_device_failed;
1326        }
1327
1328        mem = kfd_process_device_translate_handle(pdd,
1329                                                GET_IDR_HANDLE(args->handle));
1330        if (!mem) {
1331                err = -ENOMEM;
1332                goto get_mem_obj_from_handle_failed;
1333        }
1334
1335        for (i = args->n_success; i < args->n_devices; i++) {
1336                peer = kfd_device_by_id(devices_arr[i]);
1337                if (!peer) {
1338                        pr_debug("Getting device by id failed for 0x%x\n",
1339                                 devices_arr[i]);
1340                        err = -EINVAL;
1341                        goto get_mem_obj_from_handle_failed;
1342                }
1343
1344                peer_pdd = kfd_bind_process_to_device(peer, p);
1345                if (IS_ERR(peer_pdd)) {
1346                        err = PTR_ERR(peer_pdd);
1347                        goto get_mem_obj_from_handle_failed;
1348                }
1349                err = peer->kfd2kgd->map_memory_to_gpu(
1350                        peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1351                if (err) {
1352                        pr_err("Failed to map to gpu %d/%d\n",
1353                               i, args->n_devices);
1354                        goto map_memory_to_gpu_failed;
1355                }
1356                args->n_success = i+1;
1357        }
1358
1359        mutex_unlock(&p->mutex);
1360
1361        err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1362        if (err) {
1363                pr_debug("Sync memory failed, wait interrupted by user signal\n");
1364                goto sync_memory_failed;
1365        }
1366
1367        /* Flush TLBs after waiting for the page table updates to complete */
1368        for (i = 0; i < args->n_devices; i++) {
1369                peer = kfd_device_by_id(devices_arr[i]);
1370                if (WARN_ON_ONCE(!peer))
1371                        continue;
1372                peer_pdd = kfd_get_process_device_data(peer, p);
1373                if (WARN_ON_ONCE(!peer_pdd))
1374                        continue;
1375                kfd_flush_tlb(peer_pdd);
1376        }
1377
1378        kfree(devices_arr);
1379
1380        return err;
1381
1382bind_process_to_device_failed:
1383get_mem_obj_from_handle_failed:
1384map_memory_to_gpu_failed:
1385        mutex_unlock(&p->mutex);
1386copy_from_user_failed:
1387sync_memory_failed:
1388        kfree(devices_arr);
1389
1390        return err;
1391}
1392
1393static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1394                                        struct kfd_process *p, void *data)
1395{
1396        struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1397        struct kfd_process_device *pdd, *peer_pdd;
1398        void *mem;
1399        struct kfd_dev *dev, *peer;
1400        long err = 0;
1401        uint32_t *devices_arr = NULL, i;
1402
1403        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1404        if (!dev)
1405                return -EINVAL;
1406
1407        if (!args->n_devices) {
1408                pr_debug("Device IDs array empty\n");
1409                return -EINVAL;
1410        }
1411        if (args->n_success > args->n_devices) {
1412                pr_debug("n_success exceeds n_devices\n");
1413                return -EINVAL;
1414        }
1415
1416        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1417                                    GFP_KERNEL);
1418        if (!devices_arr)
1419                return -ENOMEM;
1420
1421        err = copy_from_user(devices_arr,
1422                             (void __user *)args->device_ids_array_ptr,
1423                             args->n_devices * sizeof(*devices_arr));
1424        if (err != 0) {
1425                err = -EFAULT;
1426                goto copy_from_user_failed;
1427        }
1428
1429        mutex_lock(&p->mutex);
1430
1431        pdd = kfd_get_process_device_data(dev, p);
1432        if (!pdd) {
1433                err = -EINVAL;
1434                goto bind_process_to_device_failed;
1435        }
1436
1437        mem = kfd_process_device_translate_handle(pdd,
1438                                                GET_IDR_HANDLE(args->handle));
1439        if (!mem) {
1440                err = -ENOMEM;
1441                goto get_mem_obj_from_handle_failed;
1442        }
1443
1444        for (i = args->n_success; i < args->n_devices; i++) {
1445                peer = kfd_device_by_id(devices_arr[i]);
1446                if (!peer) {
1447                        err = -EINVAL;
1448                        goto get_mem_obj_from_handle_failed;
1449                }
1450
1451                peer_pdd = kfd_get_process_device_data(peer, p);
1452                if (!peer_pdd) {
1453                        err = -ENODEV;
1454                        goto get_mem_obj_from_handle_failed;
1455                }
1456                err = dev->kfd2kgd->unmap_memory_to_gpu(
1457                        peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1458                if (err) {
1459                        pr_err("Failed to unmap from gpu %d/%d\n",
1460                               i, args->n_devices);
1461                        goto unmap_memory_from_gpu_failed;
1462                }
1463                args->n_success = i+1;
1464        }
1465        kfree(devices_arr);
1466
1467        mutex_unlock(&p->mutex);
1468
1469        return 0;
1470
1471bind_process_to_device_failed:
1472get_mem_obj_from_handle_failed:
1473unmap_memory_from_gpu_failed:
1474        mutex_unlock(&p->mutex);
1475copy_from_user_failed:
1476        kfree(devices_arr);
1477        return err;
1478}
1479
1480#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1481        [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1482                            .cmd_drv = 0, .name = #ioctl}
1483
1484/** Ioctl table */
1485static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1486        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1487                        kfd_ioctl_get_version, 0),
1488
1489        AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1490                        kfd_ioctl_create_queue, 0),
1491
1492        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1493                        kfd_ioctl_destroy_queue, 0),
1494
1495        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1496                        kfd_ioctl_set_memory_policy, 0),
1497
1498        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1499                        kfd_ioctl_get_clock_counters, 0),
1500
1501        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1502                        kfd_ioctl_get_process_apertures, 0),
1503
1504        AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1505                        kfd_ioctl_update_queue, 0),
1506
1507        AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1508                        kfd_ioctl_create_event, 0),
1509
1510        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1511                        kfd_ioctl_destroy_event, 0),
1512
1513        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1514                        kfd_ioctl_set_event, 0),
1515
1516        AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1517                        kfd_ioctl_reset_event, 0),
1518
1519        AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1520                        kfd_ioctl_wait_events, 0),
1521
1522        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1523                        kfd_ioctl_dbg_register, 0),
1524
1525        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1526                        kfd_ioctl_dbg_unregister, 0),
1527
1528        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1529                        kfd_ioctl_dbg_address_watch, 0),
1530
1531        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1532                        kfd_ioctl_dbg_wave_control, 0),
1533
1534        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1535                        kfd_ioctl_set_scratch_backing_va, 0),
1536
1537        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1538                        kfd_ioctl_get_tile_config, 0),
1539
1540        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1541                        kfd_ioctl_set_trap_handler, 0),
1542
1543        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1544                        kfd_ioctl_get_process_apertures_new, 0),
1545
1546        AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1547                        kfd_ioctl_acquire_vm, 0),
1548
1549        AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1550                        kfd_ioctl_alloc_memory_of_gpu, 0),
1551
1552        AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1553                        kfd_ioctl_free_memory_of_gpu, 0),
1554
1555        AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1556                        kfd_ioctl_map_memory_to_gpu, 0),
1557
1558        AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1559                        kfd_ioctl_unmap_memory_from_gpu, 0),
1560
1561};
1562
1563#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
1564
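/*
 * Common ioctl dispatcher: look up the handler in amdkfd_ioctls[], copy
 * the argument block from user space into a stack buffer (or, for large
 * commands, a heap buffer) sized for the larger of the user's and the
 * kernel's idea of the command, call the handler, and copy the result
 * back to user space for commands with an output direction.
 */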
1565static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1566{
1567        struct kfd_process *process;
1568        amdkfd_ioctl_t *func;
1569        const struct amdkfd_ioctl_desc *ioctl = NULL;
1570        unsigned int nr = _IOC_NR(cmd);
1571        char stack_kdata[128];
1572        char *kdata = NULL;
1573        unsigned int usize, asize;
1574        int retcode = -EINVAL;
1575
1576        if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1577                goto err_i1;
1578
1579        if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1580                u32 amdkfd_size;
1581
1582                ioctl = &amdkfd_ioctls[nr];
1583
1584                amdkfd_size = _IOC_SIZE(ioctl->cmd);
1585                usize = asize = _IOC_SIZE(cmd);
1586                if (amdkfd_size > asize)
1587                        asize = amdkfd_size;
1588
1589                cmd = ioctl->cmd;
1590        } else
1591                goto err_i1;
1592
1593        dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
1594
1595        process = kfd_get_process(current);
1596        if (IS_ERR(process)) {
1597                dev_dbg(kfd_device, "no process\n");
1598                goto err_i1;
1599        }
1600
1601        /* Do not trust userspace, use our own definition */
1602        func = ioctl->func;
1603
1604        if (unlikely(!func)) {
1605                dev_dbg(kfd_device, "no function\n");
1606                retcode = -EINVAL;
1607                goto err_i1;
1608        }
1609
1610        if (cmd & (IOC_IN | IOC_OUT)) {
1611                if (asize <= sizeof(stack_kdata)) {
1612                        kdata = stack_kdata;
1613                } else {
1614                        kdata = kmalloc(asize, GFP_KERNEL);
1615                        if (!kdata) {
1616                                retcode = -ENOMEM;
1617                                goto err_i1;
1618                        }
1619                }
1620                if (asize > usize)
1621                        memset(kdata + usize, 0, asize - usize);
1622        }
1623
1624        if (cmd & IOC_IN) {
1625                if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1626                        retcode = -EFAULT;
1627                        goto err_i1;
1628                }
1629        } else if (cmd & IOC_OUT) {
1630                memset(kdata, 0, usize);
1631        }
1632
1633        retcode = func(filep, process, kdata);
1634
1635        if (cmd & IOC_OUT)
1636                if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1637                        retcode = -EFAULT;
1638
1639err_i1:
1640        if (!ioctl)
1641                dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1642                          task_pid_nr(current), cmd, nr);
1643
1644        if (kdata != stack_kdata)
1645                kfree(kdata);
1646
1647        if (retcode)
1648                dev_dbg(kfd_device, "ret = %d\n", retcode);
1649
1650        return retcode;
1651}
1652
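/*
 * mmap handler for /dev/kfd: the upper bits of the mmap offset encode
 * the mapping type and, where relevant, the GPU id. Doorbell, event-page
 * and reserved-memory mappings are handed off to their specific helpers.
 */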
1653static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1654{
1655        struct kfd_process *process;
1656        struct kfd_dev *dev = NULL;
1657        unsigned long vm_pgoff;
1658        unsigned int gpu_id;
1659
1660        process = kfd_get_process(current);
1661        if (IS_ERR(process))
1662                return PTR_ERR(process);
1663
1664        vm_pgoff = vma->vm_pgoff;
1665        vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
1666        gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
1667        if (gpu_id)
1668                dev = kfd_device_by_id(gpu_id);
1669
1670        switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
1671        case KFD_MMAP_TYPE_DOORBELL:
1672                if (!dev)
1673                        return -ENODEV;
1674                return kfd_doorbell_mmap(dev, process, vma);
1675
1676        case KFD_MMAP_TYPE_EVENTS:
1677                return kfd_event_mmap(process, vma);
1678
1679        case KFD_MMAP_TYPE_RESERVED_MEM:
1680                if (!dev)
1681                        return -ENODEV;
1682                return kfd_reserved_mem_mmap(dev, process, vma);
1683        }
1684
1685        return -EFAULT;
1686}
1687