linux/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/poll.h>
#include <linux/wait.h>
#include <linux/anon_inodes.h>
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"

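/*
 * Overview: each open SMI event fd is backed by a kfd_smi_client with its
 * own kfifo of newline-terminated text events. Clients hang off
 * dev->smi_clients, an RCU-protected list, so event producers can walk it
 * without taking dev->smi_lock. Userspace selects which events it receives
 * by writing a 64-bit enable mask to the fd (see kfd_smi_ev_write()).
 */
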
struct kfd_smi_client {
        struct list_head list;          /* link in dev->smi_clients, RCU */
        struct kfifo fifo;              /* queued event text, drained by read() */
        wait_queue_head_t wait_queue;   /* readers wait here for new events */
        /* events enabled */
        uint64_t events;
        struct kfd_dev *dev;
        spinlock_t lock;                /* protects fifo */
};

#define MAX_KFIFO_SIZE  1024

static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t,
                                loff_t *);
static int kfd_smi_ev_release(struct inode *, struct file *);

static const char kfd_smi_name[] = "kfd_smi_ev";

static const struct file_operations kfd_smi_ev_fops = {
        .owner = THIS_MODULE,
        .poll = kfd_smi_ev_poll,
        .read = kfd_smi_ev_read,
        .write = kfd_smi_ev_write,
        .release = kfd_smi_ev_release
};

static __poll_t kfd_smi_ev_poll(struct file *filep,
                                struct poll_table_struct *wait)
{
        struct kfd_smi_client *client = filep->private_data;
        __poll_t mask = 0;

        poll_wait(filep, &client->wait_queue, wait);

        spin_lock(&client->lock);
        if (!kfifo_is_empty(&client->fifo))
                mask = EPOLLIN | EPOLLRDNORM;
        spin_unlock(&client->lock);

        return mask;
}

static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
                               size_t size, loff_t *offset)
{
        int ret;
        size_t to_copy;
        struct kfd_smi_client *client = filep->private_data;
        unsigned char *buf;

        buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* kfifo_to_user can sleep, so we can't use spinlock protection
         * around it. Instead, copy the fifo contents out under the lock,
         * then copy them to user space afterwards.
         */
        spin_lock(&client->lock);
        to_copy = kfifo_len(&client->fifo);
        if (!to_copy) {
                spin_unlock(&client->lock);
                ret = -EAGAIN;
                goto ret_err;
        }
        /* buf is a pointer here, so sizeof(buf) would clamp the copy to
         * pointer size; bound it by the allocation size instead.
         */
        to_copy = min3(size, (size_t)MAX_KFIFO_SIZE, to_copy);
        ret = kfifo_out(&client->fifo, buf, to_copy);
        spin_unlock(&client->lock);
        if (ret <= 0) {
                ret = -EAGAIN;
                goto ret_err;
        }

        ret = copy_to_user(user, buf, to_copy);
        if (ret) {
                ret = -EFAULT;
                goto ret_err;
        }

        kfree(buf);
        return to_copy;

ret_err:
        kfree(buf);
        return ret;
}

static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
                                size_t size, loff_t *offset)
{
        struct kfd_smi_client *client = filep->private_data;
        uint64_t events;

        if (!access_ok(user, size) || size < sizeof(events))
                return -EFAULT;
        if (copy_from_user(&events, user, sizeof(events)))
                return -EFAULT;

        WRITE_ONCE(client->events, events);

        return sizeof(events);
}

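/* Illustrative userspace use of the write() interface above (a sketch; the
 * mask macro and event enum come from uapi/linux/kfd_ioctl.h):
 *
 *      uint64_t mask = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_VMFAULT);
 *      write(smi_fd, &mask, sizeof(mask));   // returns 8 on success
 */
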
static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
{
        struct kfd_smi_client *client = filep->private_data;
        struct kfd_dev *dev = client->dev;

        /* Unlink the client, then wait for concurrent event producers
         * (RCU readers of dev->smi_clients) before freeing it.
         */
        spin_lock(&dev->smi_lock);
        list_del_rcu(&client->list);
        spin_unlock(&dev->smi_lock);

        synchronize_rcu();
        kfifo_free(&client->fifo);
        kfree(client);

        return 0;
}

static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
                               char *event_msg, int len)
{
        struct kfd_smi_client *client;

        rcu_read_lock();

        list_for_each_entry_rcu(client, &dev->smi_clients, list) {
                if (!(READ_ONCE(client->events) &
                      KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
                        continue;
                spin_lock(&client->lock);
                if (kfifo_avail(&client->fifo) >= len) {
                        kfifo_in(&client->fifo, event_msg, len);
                        wake_up_all(&client->wait_queue);
                } else {
                        pr_debug("smi_event(EventID: %u): no space left\n",
                                 smi_event);
                }
                spin_unlock(&client->lock);
        }

        rcu_read_unlock();
}

void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
{
        /*
         * GpuReset msg = Reset seq number (incremented once for every
         * pre-reset message sent before the GPU is reset).
         * 1 byte event + 1 byte space + 8 bytes seq num +
         * 1 byte \n + 1 byte \0 = 12
         */
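        /* e.g. the first pre-reset notification would read "3 1\n" (both
         * fields hex), assuming the enum kfd_smi_event values from
         * uapi/linux/kfd_ioctl.h.
         */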
        char fifo_in[12];
        int len;
        unsigned int event;

        if (list_empty(&dev->smi_clients))
                return;

        memset(fifo_in, 0x0, sizeof(fifo_in));

        if (post_reset) {
                event = KFD_SMI_EVENT_GPU_POST_RESET;
        } else {
                event = KFD_SMI_EVENT_GPU_PRE_RESET;
                ++(dev->reset_seq_num);
        }

        len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
                       dev->reset_seq_num);

        add_event_to_kfifo(dev, event, fifo_in, len);
}

void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
                                             uint64_t throttle_bitmask)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
        /*
         * ThermalThrottle msg = throttle_bitmask(16):
         *                       thermal_interrupt_count(16):
         * 1 byte event + 1 byte space + 16 byte throttle_bitmask +
         * 1 byte : + 16 byte thermal_interrupt_counter + 1 byte \n +
         * 1 byte \0 = 37
         */
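        /* e.g. "2 1:2\n" for throttle bit 0 set after two throttling
         * interrupts, assuming the uapi value of
         * KFD_SMI_EVENT_THERMAL_THROTTLE.
         */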
        char fifo_in[37];
        int len;

        if (list_empty(&dev->smi_clients))
                return;

        len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
                       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
                       atomic64_read(&adev->smu.throttle_int_counter));

        add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
}

void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
        struct amdgpu_task_info task_info;
        /*
         * VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25
         * 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
         * 1 byte \0 = 29
         */
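        /* e.g. "1 4b2:my_app\n" for pid 0x4b2 and task "my_app", assuming
         * the uapi value of KFD_SMI_EVENT_VMFAULT.
         */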
        char fifo_in[29];
        int len;

        if (list_empty(&dev->smi_clients))
                return;

        memset(&task_info, 0, sizeof(struct amdgpu_task_info));
        amdgpu_vm_get_task_info(adev, pasid, &task_info);
        /* Report VM faults from user applications, not retries from the
         * kernel, which carry no task pid.
         */
        if (!task_info.pid)
                return;

        len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n",
                       KFD_SMI_EVENT_VMFAULT, task_info.pid,
                       task_info.task_name);

        add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
}

int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
{
        struct kfd_smi_client *client;
        int ret;

        client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL);
        if (!client)
                return -ENOMEM;
        INIT_LIST_HEAD(&client->list);

        ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
        if (ret) {
                kfree(client);
                return ret;
        }

        init_waitqueue_head(&client->wait_queue);
        spin_lock_init(&client->lock);
        client->events = 0;
        client->dev = dev;

        /* The fd is live as soon as anon_inode_getfd() installs it, so the
         * client must be fully initialized before this point.
         */
        ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
                               O_RDWR);
        if (ret < 0) {
                kfifo_free(&client->fifo);
                kfree(client);
                return ret;
        }
        *fd = ret;

        spin_lock(&dev->smi_lock);
        list_add_rcu(&client->list, &dev->smi_clients);
        spin_unlock(&dev->smi_lock);

        return 0;
}
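
/*
 * Illustrative userspace consumer of this interface (a sketch, not part of
 * the kernel file). It assumes AMDKFD_IOC_SMI_EVENTS and struct
 * kfd_ioctl_smi_events_args from uapi/linux/kfd_ioctl.h, and a gpuid read
 * from the KFD topology under /sys/class/kfd/kfd/topology/nodes/:
 *
 *      int kfd = open("/dev/kfd", O_RDWR);
 *      struct kfd_ioctl_smi_events_args args = { .gpuid = gpuid };
 *
 *      if (kfd >= 0 && ioctl(kfd, AMDKFD_IOC_SMI_EVENTS, &args) == 0) {
 *              int smi_fd = args.anon_fd;
 *              uint64_t mask = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_VMFAULT);
 *              struct pollfd pfd = { .fd = smi_fd, .events = POLLIN };
 *              char buf[1024];
 *
 *              write(smi_fd, &mask, sizeof(mask));  // enable VM fault events
 *              while (poll(&pfd, 1, -1) > 0) {
 *                      ssize_t n = read(smi_fd, buf, sizeof(buf));
 *                      if (n > 0)
 *                              fwrite(buf, 1, n, stdout);  // "<event> <pid>:<task>\n"
 *              }
 *      }
 */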