linux/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22#include "kfd_priv.h"
  23#include <linux/mm.h>
  24#include <linux/mman.h>
  25#include <linux/slab.h>
  26#include <linux/io.h>
  27#include <linux/idr.h>
  28
/*
 * This file implements kernel-level doorbell management for kernel queues,
 * using the first doorbell page, which is reserved for the kernel.
 */
  33
  34/*
  35 * Each device exposes a doorbell aperture, a PCI MMIO aperture that
  36 * receives 32-bit writes that are passed to queues as wptr values.
  37 * The doorbells are intended to be written by applications as part
  38 * of queueing work on user-mode queues.
  39 * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
  40 * We map the doorbell address space into user-mode when a process creates
  41 * its first queue on each device.
  42 * Although the mapping is done by KFD, it is equivalent to an mmap of
  43 * the /dev/kfd with the particular device encoded in the mmap offset.
  44 * There will be other uses for mmap of /dev/kfd, so only a range of
  45 * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
  46 */
  47
  48/* # of doorbell bytes allocated for each process. */
  49size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
  50{
  51        return roundup(kfd->device_info->doorbell_size *
  52                        KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
  53                        PAGE_SIZE);
  54}
  55
  56/* Doorbell calculations for device init. */
  57int kfd_doorbell_init(struct kfd_dev *kfd)
  58{
  59        size_t doorbell_start_offset;
  60        size_t doorbell_aperture_size;
  61        size_t doorbell_process_limit;
  62
  63        /*
  64         * We start with calculations in bytes because the input data might
  65         * only be byte-aligned.
  66         * Only after we have done the rounding can we assume any alignment.
  67         */
  68
  69        doorbell_start_offset =
  70                        roundup(kfd->shared_resources.doorbell_start_offset,
  71                                        kfd_doorbell_process_slice(kfd));
  72
  73        doorbell_aperture_size =
  74                        rounddown(kfd->shared_resources.doorbell_aperture_size,
  75                                        kfd_doorbell_process_slice(kfd));
  76
  77        if (doorbell_aperture_size > doorbell_start_offset)
  78                doorbell_process_limit =
  79                        (doorbell_aperture_size - doorbell_start_offset) /
  80                                                kfd_doorbell_process_slice(kfd);
  81        else
  82                return -ENOSPC;
  83
  84        if (!kfd->max_doorbell_slices ||
  85            doorbell_process_limit < kfd->max_doorbell_slices)
  86                kfd->max_doorbell_slices = doorbell_process_limit;
  87
  88        kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
  89                                doorbell_start_offset;
  90
  91        kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
  92
  93        kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
  94                                           kfd_doorbell_process_slice(kfd));
  95
  96        if (!kfd->doorbell_kernel_ptr)
  97                return -ENOMEM;
  98
  99        pr_debug("Doorbell initialization:\n");
 100        pr_debug("doorbell base           == 0x%08lX\n",
 101                        (uintptr_t)kfd->doorbell_base);
 102
 103        pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
 104                        kfd->doorbell_base_dw_offset);
 105
 106        pr_debug("doorbell_process_limit  == 0x%08lX\n",
 107                        doorbell_process_limit);
 108
 109        pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
 110                        (uintptr_t)kfd->doorbell_base);
 111
 112        pr_debug("doorbell aperture size  == 0x%08lX\n",
 113                        kfd->shared_resources.doorbell_aperture_size);
 114
 115        pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
 116
 117        return 0;
 118}
 119
 120void kfd_doorbell_fini(struct kfd_dev *kfd)
 121{
 122        if (kfd->doorbell_kernel_ptr)
 123                iounmap(kfd->doorbell_kernel_ptr);
 124}
 125
 126int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
 127                      struct vm_area_struct *vma)
 128{
 129        phys_addr_t address;
 130        struct kfd_process_device *pdd;
 131
 132        /*
 133         * For simplicitly we only allow mapping of the entire doorbell
 134         * allocation of a single device & process.
 135         */
 136        if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
 137                return -EINVAL;
 138
 139        pdd = kfd_get_process_device_data(dev, process);
 140        if (!pdd)
 141                return -EINVAL;
 142
 143        /* Calculate physical address of doorbell */
 144        address = kfd_get_process_doorbells(pdd);
 145        vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
 146                                VM_DONTDUMP | VM_PFNMAP;
 147
 148        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 149
 150        pr_debug("Mapping doorbell page\n"
 151                 "     target user address == 0x%08llX\n"
 152                 "     physical address    == 0x%08llX\n"
 153                 "     vm_flags            == 0x%04lX\n"
 154                 "     size                == 0x%04lX\n",
 155                 (unsigned long long) vma->vm_start, address, vma->vm_flags,
 156                 kfd_doorbell_process_slice(dev));
 157
 158
 159        return io_remap_pfn_range(vma,
 160                                vma->vm_start,
 161                                address >> PAGE_SHIFT,
 162                                kfd_doorbell_process_slice(dev),
 163                                vma->vm_page_prot);
 164}
 165
 166
 167/* get kernel iomem pointer for a doorbell */
 168void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 169                                        unsigned int *doorbell_off)
 170{
 171        u32 inx;
 172
 173        mutex_lock(&kfd->doorbell_mutex);
 174        inx = find_first_zero_bit(kfd->doorbell_available_index,
 175                                        KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
 176
 177        __set_bit(inx, kfd->doorbell_available_index);
 178        mutex_unlock(&kfd->doorbell_mutex);
 179
 180        if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
 181                return NULL;
 182
 183        inx *= kfd->device_info->doorbell_size / sizeof(u32);
 184
 185        /*
 186         * Calculating the kernel doorbell offset using the first
 187         * doorbell page.
 188         */
 189        *doorbell_off = kfd->doorbell_base_dw_offset + inx;
 190
 191        pr_debug("Get kernel queue doorbell\n"
 192                        "     doorbell offset   == 0x%08X\n"
 193                        "     doorbell index    == 0x%x\n",
 194                *doorbell_off, inx);
 195
 196        return kfd->doorbell_kernel_ptr + inx;
 197}
 198
 199void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
 200{
 201        unsigned int inx;
 202
 203        inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
 204                * sizeof(u32) / kfd->device_info->doorbell_size;
 205
 206        mutex_lock(&kfd->doorbell_mutex);
 207        __clear_bit(inx, kfd->doorbell_available_index);
 208        mutex_unlock(&kfd->doorbell_mutex);
 209}
 210
 211void write_kernel_doorbell(void __iomem *db, u32 value)
 212{
 213        if (db) {
 214                writel(value, db);
 215                pr_debug("Writing %d to doorbell address %p\n", value, db);
 216        }
 217}
 218
 219void write_kernel_doorbell64(void __iomem *db, u64 value)
 220{
 221        if (db) {
 222                WARN(((unsigned long)db & 7) != 0,
 223                     "Unaligned 64-bit doorbell");
 224                writeq(value, (u64 __iomem *)db);
 225                pr_debug("writing %llu to doorbell address %p\n", value, db);
 226        }
 227}
 228
 229unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
 230                                        struct kfd_process_device *pdd,
 231                                        unsigned int doorbell_id)
 232{
 233        /*
 234         * doorbell_base_dw_offset accounts for doorbells taken by KGD.
 235         * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
 236         * the process's doorbells. The offset returned is in dword
 237         * units regardless of the ASIC-dependent doorbell size.
 238         */
 239        return kfd->doorbell_base_dw_offset +
 240                pdd->doorbell_index
 241                * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
 242                doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
 243}
 244
 245uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
 246{
 247        uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
 248                                kfd->shared_resources.doorbell_start_offset) /
 249                                        kfd_doorbell_process_slice(kfd) + 1;
 250
 251        return num_of_elems;
 252
 253}
 254
 255phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
 256{
 257        return pdd->dev->doorbell_base +
 258                pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
 259}
 260
 261int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
 262{
 263        int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
 264                                GFP_KERNEL);
 265        if (r > 0)
 266                *doorbell_index = r;
 267
 268        return r;
 269}
 270
 271void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
 272{
 273        if (doorbell_index)
 274                ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
 275}
 276