linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
<<
>>
Prefs
   1/*
   2 * Copyright 2016 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 * Authors: Christian König
  23 */
  24
  25#include <linux/dma-mapping.h>
  26#include "amdgpu.h"
  27#include "amdgpu_vm.h"
  28#include "amdgpu_atomfirmware.h"
  29#include "atom.h"
  30
  31struct amdgpu_vram_mgr {
  32        struct drm_mm mm;
  33        spinlock_t lock;
  34        atomic64_t usage;
  35        atomic64_t vis_usage;
  36};
  37
  38/**
  39 * DOC: mem_info_vram_total
  40 *
  41 * The amdgpu driver provides a sysfs API for reporting current total VRAM
  42 * available on the device
  43 * The file mem_info_vram_total is used for this and returns the total
  44 * amount of VRAM in bytes
  45 */
  46static ssize_t amdgpu_mem_info_vram_total_show(struct device *dev,
  47                struct device_attribute *attr, char *buf)
  48{
  49        struct drm_device *ddev = dev_get_drvdata(dev);
  50        struct amdgpu_device *adev = ddev->dev_private;
  51
  52        return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.real_vram_size);
  53}
  54
  55/**
  56 * DOC: mem_info_vis_vram_total
  57 *
  58 * The amdgpu driver provides a sysfs API for reporting current total
  59 * visible VRAM available on the device
  60 * The file mem_info_vis_vram_total is used for this and returns the total
  61 * amount of visible VRAM in bytes
  62 */
  63static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
  64                struct device_attribute *attr, char *buf)
  65{
  66        struct drm_device *ddev = dev_get_drvdata(dev);
  67        struct amdgpu_device *adev = ddev->dev_private;
  68
  69        return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.visible_vram_size);
  70}
  71
  72/**
  73 * DOC: mem_info_vram_used
  74 *
  75 * The amdgpu driver provides a sysfs API for reporting current total VRAM
  76 * available on the device
  77 * The file mem_info_vram_used is used for this and returns the total
  78 * amount of currently used VRAM in bytes
  79 */
  80static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
  81                struct device_attribute *attr, char *buf)
  82{
  83        struct drm_device *ddev = dev_get_drvdata(dev);
  84        struct amdgpu_device *adev = ddev->dev_private;
  85
  86        return snprintf(buf, PAGE_SIZE, "%llu\n",
  87                amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]));
  88}
  89
  90/**
  91 * DOC: mem_info_vis_vram_used
  92 *
  93 * The amdgpu driver provides a sysfs API for reporting current total of
  94 * used visible VRAM
  95 * The file mem_info_vis_vram_used is used for this and returns the total
  96 * amount of currently used visible VRAM in bytes
  97 */
  98static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
  99                struct device_attribute *attr, char *buf)
 100{
 101        struct drm_device *ddev = dev_get_drvdata(dev);
 102        struct amdgpu_device *adev = ddev->dev_private;
 103
 104        return snprintf(buf, PAGE_SIZE, "%llu\n",
 105                amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]));
 106}
 107
 108static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
 109                                                 struct device_attribute *attr,
 110                                                 char *buf)
 111{
 112        struct drm_device *ddev = dev_get_drvdata(dev);
 113        struct amdgpu_device *adev = ddev->dev_private;
 114
 115        switch (adev->gmc.vram_vendor) {
 116        case SAMSUNG:
 117                return snprintf(buf, PAGE_SIZE, "samsung\n");
 118        case INFINEON:
 119                return snprintf(buf, PAGE_SIZE, "infineon\n");
 120        case ELPIDA:
 121                return snprintf(buf, PAGE_SIZE, "elpida\n");
 122        case ETRON:
 123                return snprintf(buf, PAGE_SIZE, "etron\n");
 124        case NANYA:
 125                return snprintf(buf, PAGE_SIZE, "nanya\n");
 126        case HYNIX:
 127                return snprintf(buf, PAGE_SIZE, "hynix\n");
 128        case MOSEL:
 129                return snprintf(buf, PAGE_SIZE, "mosel\n");
 130        case WINBOND:
 131                return snprintf(buf, PAGE_SIZE, "winbond\n");
 132        case ESMT:
 133                return snprintf(buf, PAGE_SIZE, "esmt\n");
 134        case MICRON:
 135                return snprintf(buf, PAGE_SIZE, "micron\n");
 136        default:
 137                return snprintf(buf, PAGE_SIZE, "unknown\n");
 138        }
 139}
 140
 141static DEVICE_ATTR(mem_info_vram_total, S_IRUGO,
 142                   amdgpu_mem_info_vram_total_show, NULL);
 143static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO,
 144                   amdgpu_mem_info_vis_vram_total_show,NULL);
 145static DEVICE_ATTR(mem_info_vram_used, S_IRUGO,
 146                   amdgpu_mem_info_vram_used_show, NULL);
 147static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO,
 148                   amdgpu_mem_info_vis_vram_used_show, NULL);
 149static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO,
 150                   amdgpu_mem_info_vram_vendor, NULL);
 151
 152static const struct attribute *amdgpu_vram_mgr_attributes[] = {
 153        &dev_attr_mem_info_vram_total.attr,
 154        &dev_attr_mem_info_vis_vram_total.attr,
 155        &dev_attr_mem_info_vram_used.attr,
 156        &dev_attr_mem_info_vis_vram_used.attr,
 157        &dev_attr_mem_info_vram_vendor.attr,
 158        NULL
 159};
 160
 161/**
 162 * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
 163 *
 164 * @man: TTM memory type manager
 165 * @p_size: maximum size of VRAM
 166 *
 167 * Allocate and initialize the VRAM manager.
 168 */
 169static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man,
 170                                unsigned long p_size)
 171{
 172        struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
 173        struct amdgpu_vram_mgr *mgr;
 174        int ret;
 175
 176        mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
 177        if (!mgr)
 178                return -ENOMEM;
 179
 180        drm_mm_init(&mgr->mm, 0, p_size);
 181        spin_lock_init(&mgr->lock);
 182        man->priv = mgr;
 183
 184        /* Add the two VRAM-related sysfs files */
 185        ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
 186        if (ret)
 187                DRM_ERROR("Failed to register sysfs\n");
 188
 189        return 0;
 190}
 191
 192/**
 193 * amdgpu_vram_mgr_fini - free and destroy VRAM manager
 194 *
 195 * @man: TTM memory type manager
 196 *
 197 * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
 198 * allocated inside it.
 199 */
 200static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man)
 201{
 202        struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
 203        struct amdgpu_vram_mgr *mgr = man->priv;
 204
 205        spin_lock(&mgr->lock);
 206        drm_mm_takedown(&mgr->mm);
 207        spin_unlock(&mgr->lock);
 208        kfree(mgr);
 209        man->priv = NULL;
 210        sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
 211        return 0;
 212}
 213
 214/**
 215 * amdgpu_vram_mgr_vis_size - Calculate visible node size
 216 *
 217 * @adev: amdgpu device structure
 218 * @node: MM node structure
 219 *
 220 * Calculate how many bytes of the MM node are inside visible VRAM
 221 */
 222static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
 223                                    struct drm_mm_node *node)
 224{
 225        uint64_t start = node->start << PAGE_SHIFT;
 226        uint64_t end = (node->size + node->start) << PAGE_SHIFT;
 227
 228        if (start >= adev->gmc.visible_vram_size)
 229                return 0;
 230
 231        return (end > adev->gmc.visible_vram_size ?
 232                adev->gmc.visible_vram_size : end) - start;
 233}
 234
 235/**
 236 * amdgpu_vram_mgr_bo_visible_size - CPU visible BO size
 237 *
 238 * @bo: &amdgpu_bo buffer object (must be in VRAM)
 239 *
 240 * Returns:
 241 * How much of the given &amdgpu_bo buffer object lies in CPU visible VRAM.
 242 */
 243u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 244{
 245        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 246        struct ttm_mem_reg *mem = &bo->tbo.mem;
 247        struct drm_mm_node *nodes = mem->mm_node;
 248        unsigned pages = mem->num_pages;
 249        u64 usage;
 250
 251        if (amdgpu_gmc_vram_full_visible(&adev->gmc))
 252                return amdgpu_bo_size(bo);
 253
 254        if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
 255                return 0;
 256
 257        for (usage = 0; nodes && pages; pages -= nodes->size, nodes++)
 258                usage += amdgpu_vram_mgr_vis_size(adev, nodes);
 259
 260        return usage;
 261}
 262
 263/**
 264 * amdgpu_vram_mgr_virt_start - update virtual start address
 265 *
 266 * @mem: ttm_mem_reg to update
 267 * @node: just allocated node
 268 *
 269 * Calculate a virtual BO start address to easily check if everything is CPU
 270 * accessible.
 271 */
 272static void amdgpu_vram_mgr_virt_start(struct ttm_mem_reg *mem,
 273                                       struct drm_mm_node *node)
 274{
 275        unsigned long start;
 276
 277        start = node->start + node->size;
 278        if (start > mem->num_pages)
 279                start -= mem->num_pages;
 280        else
 281                start = 0;
 282        mem->start = max(mem->start, start);
 283}
 284
 285/**
 286 * amdgpu_vram_mgr_new - allocate new ranges
 287 *
 288 * @man: TTM memory type manager
 289 * @tbo: TTM BO we need this range for
 290 * @place: placement flags and restrictions
 291 * @mem: the resulting mem object
 292 *
 293 * Allocate VRAM for the given BO.
 294 */
 295static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 296                               struct ttm_buffer_object *tbo,
 297                               const struct ttm_place *place,
 298                               struct ttm_mem_reg *mem)
 299{
 300        struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
 301        struct amdgpu_vram_mgr *mgr = man->priv;
 302        struct drm_mm *mm = &mgr->mm;
 303        struct drm_mm_node *nodes;
 304        enum drm_mm_insert_mode mode;
 305        unsigned long lpfn, num_nodes, pages_per_node, pages_left;
 306        uint64_t vis_usage = 0, mem_bytes, max_bytes;
 307        unsigned i;
 308        int r;
 309
 310        lpfn = place->lpfn;
 311        if (!lpfn)
 312                lpfn = man->size;
 313
 314        max_bytes = adev->gmc.mc_vram_size;
 315        if (tbo->type != ttm_bo_type_kernel)
 316                max_bytes -= AMDGPU_VM_RESERVED_VRAM;
 317
 318        /* bail out quickly if there's likely not enough VRAM for this BO */
 319        mem_bytes = (u64)mem->num_pages << PAGE_SHIFT;
 320        if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) {
 321                atomic64_sub(mem_bytes, &mgr->usage);
 322                mem->mm_node = NULL;
 323                return 0;
 324        }
 325
 326        if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
 327                pages_per_node = ~0ul;
 328                num_nodes = 1;
 329        } else {
 330#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 331                pages_per_node = HPAGE_PMD_NR;
 332#else
 333                /* default to 2MB */
 334                pages_per_node = (2UL << (20UL - PAGE_SHIFT));
 335#endif
 336                pages_per_node = max((uint32_t)pages_per_node, mem->page_alignment);
 337                num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
 338        }
 339
 340        nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
 341                               GFP_KERNEL | __GFP_ZERO);
 342        if (!nodes) {
 343                atomic64_sub(mem_bytes, &mgr->usage);
 344                return -ENOMEM;
 345        }
 346
 347        mode = DRM_MM_INSERT_BEST;
 348        if (place->flags & TTM_PL_FLAG_TOPDOWN)
 349                mode = DRM_MM_INSERT_HIGH;
 350
 351        mem->start = 0;
 352        pages_left = mem->num_pages;
 353
 354        spin_lock(&mgr->lock);
 355        for (i = 0; pages_left >= pages_per_node; ++i) {
 356                unsigned long pages = rounddown_pow_of_two(pages_left);
 357
 358                r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
 359                                                pages_per_node, 0,
 360                                                place->fpfn, lpfn,
 361                                                mode);
 362                if (unlikely(r))
 363                        break;
 364
 365                vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
 366                amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
 367                pages_left -= pages;
 368        }
 369
 370        for (; pages_left; ++i) {
 371                unsigned long pages = min(pages_left, pages_per_node);
 372                uint32_t alignment = mem->page_alignment;
 373
 374                if (pages == pages_per_node)
 375                        alignment = pages_per_node;
 376
 377                r = drm_mm_insert_node_in_range(mm, &nodes[i],
 378                                                pages, alignment, 0,
 379                                                place->fpfn, lpfn,
 380                                                mode);
 381                if (unlikely(r))
 382                        goto error;
 383
 384                vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
 385                amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
 386                pages_left -= pages;
 387        }
 388        spin_unlock(&mgr->lock);
 389
 390        atomic64_add(vis_usage, &mgr->vis_usage);
 391
 392        mem->mm_node = nodes;
 393
 394        return 0;
 395
 396error:
 397        while (i--)
 398                drm_mm_remove_node(&nodes[i]);
 399        spin_unlock(&mgr->lock);
 400        atomic64_sub(mem->num_pages << PAGE_SHIFT, &mgr->usage);
 401
 402        kvfree(nodes);
 403        return r == -ENOSPC ? 0 : r;
 404}
 405
 406/**
 407 * amdgpu_vram_mgr_del - free ranges
 408 *
 409 * @man: TTM memory type manager
 410 * @tbo: TTM BO we need this range for
 411 * @place: placement flags and restrictions
 412 * @mem: TTM memory object
 413 *
 414 * Free the allocated VRAM again.
 415 */
 416static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
 417                                struct ttm_mem_reg *mem)
 418{
 419        struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
 420        struct amdgpu_vram_mgr *mgr = man->priv;
 421        struct drm_mm_node *nodes = mem->mm_node;
 422        uint64_t usage = 0, vis_usage = 0;
 423        unsigned pages = mem->num_pages;
 424
 425        if (!mem->mm_node)
 426                return;
 427
 428        spin_lock(&mgr->lock);
 429        while (pages) {
 430                pages -= nodes->size;
 431                drm_mm_remove_node(nodes);
 432                usage += nodes->size << PAGE_SHIFT;
 433                vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
 434                ++nodes;
 435        }
 436        spin_unlock(&mgr->lock);
 437
 438        atomic64_sub(usage, &mgr->usage);
 439        atomic64_sub(vis_usage, &mgr->vis_usage);
 440
 441        kvfree(mem->mm_node);
 442        mem->mm_node = NULL;
 443}
 444
 445/**
 446 * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
 447 *
 448 * @adev: amdgpu device pointer
 449 * @mem: TTM memory object
 450 * @dev: the other device
 451 * @dir: dma direction
 452 * @sgt: resulting sg table
 453 *
 454 * Allocate and fill a sg table from a VRAM allocation.
 455 */
 456int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 457                              struct ttm_mem_reg *mem,
 458                              struct device *dev,
 459                              enum dma_data_direction dir,
 460                              struct sg_table **sgt)
 461{
 462        struct drm_mm_node *node;
 463        struct scatterlist *sg;
 464        int num_entries = 0;
 465        unsigned int pages;
 466        int i, r;
 467
 468        *sgt = kmalloc(sizeof(*sg), GFP_KERNEL);
 469        if (!*sgt)
 470                return -ENOMEM;
 471
 472        for (pages = mem->num_pages, node = mem->mm_node;
 473             pages; pages -= node->size, ++node)
 474                ++num_entries;
 475
 476        r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
 477        if (r)
 478                goto error_free;
 479
 480        for_each_sg((*sgt)->sgl, sg, num_entries, i)
 481                sg->length = 0;
 482
 483        node = mem->mm_node;
 484        for_each_sg((*sgt)->sgl, sg, num_entries, i) {
 485                phys_addr_t phys = (node->start << PAGE_SHIFT) +
 486                        adev->gmc.aper_base;
 487                size_t size = node->size << PAGE_SHIFT;
 488                dma_addr_t addr;
 489
 490                ++node;
 491                addr = dma_map_resource(dev, phys, size, dir,
 492                                        DMA_ATTR_SKIP_CPU_SYNC);
 493                r = dma_mapping_error(dev, addr);
 494                if (r)
 495                        goto error_unmap;
 496
 497                sg_set_page(sg, NULL, size, 0);
 498                sg_dma_address(sg) = addr;
 499                sg_dma_len(sg) = size;
 500        }
 501        return 0;
 502
 503error_unmap:
 504        for_each_sg((*sgt)->sgl, sg, num_entries, i) {
 505                if (!sg->length)
 506                        continue;
 507
 508                dma_unmap_resource(dev, sg->dma_address,
 509                                   sg->length, dir,
 510                                   DMA_ATTR_SKIP_CPU_SYNC);
 511        }
 512        sg_free_table(*sgt);
 513
 514error_free:
 515        kfree(*sgt);
 516        return r;
 517}
 518
 519/**
 520 * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
 521 *
 522 * @adev: amdgpu device pointer
 523 * @sgt: sg table to free
 524 *
 525 * Free a previously allocate sg table.
 526 */
 527void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
 528                              struct device *dev,
 529                              enum dma_data_direction dir,
 530                              struct sg_table *sgt)
 531{
 532        struct scatterlist *sg;
 533        int i;
 534
 535        for_each_sg(sgt->sgl, sg, sgt->nents, i)
 536                dma_unmap_resource(dev, sg->dma_address,
 537                                   sg->length, dir,
 538                                   DMA_ATTR_SKIP_CPU_SYNC);
 539        sg_free_table(sgt);
 540        kfree(sgt);
 541}
 542
 543/**
 544 * amdgpu_vram_mgr_usage - how many bytes are used in this domain
 545 *
 546 * @man: TTM memory type manager
 547 *
 548 * Returns how many bytes are used in this domain.
 549 */
 550uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man)
 551{
 552        struct amdgpu_vram_mgr *mgr = man->priv;
 553
 554        return atomic64_read(&mgr->usage);
 555}
 556
 557/**
 558 * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
 559 *
 560 * @man: TTM memory type manager
 561 *
 562 * Returns how many bytes are used in the visible part of VRAM
 563 */
 564uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man)
 565{
 566        struct amdgpu_vram_mgr *mgr = man->priv;
 567
 568        return atomic64_read(&mgr->vis_usage);
 569}
 570
 571/**
 572 * amdgpu_vram_mgr_debug - dump VRAM table
 573 *
 574 * @man: TTM memory type manager
 575 * @printer: DRM printer to use
 576 *
 577 * Dump the table content using printk.
 578 */
 579static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man,
 580                                  struct drm_printer *printer)
 581{
 582        struct amdgpu_vram_mgr *mgr = man->priv;
 583
 584        spin_lock(&mgr->lock);
 585        drm_mm_print(&mgr->mm, printer);
 586        spin_unlock(&mgr->lock);
 587
 588        drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
 589                   man->size, amdgpu_vram_mgr_usage(man) >> 20,
 590                   amdgpu_vram_mgr_vis_usage(man) >> 20);
 591}
 592
 593const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = {
 594        .init           = amdgpu_vram_mgr_init,
 595        .takedown       = amdgpu_vram_mgr_fini,
 596        .get_node       = amdgpu_vram_mgr_new,
 597        .put_node       = amdgpu_vram_mgr_del,
 598        .debug          = amdgpu_vram_mgr_debug
 599};
 600