linux/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/pagevec.h>
#include <linux/swap.h>

#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_gemfs.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"

/*
 * Move pages to appropriate lru and release the pagevec, decrementing the
 * ref count of those pages.
 */
static void check_release_pagevec(struct pagevec *pvec)
{
        check_move_unevictable_pages(pvec);
        __pagevec_release(pvec);
        cond_resched();
}

static int shmem_get_pages(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct intel_memory_region *mem = obj->mm.region;
        const unsigned long page_count = obj->base.size / PAGE_SIZE;
        unsigned long i;
        struct address_space *mapping;
        struct sg_table *st;
        struct scatterlist *sg;
        struct sgt_iter sgt_iter;
        struct page *page;
        unsigned long last_pfn = 0;     /* suppress gcc warning */
        unsigned int max_segment = i915_sg_segment_size();
        unsigned int sg_page_sizes;
        gfp_t noreclaim;
        int ret;

        /*
         * Assert that the object is not currently in any GPU domain. As it
         * wasn't in the GTT, there shouldn't be any way it could have been in
         * a GPU cache
         */
        GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
        GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);

        /*
         * If there's no chance of allocating enough pages for the whole
         * object, bail early.
         */
        if (obj->base.size > resource_size(&mem->region))
                return -ENOMEM;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                return -ENOMEM;

rebuild_st:
        if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
                kfree(st);
                return -ENOMEM;
        }

        /*
         * Get the list of pages out of our struct file.  They'll be pinned
         * at this point until we release them.
         *
         * Fail silently without starting the shrinker
         */
        mapping = obj->base.filp->f_mapping;
        mapping_set_unevictable(mapping);
        noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
        noreclaim |= __GFP_NORETRY | __GFP_NOWARN;

        sg = st->sgl;
        st->nents = 0;
        sg_page_sizes = 0;
        for (i = 0; i < page_count; i++) {
                const unsigned int shrink[] = {
                        I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
                        0,
                }, *s = shrink;
                gfp_t gfp = noreclaim;

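                /*
                 * Allocation is attempted twice per page: first without
                 * allowing reclaim, and then, after purging our own
                 * bound/unbound objects via the shrinker, with full direct
                 * reclaim but still failing rather than invoking the OOM
                 * killer.
                 */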
                do {
                        cond_resched();
                        page = shmem_read_mapping_page_gfp(mapping, i, gfp);
                        if (!IS_ERR(page))
                                break;

                        if (!*s) {
                                ret = PTR_ERR(page);
                                goto err_sg;
                        }

                        i915_gem_shrink(i915, 2 * page_count, NULL, *s++);

                        /*
                         * We've tried hard to allocate the memory by reaping
                         * our own buffer, now let the real VM do its job and
                         * go down in flames if truly OOM.
                         *
                         * However, since graphics tend to be disposable,
                         * defer the oom here by reporting the ENOMEM back
                         * to userspace.
                         */
                        if (!*s) {
                                /* reclaim and warn, but no oom */
                                gfp = mapping_gfp_mask(mapping);

                                /*
                                 * Our bo are always dirty and so we require
                                 * kswapd to reclaim our pages (direct reclaim
                                 * does not effectively begin pageout of our
                                 * buffers on its own). However, direct reclaim
                                 * only waits for kswapd when under allocation
                                 * congestion. So as a result __GFP_RECLAIM is
                                 * unreliable and fails to actually reclaim our
                                 * dirty pages -- unless you try over and over
                                 * again with !__GFP_NORETRY. However, we still
                                 * want to fail this allocation rather than
                                 * trigger the out-of-memory killer and for
                                 * this we want __GFP_RETRY_MAYFAIL.
                                 */
                                gfp |= __GFP_RETRY_MAYFAIL;
                        }
                } while (1);

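                /*
                 * Coalesce physically contiguous pages into a single sg
                 * entry, starting a new entry whenever the pages are not
                 * adjacent or the segment would exceed max_segment.
                 */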
                if (!i ||
                    sg->length >= max_segment ||
                    page_to_pfn(page) != last_pfn + 1) {
                        if (i) {
                                sg_page_sizes |= sg->length;
                                sg = sg_next(sg);
                        }
                        st->nents++;
                        sg_set_page(sg, page, PAGE_SIZE, 0);
                } else {
                        sg->length += PAGE_SIZE;
                }
                last_pfn = page_to_pfn(page);

                /* Check that the i965g/gm workaround works. */
                GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
        }
        if (sg) { /* loop terminated early; short sg table */
                sg_page_sizes |= sg->length;
                sg_mark_end(sg);
        }

        /* Trim unused sg entries to avoid wasting memory. */
        i915_sg_trim(st);

        ret = i915_gem_gtt_prepare_pages(obj, st);
        if (ret) {
                /*
                 * DMA remapping failed? One possible cause is that
                 * it could not reserve enough large entries; asking
                 * for PAGE_SIZE chunks instead may be helpful.
                 */
                if (max_segment > PAGE_SIZE) {
                        for_each_sgt_page(page, sgt_iter, st)
                                put_page(page);
                        sg_free_table(st);

                        max_segment = PAGE_SIZE;
                        goto rebuild_st;
                } else {
                        dev_warn(&i915->drm.pdev->dev,
                                 "Failed to DMA remap %lu pages\n",
                                 page_count);
                        goto err_pages;
                }
        }

        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_do_bit_17_swizzle(obj, st);

        __i915_gem_object_set_pages(obj, st, sg_page_sizes);

        return 0;

err_sg:
        sg_mark_end(sg);
err_pages:
        mapping_clear_unevictable(mapping);
        if (sg != st->sgl) {
                struct pagevec pvec;

                pagevec_init(&pvec);
                for_each_sgt_page(page, sgt_iter, st) {
                        if (!pagevec_add(&pvec, page))
                                check_release_pagevec(&pvec);
                }
                if (pagevec_count(&pvec))
                        check_release_pagevec(&pvec);
        }
        sg_free_table(st);
        kfree(st);

        /*
         * shmemfs first checks if there is enough memory to allocate the page
         * and reports ENOSPC should there be insufficient, along with the usual
         * ENOMEM for a genuine allocation failure.
         *
         * We use ENOSPC in our driver to mean that we have run out of aperture
         * space and so want to translate the error from shmemfs back to our
         * usual understanding of ENOMEM.
         */
        if (ret == -ENOSPC)
                ret = -ENOMEM;

        return ret;
}

static void
shmem_truncate(struct drm_i915_gem_object *obj)
{
        /*
         * Our goal here is to return as much of the memory as possible
         * back to the system, as we are called from OOM. To do this we
         * must instruct shmemfs to drop all of its backing pages, *now*.
         */
        shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
        obj->mm.madv = __I915_MADV_PURGED;
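        /* Poison the page pointer: the backing store is irrecoverably lost. */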
        obj->mm.pages = ERR_PTR(-EFAULT);
}

static void
shmem_writeback(struct drm_i915_gem_object *obj)
{
        struct address_space *mapping;
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_NONE,
                .nr_to_write = SWAP_CLUSTER_MAX,
                .range_start = 0,
                .range_end = LLONG_MAX,
                .for_reclaim = 1,
        };
        unsigned long i;

        /*
         * Leave mmappings intact (GTT will have been revoked on unbinding,
         * leaving only CPU mmappings around) and add those pages to the LRU
         * instead of invoking writeback so they are aged and paged out
         * as normal.
         */
        mapping = obj->base.filp->f_mapping;

        /* Begin writeback on each dirty page */
        for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
                struct page *page;

                page = find_lock_entry(mapping, i);
                if (!page || xa_is_value(page))
                        continue;

                if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
                        int ret;

                        SetPageReclaim(page);
                        ret = mapping->a_ops->writepage(page, &wbc);
                        if (!PageWriteback(page))
                                ClearPageReclaim(page);
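                        /*
                         * On success ->writepage() has already unlocked the
                         * page, so skip unlock_page() and just drop our ref.
                         */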
                        if (!ret)
                                goto put;
                }
                unlock_page(page);
put:
                put_page(page);
        }
}

void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
                                struct sg_table *pages,
                                bool needs_clflush)
{
        GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

        if (obj->mm.madv == I915_MADV_DONTNEED)
                obj->mm.dirty = false;

        if (needs_clflush &&
            (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
            !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                drm_clflush_sg(pages);

        __start_cpu_write(obj);
}

static void
shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
        struct sgt_iter sgt_iter;
        struct pagevec pvec;
        struct page *page;

        __i915_gem_object_release_shmem(obj, pages, true);

        i915_gem_gtt_finish_pages(obj, pages);

        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_save_bit_17_swizzle(obj, pages);

        mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);

        pagevec_init(&pvec);
        for_each_sgt_page(page, sgt_iter, pages) {
                if (obj->mm.dirty)
                        set_page_dirty(page);

                if (obj->mm.madv == I915_MADV_WILLNEED)
                        mark_page_accessed(page);

                if (!pagevec_add(&pvec, page))
                        check_release_pagevec(&pvec);
        }
        if (pagevec_count(&pvec))
                check_release_pagevec(&pvec);
        obj->mm.dirty = false;

        sg_free_table(pages);
        kfree(pages);
}

static int
shmem_pwrite(struct drm_i915_gem_object *obj,
             const struct drm_i915_gem_pwrite *arg)
{
        struct address_space *mapping = obj->base.filp->f_mapping;
        char __user *user_data = u64_to_user_ptr(arg->data_ptr);
        u64 remain, offset;
        unsigned int pg;

        /* Caller already validated user args */
        GEM_BUG_ON(!access_ok(user_data, arg->size));

        /*
         * Before we instantiate/pin the backing store for our use, we
         * can prepopulate the shmemfs filp efficiently using a write into
         * the pagecache. We avoid the penalty of instantiating all the
         * pages, important if the user is just writing to a few and never
         * uses the object on the GPU, and using a direct write into shmemfs
         * allows it to avoid the cost of retrieving a page (either swapin
         * or clearing-before-use) before it is overwritten.
         */
        if (i915_gem_object_has_pages(obj))
                return -ENODEV;

        if (obj->mm.madv != I915_MADV_WILLNEED)
                return -EFAULT;

        /*
         * Before the pages are instantiated the object is treated as being
         * in the CPU domain. The pages will be clflushed as required before
         * use, and we can freely write into the pages directly. If userspace
         * races pwrite with any other operation, corruption will ensue;
         * that is userspace's prerogative!
         */

        remain = arg->size;
        offset = arg->offset;
        pg = offset_in_page(offset);

        do {
                unsigned int len, unwritten;
                struct page *page;
                void *data, *vaddr;
                int err;
                char c;

                len = PAGE_SIZE - pg;
                if (len > remain)
                        len = remain;

                /* Prefault the user page to reduce potential recursion */
                err = __get_user(c, user_data);
                if (err)
                        return err;

                err = __get_user(c, user_data + len - 1);
                if (err)
                        return err;

                err = pagecache_write_begin(obj->base.filp, mapping,
                                            offset, len, 0,
                                            &page, &data);
                if (err < 0)
                        return err;

                vaddr = kmap_atomic(page);
                unwritten = __copy_from_user_inatomic(vaddr + pg,
                                                      user_data,
                                                      len);
                kunmap_atomic(vaddr);

                err = pagecache_write_end(obj->base.filp, mapping,
                                          offset, len, len - unwritten,
                                          page, data);
                if (err < 0)
                        return err;

                /* We don't handle -EFAULT, leave it to the caller to check */
                if (unwritten)
                        return -ENODEV;

                remain -= len;
                user_data += len;
                offset += len;
                pg = 0;
        } while (remain);

        return 0;
}

static void shmem_release(struct drm_i915_gem_object *obj)
{
        i915_gem_object_release_memory_region(obj);

        fput(obj->base.filp);
}

const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
        .name = "i915_gem_object_shmem",
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE,

        .get_pages = shmem_get_pages,
        .put_pages = shmem_put_pages,
        .truncate = shmem_truncate,
        .writeback = shmem_writeback,

        .pwrite = shmem_pwrite,

        .release = shmem_release,
};

static int __create_shmem(struct drm_i915_private *i915,
                          struct drm_gem_object *obj,
                          resource_size_t size)
{
        unsigned long flags = VM_NORESERVE;
        struct file *filp;

        drm_gem_private_object_init(&i915->drm, obj, size);

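        /*
         * Prefer our private tmpfs mount (which can provide transparent
         * hugepages); fall back to the kernel's default tmpfs otherwise.
         */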
        if (i915->mm.gemfs)
                filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
                                                 flags);
        else
                filp = shmem_file_setup("i915", size, flags);
        if (IS_ERR(filp))
                return PTR_ERR(filp);

        obj->filp = filp;
        return 0;
}

static struct drm_i915_gem_object *
create_shmem(struct intel_memory_region *mem,
             resource_size_t size,
             unsigned int flags)
{
        static struct lock_class_key lock_class;
        struct drm_i915_private *i915 = mem->i915;
        struct drm_i915_gem_object *obj;
        struct address_space *mapping;
        unsigned int cache_level;
        gfp_t mask;
        int ret;

        obj = i915_gem_object_alloc();
        if (!obj)
                return ERR_PTR(-ENOMEM);

        ret = __create_shmem(i915, &obj->base, size);
        if (ret)
                goto fail;

        mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
        if (IS_I965GM(i915) || IS_I965G(i915)) {
                /* 965gm cannot relocate objects above 4GiB. */
                mask &= ~__GFP_HIGHMEM;
                mask |= __GFP_DMA32;
        }

        mapping = obj->base.filp->f_mapping;
        mapping_set_gfp_mask(mapping, mask);
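        /*
         * The last-ditch allocation attempt in shmem_get_pages() restores
         * the full mapping gfp mask, so it must still permit reclaim.
         */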
        GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));

        i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class);

        obj->write_domain = I915_GEM_DOMAIN_CPU;
        obj->read_domains = I915_GEM_DOMAIN_CPU;

        if (HAS_LLC(i915))
                /* On some devices, we can have the GPU use the LLC (the CPU
                 * cache) for about a 10% performance improvement
                 * compared to uncached.  Graphics requests other than
                 * display scanout are coherent with the CPU in
                 * accessing this cache.  This means in this mode we
                 * don't need to clflush on the CPU side, and on the
                 * GPU side we only need to flush internal caches to
                 * get data visible to the CPU.
                 *
                 * However, we maintain the display planes as UC, and so
                 * need to rebind when first used as such.
                 */
                cache_level = I915_CACHE_LLC;
        else
                cache_level = I915_CACHE_NONE;

        i915_gem_object_set_cache_coherency(obj, cache_level);

        i915_gem_object_init_memory_region(obj, mem, 0);

        return obj;

fail:
        i915_gem_object_free(obj);
        return ERR_PTR(ret);
}

struct drm_i915_gem_object *
i915_gem_object_create_shmem(struct drm_i915_private *i915,
                             resource_size_t size)
{
        return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
                                             size, 0);
}

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
                                       const void *data, resource_size_t size)
{
        struct drm_i915_gem_object *obj;
        struct file *file;
        resource_size_t offset;
        int err;

        obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
        if (IS_ERR(obj))
                return obj;

        GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

        file = obj->base.filp;
        offset = 0;
        do {
                unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
                struct page *page;
                void *pgdata, *vaddr;

                err = pagecache_write_begin(file, file->f_mapping,
                                            offset, len, 0,
                                            &page, &pgdata);
                if (err < 0)
                        goto fail;

                vaddr = kmap(page);
                memcpy(vaddr, data, len);
                kunmap(page);

                err = pagecache_write_end(file, file->f_mapping,
                                          offset, len, len,
                                          page, pgdata);
                if (err < 0)
                        goto fail;

                size -= len;
                data += len;
                offset += len;
        } while (size);

        return obj;

fail:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

static int init_shmem(struct intel_memory_region *mem)
{
        int err;

        err = i915_gemfs_init(mem->i915);
        if (err) {
                DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n",
                         err);
        }

        intel_memory_region_set_name(mem, "system");

        return 0; /* Don't error, we can simply fall back to the kernel mnt */
}

static void release_shmem(struct intel_memory_region *mem)
{
        i915_gemfs_fini(mem->i915);
}

static const struct intel_memory_region_ops shmem_region_ops = {
        .init = init_shmem,
        .release = release_shmem,
        .create_object = create_shmem,
};

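/* The "system" memory region is backed by shmem and sized by total RAM. */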
struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915)
{
        return intel_memory_region_create(i915, 0,
                                          totalram_pages() << PAGE_SHIFT,
                                          PAGE_SIZE, 0,
                                          &shmem_region_ops);
}
