linux/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/pagevec.h>
#include <linux/swap.h>

#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_gemfs.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"

/*
 * Move pages to appropriate lru and release the pagevec, decrementing the
 * ref count of those pages.
 */
static void check_release_pagevec(struct pagevec *pvec)
{
        check_move_unevictable_pages(pvec);
        __pagevec_release(pvec);
        cond_resched();
}

static int shmem_get_pages(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct intel_memory_region *mem = obj->mm.region;
        const unsigned long page_count = obj->base.size / PAGE_SIZE;
        unsigned long i;
        struct address_space *mapping;
        struct sg_table *st;
        struct scatterlist *sg;
        struct sgt_iter sgt_iter;
        struct page *page;
        unsigned long last_pfn = 0;     /* suppress gcc warning */
        unsigned int max_segment = i915_sg_segment_size();
        unsigned int sg_page_sizes;
        struct pagevec pvec;
        gfp_t noreclaim;
        int ret;

        /*
         * Assert that the object is not currently in any GPU domain. As it
         * wasn't in the GTT, there shouldn't be any way it could have been in
         * a GPU cache
         */
        GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
        GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);

        /*
         * If there's no chance of allocating enough pages for the whole
         * object, bail early.
         */
        if (obj->base.size > resource_size(&mem->region))
                return -ENOMEM;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                return -ENOMEM;

rebuild_st:
        if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
                kfree(st);
                return -ENOMEM;
        }

        /*
         * Get the list of pages out of our struct file.  They'll be pinned
         * at this point until we release them.
         *
         * Fail silently without starting the shrinker
         */
        mapping = obj->base.filp->f_mapping;
        mapping_set_unevictable(mapping);
        noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
        noreclaim |= __GFP_NORETRY | __GFP_NOWARN;

        sg = st->sgl;
        st->nents = 0;
        sg_page_sizes = 0;
        for (i = 0; i < page_count; i++) {
                const unsigned int shrink[] = {
                        I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
                        0,
                }, *s = shrink;
                gfp_t gfp = noreclaim;

                do {
                        cond_resched();
                        page = shmem_read_mapping_page_gfp(mapping, i, gfp);
                        if (!IS_ERR(page))
                                break;

                        if (!*s) {
                                ret = PTR_ERR(page);
                                goto err_sg;
                        }

                        i915_gem_shrink(i915, 2 * page_count, NULL, *s++);

                        /*
                         * We've tried hard to allocate the memory by reaping
                         * our own buffer, now let the real VM do its job and
                         * go down in flames if truly OOM.
                         *
                         * However, since graphics tend to be disposable,
                         * defer the oom here by reporting the ENOMEM back
                         * to userspace.
                         */
                        if (!*s) {
                                /* reclaim and warn, but no oom */
                                gfp = mapping_gfp_mask(mapping);

                                /*
                                 * Our bo are always dirty and so we require
                                 * kswapd to reclaim our pages (direct reclaim
                                 * does not effectively begin pageout of our
                                 * buffers on its own). However, direct reclaim
                                 * only waits for kswapd when under allocation
                                 * congestion. So as a result __GFP_RECLAIM is
                                 * unreliable and fails to actually reclaim our
                                 * dirty pages -- unless you try over and over
                                 * again with !__GFP_NORETRY. However, we still
                                 * want to fail this allocation rather than
                                 * trigger the out-of-memory killer and for
                                 * this we want __GFP_RETRY_MAYFAIL.
                                 */
                                gfp |= __GFP_RETRY_MAYFAIL;
                        }
                } while (1);

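                /*
                 * Append the page to the scatterlist, coalescing physically
                 * contiguous pages into a single entry until max_segment
                 * is reached.
                 */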
                if (!i ||
                    sg->length >= max_segment ||
                    page_to_pfn(page) != last_pfn + 1) {
                        if (i) {
                                sg_page_sizes |= sg->length;
                                sg = sg_next(sg);
                        }
                        st->nents++;
                        sg_set_page(sg, page, PAGE_SIZE, 0);
                } else {
                        sg->length += PAGE_SIZE;
                }
                last_pfn = page_to_pfn(page);

                /* Check that the i965g/gm workaround works. */
                WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
        }
        if (sg) { /* loop terminated early; short sg table */
                sg_page_sizes |= sg->length;
                sg_mark_end(sg);
        }

        /* Trim unused sg entries to avoid wasting memory. */
        i915_sg_trim(st);

        ret = i915_gem_gtt_prepare_pages(obj, st);
        if (ret) {
                /*
                 * DMA remapping failed? One possible cause is that
                 * it could not reserve enough large entries, asking
                 * for PAGE_SIZE chunks instead may be helpful.
                 */
                if (max_segment > PAGE_SIZE) {
                        for_each_sgt_page(page, sgt_iter, st)
                                put_page(page);
                        sg_free_table(st);

                        max_segment = PAGE_SIZE;
                        goto rebuild_st;
                } else {
                        dev_warn(&i915->drm.pdev->dev,
                                 "Failed to DMA remap %lu pages\n",
                                 page_count);
                        goto err_pages;
                }
        }

        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_do_bit_17_swizzle(obj, st);

        __i915_gem_object_set_pages(obj, st, sg_page_sizes);

        return 0;

err_sg:
        sg_mark_end(sg);
err_pages:
        mapping_clear_unevictable(mapping);
        pagevec_init(&pvec);
        for_each_sgt_page(page, sgt_iter, st) {
                if (!pagevec_add(&pvec, page))
                        check_release_pagevec(&pvec);
        }
        if (pagevec_count(&pvec))
                check_release_pagevec(&pvec);
        sg_free_table(st);
        kfree(st);

        /*
         * shmemfs first checks if there is enough memory to allocate the page
         * and reports ENOSPC should there be insufficient, along with the usual
         * ENOMEM for a genuine allocation failure.
         *
         * We use ENOSPC in our driver to mean that we have run out of aperture
         * space and so want to translate the error from shmemfs back to our
         * usual understanding of ENOMEM.
         */
        if (ret == -ENOSPC)
                ret = -ENOMEM;

        return ret;
}

static void
shmem_truncate(struct drm_i915_gem_object *obj)
{
        /*
         * Our goal here is to return as much of the memory as
         * is possible back to the system as we are called from OOM.
         * To do this we must instruct the shmemfs to drop all of its
         * backing pages, *now*.
         */
        shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
        obj->mm.madv = __I915_MADV_PURGED;
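        /*
         * Leave an error pointer behind so any later attempt to use the
         * purged backing store is caught.
         */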
        obj->mm.pages = ERR_PTR(-EFAULT);
}

static void
shmem_writeback(struct drm_i915_gem_object *obj)
{
        struct address_space *mapping;
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_NONE,
                .nr_to_write = SWAP_CLUSTER_MAX,
                .range_start = 0,
                .range_end = LLONG_MAX,
                .for_reclaim = 1,
        };
        unsigned long i;

        /*
         * Leave mmappings intact (GTT will have been revoked on unbinding,
         * leaving only CPU mmappings around) and add those pages to the LRU
         * instead of invoking writeback so they are aged and paged out
         * as normal.
         */
        mapping = obj->base.filp->f_mapping;

        /* Begin writeback on each dirty page */
        for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
                struct page *page;

                page = find_lock_entry(mapping, i);
                if (!page || xa_is_value(page))
                        continue;

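                /*
                 * Only write back pages that are not mapped into userspace;
                 * mark them for reclaim so the VM can free them as soon as
                 * writeback completes.
                 */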
                if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
                        int ret;

                        SetPageReclaim(page);
                        ret = mapping->a_ops->writepage(page, &wbc);
                        if (!PageWriteback(page))
                                ClearPageReclaim(page);
                        if (!ret)
                                goto put;
                }
                unlock_page(page);
put:
                put_page(page);
        }
}

void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
                                struct sg_table *pages,
                                bool needs_clflush)
{
        GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

        if (obj->mm.madv == I915_MADV_DONTNEED)
                obj->mm.dirty = false;

        if (needs_clflush &&
            (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
            !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                drm_clflush_sg(pages);

        __start_cpu_write(obj);
}

static void
shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
        struct sgt_iter sgt_iter;
        struct pagevec pvec;
        struct page *page;

        __i915_gem_object_release_shmem(obj, pages, true);

        i915_gem_gtt_finish_pages(obj, pages);

        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_save_bit_17_swizzle(obj, pages);

        mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);

        pagevec_init(&pvec);
        for_each_sgt_page(page, sgt_iter, pages) {
                if (obj->mm.dirty)
                        set_page_dirty(page);

                if (obj->mm.madv == I915_MADV_WILLNEED)
                        mark_page_accessed(page);

                if (!pagevec_add(&pvec, page))
                        check_release_pagevec(&pvec);
        }
        if (pagevec_count(&pvec))
                check_release_pagevec(&pvec);
        obj->mm.dirty = false;

        sg_free_table(pages);
        kfree(pages);
}

static int
shmem_pwrite(struct drm_i915_gem_object *obj,
             const struct drm_i915_gem_pwrite *arg)
{
        struct address_space *mapping = obj->base.filp->f_mapping;
        char __user *user_data = u64_to_user_ptr(arg->data_ptr);
        u64 remain, offset;
        unsigned int pg;

        /* Caller already validated user args */
        GEM_BUG_ON(!access_ok(user_data, arg->size));

        /*
         * Before we instantiate/pin the backing store for our use, we
         * can prepopulate the shmemfs filp efficiently using a write into
         * the pagecache. We avoid the penalty of instantiating all the
         * pages, important if the user is just writing to a few and never
         * uses the object on the GPU, and using a direct write into shmemfs
         * allows it to avoid the cost of retrieving a page (either swapin
         * or clearing-before-use) before it is overwritten.
         */
        if (i915_gem_object_has_pages(obj))
                return -ENODEV;

        if (obj->mm.madv != I915_MADV_WILLNEED)
                return -EFAULT;

        /*
         * Before the pages are instantiated the object is treated as being
         * in the CPU domain. The pages will be clflushed as required before
         * use, and we can freely write into the pages directly. If userspace
         * races pwrite with any other operation, corruption will ensue -
         * that is userspace's prerogative!
         */

        remain = arg->size;
        offset = arg->offset;
        pg = offset_in_page(offset);

        do {
                unsigned int len, unwritten;
                struct page *page;
                void *data, *vaddr;
                int err;
                char c;

                len = PAGE_SIZE - pg;
                if (len > remain)
                        len = remain;

                /* Prefault the user page to reduce potential recursion */
                err = __get_user(c, user_data);
                if (err)
                        return err;

                err = __get_user(c, user_data + len - 1);
                if (err)
                        return err;

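                /*
                 * Ask shmemfs for the backing page, copy the user data into
                 * it while atomically mapped, then commit the write so the
                 * page is marked up to date and dirty.
                 */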
                err = pagecache_write_begin(obj->base.filp, mapping,
                                            offset, len, 0,
                                            &page, &data);
                if (err < 0)
                        return err;

                vaddr = kmap_atomic(page);
                unwritten = __copy_from_user_inatomic(vaddr + pg,
                                                      user_data,
                                                      len);
                kunmap_atomic(vaddr);

                err = pagecache_write_end(obj->base.filp, mapping,
                                          offset, len, len - unwritten,
                                          page, data);
                if (err < 0)
                        return err;

                /* We don't handle -EFAULT, leave it to the caller to check */
                if (unwritten)
                        return -ENODEV;

                remain -= len;
                user_data += len;
                offset += len;
                pg = 0;
        } while (remain);

        return 0;
}

static void shmem_release(struct drm_i915_gem_object *obj)
{
        i915_gem_object_release_memory_region(obj);

        fput(obj->base.filp);
}

const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE,

        .get_pages = shmem_get_pages,
        .put_pages = shmem_put_pages,
        .truncate = shmem_truncate,
        .writeback = shmem_writeback,

        .pwrite = shmem_pwrite,

        .release = shmem_release,
};

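/*
 * Back the GEM object with a shmemfs file, preferring the driver's private
 * gemfs mount (which may allow transparent hugepages) when it is available.
 */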
static int __create_shmem(struct drm_i915_private *i915,
                          struct drm_gem_object *obj,
                          resource_size_t size)
{
        unsigned long flags = VM_NORESERVE;
        struct file *filp;

        drm_gem_private_object_init(&i915->drm, obj, size);

        if (i915->mm.gemfs)
                filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
                                                 flags);
        else
                filp = shmem_file_setup("i915", size, flags);
        if (IS_ERR(filp))
                return PTR_ERR(filp);

        obj->filp = filp;
        return 0;
}

static struct drm_i915_gem_object *
create_shmem(struct intel_memory_region *mem,
             resource_size_t size,
             unsigned int flags)
{
        static struct lock_class_key lock_class;
        struct drm_i915_private *i915 = mem->i915;
        struct drm_i915_gem_object *obj;
        struct address_space *mapping;
        unsigned int cache_level;
        gfp_t mask;
        int ret;

        obj = i915_gem_object_alloc();
        if (!obj)
                return ERR_PTR(-ENOMEM);

        ret = __create_shmem(i915, &obj->base, size);
        if (ret)
                goto fail;

        mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
        if (IS_I965GM(i915) || IS_I965G(i915)) {
                /* 965gm cannot relocate objects above 4GiB. */
                mask &= ~__GFP_HIGHMEM;
                mask |= __GFP_DMA32;
        }

        mapping = obj->base.filp->f_mapping;
        mapping_set_gfp_mask(mapping, mask);
        GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));

        i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class);

        obj->write_domain = I915_GEM_DOMAIN_CPU;
        obj->read_domains = I915_GEM_DOMAIN_CPU;

        if (HAS_LLC(i915))
                /* On some devices, we can have the GPU use the LLC (the CPU
                 * cache) for about a 10% performance improvement
                 * compared to uncached.  Graphics requests other than
                 * display scanout are coherent with the CPU in
                 * accessing this cache.  This means in this mode we
                 * don't need to clflush on the CPU side, and on the
                 * GPU side we only need to flush internal caches to
                 * get data visible to the CPU.
                 *
                 * However, we maintain the display planes as UC, and so
                 * need to rebind when first used as such.
                 */
                cache_level = I915_CACHE_LLC;
        else
                cache_level = I915_CACHE_NONE;

        i915_gem_object_set_cache_coherency(obj, cache_level);

        i915_gem_object_init_memory_region(obj, mem, 0);

        return obj;

fail:
        i915_gem_object_free(obj);
        return ERR_PTR(ret);
}

struct drm_i915_gem_object *
i915_gem_object_create_shmem(struct drm_i915_private *i915,
                             resource_size_t size)
{
        return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
                                             size, 0);
}

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
                                       const void *data, resource_size_t size)
{
        struct drm_i915_gem_object *obj;
        struct file *file;
        resource_size_t offset;
        int err;

        obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
        if (IS_ERR(obj))
                return obj;

        GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

        file = obj->base.filp;
        offset = 0;
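        /* Copy the data into the shmemfs pagecache one page at a time. */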
        do {
                unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
                struct page *page;
                void *pgdata, *vaddr;

                err = pagecache_write_begin(file, file->f_mapping,
                                            offset, len, 0,
                                            &page, &pgdata);
                if (err < 0)
                        goto fail;

                vaddr = kmap(page);
                memcpy(vaddr, data, len);
                kunmap(page);

                err = pagecache_write_end(file, file->f_mapping,
                                          offset, len, len,
                                          page, pgdata);
                if (err < 0)
                        goto fail;

                size -= len;
                data += len;
                offset += len;
        } while (size);

        return obj;

fail:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

static int init_shmem(struct intel_memory_region *mem)
{
        int err;

        err = i915_gemfs_init(mem->i915);
        if (err) {
                DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n",
                         err);
        }

        return 0; /* Don't error, we can simply fallback to the kernel mnt */
}

static void release_shmem(struct intel_memory_region *mem)
{
        i915_gemfs_fini(mem->i915);
}

static const struct intel_memory_region_ops shmem_region_ops = {
        .init = init_shmem,
        .release = release_shmem,
        .create_object = create_shmem,
};

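/*
 * System memory (shmem-backed objects) is exposed as a memory region
 * nominally sized by the total amount of RAM in the system.
 */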
struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915)
{
        return intel_memory_region_create(i915, 0,
                                          totalram_pages() << PAGE_SHIFT,
                                          PAGE_SIZE, 0,
                                          &shmem_region_ops);
}