linux/drivers/gpu/drm/i915/i915_gem.c
<<
>>
Prefs
   1/*
   2 * Copyright © 2008-2015 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *
  26 */
  27
  28#include <drm/drmP.h>
  29#include <drm/drm_vma_manager.h>
  30#include <drm/i915_drm.h>
  31#include "i915_drv.h"
  32#include "i915_gem_clflush.h"
  33#include "i915_vgpu.h"
  34#include "i915_trace.h"
  35#include "intel_drv.h"
  36#include "intel_frontbuffer.h"
  37#include "intel_mocs.h"
  38#include "i915_gemfs.h"
  39#include <linux/dma-fence-array.h>
  40#include <linux/kthread.h>
  41#include <linux/reservation.h>
  42#include <linux/shmem_fs.h>
  43#include <linux/slab.h>
  44#include <linux/stop_machine.h>
  45#include <linux/swap.h>
  46#include <linux/pci.h>
  47#include <linux/dma-buf.h>
  48
  49static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
  50
  51static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
  52{
  53        if (obj->cache_dirty)
  54                return false;
  55
  56        if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
  57                return true;
  58
  59        return obj->pin_global; /* currently in use by HW, keep flushed */
  60}
  61
  62static int
  63insert_mappable_node(struct i915_ggtt *ggtt,
  64                     struct drm_mm_node *node, u32 size)
  65{
  66        memset(node, 0, sizeof(*node));
  67        return drm_mm_insert_node_in_range(&ggtt->base.mm, node,
  68                                           size, 0, I915_COLOR_UNEVICTABLE,
  69                                           0, ggtt->mappable_end,
  70                                           DRM_MM_INSERT_LOW);
  71}
  72
  73static void
  74remove_mappable_node(struct drm_mm_node *node)
  75{
  76        drm_mm_remove_node(node);
  77}
  78
  79/* some bookkeeping */
  80static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
  81                                  u64 size)
  82{
  83        spin_lock(&dev_priv->mm.object_stat_lock);
  84        dev_priv->mm.object_count++;
  85        dev_priv->mm.object_memory += size;
  86        spin_unlock(&dev_priv->mm.object_stat_lock);
  87}
  88
  89static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
  90                                     u64 size)
  91{
  92        spin_lock(&dev_priv->mm.object_stat_lock);
  93        dev_priv->mm.object_count--;
  94        dev_priv->mm.object_memory -= size;
  95        spin_unlock(&dev_priv->mm.object_stat_lock);
  96}
  97
  98static int
  99i915_gem_wait_for_error(struct i915_gpu_error *error)
 100{
 101        int ret;
 102
 103        might_sleep();
 104
 105        /*
 106         * Only wait 10 seconds for the gpu reset to complete to avoid hanging
 107         * userspace. If it takes that long something really bad is going on and
 108         * we should simply try to bail out and fail as gracefully as possible.
 109         */
 110        ret = wait_event_interruptible_timeout(error->reset_queue,
 111                                               !i915_reset_backoff(error),
 112                                               I915_RESET_TIMEOUT);
 113        if (ret == 0) {
 114                DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
 115                return -EIO;
 116        } else if (ret < 0) {
 117                return ret;
 118        } else {
 119                return 0;
 120        }
 121}
 122
 123int i915_mutex_lock_interruptible(struct drm_device *dev)
 124{
 125        struct drm_i915_private *dev_priv = to_i915(dev);
 126        int ret;
 127
 128        ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
 129        if (ret)
 130                return ret;
 131
 132        ret = mutex_lock_interruptible(&dev->struct_mutex);
 133        if (ret)
 134                return ret;
 135
 136        return 0;
 137}
 138
 139int
 140i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 141                            struct drm_file *file)
 142{
 143        struct drm_i915_private *dev_priv = to_i915(dev);
 144        struct i915_ggtt *ggtt = &dev_priv->ggtt;
 145        struct drm_i915_gem_get_aperture *args = data;
 146        struct i915_vma *vma;
 147        u64 pinned;
 148
 149        pinned = ggtt->base.reserved;
 150        mutex_lock(&dev->struct_mutex);
 151        list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
 152                if (i915_vma_is_pinned(vma))
 153                        pinned += vma->node.size;
 154        list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
 155                if (i915_vma_is_pinned(vma))
 156                        pinned += vma->node.size;
 157        mutex_unlock(&dev->struct_mutex);
 158
 159        args->aper_size = ggtt->base.total;
 160        args->aper_available_size = args->aper_size - pinned;
 161
 162        return 0;
 163}
 164
 165static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 166{
 167        struct address_space *mapping = obj->base.filp->f_mapping;
 168        drm_dma_handle_t *phys;
 169        struct sg_table *st;
 170        struct scatterlist *sg;
 171        char *vaddr;
 172        int i;
 173        int err;
 174
 175        if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
 176                return -EINVAL;
 177
 178        /* Always aligning to the object size, allows a single allocation
 179         * to handle all possible callers, and given typical object sizes,
 180         * the alignment of the buddy allocation will naturally match.
 181         */
 182        phys = drm_pci_alloc(obj->base.dev,
 183                             roundup_pow_of_two(obj->base.size),
 184                             roundup_pow_of_two(obj->base.size));
 185        if (!phys)
 186                return -ENOMEM;
 187
 188        vaddr = phys->vaddr;
 189        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 190                struct page *page;
 191                char *src;
 192
 193                page = shmem_read_mapping_page(mapping, i);
 194                if (IS_ERR(page)) {
 195                        err = PTR_ERR(page);
 196                        goto err_phys;
 197                }
 198
 199                src = kmap_atomic(page);
 200                memcpy(vaddr, src, PAGE_SIZE);
 201                drm_clflush_virt_range(vaddr, PAGE_SIZE);
 202                kunmap_atomic(src);
 203
 204                put_page(page);
 205                vaddr += PAGE_SIZE;
 206        }
 207
 208        i915_gem_chipset_flush(to_i915(obj->base.dev));
 209
 210        st = kmalloc(sizeof(*st), GFP_KERNEL);
 211        if (!st) {
 212                err = -ENOMEM;
 213                goto err_phys;
 214        }
 215
 216        if (sg_alloc_table(st, 1, GFP_KERNEL)) {
 217                kfree(st);
 218                err = -ENOMEM;
 219                goto err_phys;
 220        }
 221
 222        sg = st->sgl;
 223        sg->offset = 0;
 224        sg->length = obj->base.size;
 225
 226        sg_dma_address(sg) = phys->busaddr;
 227        sg_dma_len(sg) = obj->base.size;
 228
 229        obj->phys_handle = phys;
 230
 231        __i915_gem_object_set_pages(obj, st, sg->length);
 232
 233        return 0;
 234
 235err_phys:
 236        drm_pci_free(obj->base.dev, phys);
 237
 238        return err;
 239}
 240
 241static void __start_cpu_write(struct drm_i915_gem_object *obj)
 242{
 243        obj->read_domains = I915_GEM_DOMAIN_CPU;
 244        obj->write_domain = I915_GEM_DOMAIN_CPU;
 245        if (cpu_write_needs_clflush(obj))
 246                obj->cache_dirty = true;
 247}
 248
 249static void
 250__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 251                                struct sg_table *pages,
 252                                bool needs_clflush)
 253{
 254        GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
 255
 256        if (obj->mm.madv == I915_MADV_DONTNEED)
 257                obj->mm.dirty = false;
 258
 259        if (needs_clflush &&
 260            (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
 261            !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 262                drm_clflush_sg(pages);
 263
 264        __start_cpu_write(obj);
 265}
 266
 267static void
 268i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 269                               struct sg_table *pages)
 270{
 271        __i915_gem_object_release_shmem(obj, pages, false);
 272
 273        if (obj->mm.dirty) {
 274                struct address_space *mapping = obj->base.filp->f_mapping;
 275                char *vaddr = obj->phys_handle->vaddr;
 276                int i;
 277
 278                for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 279                        struct page *page;
 280                        char *dst;
 281
 282                        page = shmem_read_mapping_page(mapping, i);
 283                        if (IS_ERR(page))
 284                                continue;
 285
 286                        dst = kmap_atomic(page);
 287                        drm_clflush_virt_range(vaddr, PAGE_SIZE);
 288                        memcpy(dst, vaddr, PAGE_SIZE);
 289                        kunmap_atomic(dst);
 290
 291                        set_page_dirty(page);
 292                        if (obj->mm.madv == I915_MADV_WILLNEED)
 293                                mark_page_accessed(page);
 294                        put_page(page);
 295                        vaddr += PAGE_SIZE;
 296                }
 297                obj->mm.dirty = false;
 298        }
 299
 300        sg_free_table(pages);
 301        kfree(pages);
 302
 303        drm_pci_free(obj->base.dev, obj->phys_handle);
 304}
 305
 306static void
 307i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
 308{
 309        i915_gem_object_unpin_pages(obj);
 310}
 311
 312static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
 313        .get_pages = i915_gem_object_get_pages_phys,
 314        .put_pages = i915_gem_object_put_pages_phys,
 315        .release = i915_gem_object_release_phys,
 316};
 317
 318static const struct drm_i915_gem_object_ops i915_gem_object_ops;
 319
 320int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 321{
 322        struct i915_vma *vma;
 323        LIST_HEAD(still_in_list);
 324        int ret;
 325
 326        lockdep_assert_held(&obj->base.dev->struct_mutex);
 327
 328        /* Closed vma are removed from the obj->vma_list - but they may
 329         * still have an active binding on the object. To remove those we
 330         * must wait for all rendering to complete to the object (as unbinding
 331         * must anyway), and retire the requests.
 332         */
 333        ret = i915_gem_object_set_to_cpu_domain(obj, false);
 334        if (ret)
 335                return ret;
 336
 337        while ((vma = list_first_entry_or_null(&obj->vma_list,
 338                                               struct i915_vma,
 339                                               obj_link))) {
 340                list_move_tail(&vma->obj_link, &still_in_list);
 341                ret = i915_vma_unbind(vma);
 342                if (ret)
 343                        break;
 344        }
 345        list_splice(&still_in_list, &obj->vma_list);
 346
 347        return ret;
 348}
 349
 350static long
 351i915_gem_object_wait_fence(struct dma_fence *fence,
 352                           unsigned int flags,
 353                           long timeout,
 354                           struct intel_rps_client *rps_client)
 355{
 356        struct i915_request *rq;
 357
 358        BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
 359
 360        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 361                return timeout;
 362
 363        if (!dma_fence_is_i915(fence))
 364                return dma_fence_wait_timeout(fence,
 365                                              flags & I915_WAIT_INTERRUPTIBLE,
 366                                              timeout);
 367
 368        rq = to_request(fence);
 369        if (i915_request_completed(rq))
 370                goto out;
 371
 372        /*
 373         * This client is about to stall waiting for the GPU. In many cases
 374         * this is undesirable and limits the throughput of the system, as
 375         * many clients cannot continue processing user input/output whilst
 376         * blocked. RPS autotuning may take tens of milliseconds to respond
 377         * to the GPU load and thus incurs additional latency for the client.
 378         * We can circumvent that by promoting the GPU frequency to maximum
 379         * before we wait. This makes the GPU throttle up much more quickly
 380         * (good for benchmarks and user experience, e.g. window animations),
 381         * but at a cost of spending more power processing the workload
 382         * (bad for battery). Not all clients even want their results
 383         * immediately and for them we should just let the GPU select its own
 384         * frequency to maximise efficiency. To prevent a single client from
 385         * forcing the clocks too high for the whole system, we only allow
 386         * each client to waitboost once in a busy period.
 387         */
 388        if (rps_client && !i915_request_started(rq)) {
 389                if (INTEL_GEN(rq->i915) >= 6)
 390                        gen6_rps_boost(rq, rps_client);
 391        }
 392
 393        timeout = i915_request_wait(rq, flags, timeout);
 394
 395out:
 396        if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
 397                i915_request_retire_upto(rq);
 398
 399        return timeout;
 400}
 401
 402static long
 403i915_gem_object_wait_reservation(struct reservation_object *resv,
 404                                 unsigned int flags,
 405                                 long timeout,
 406                                 struct intel_rps_client *rps_client)
 407{
 408        unsigned int seq = __read_seqcount_begin(&resv->seq);
 409        struct dma_fence *excl;
 410        bool prune_fences = false;
 411
 412        if (flags & I915_WAIT_ALL) {
 413                struct dma_fence **shared;
 414                unsigned int count, i;
 415                int ret;
 416
 417                ret = reservation_object_get_fences_rcu(resv,
 418                                                        &excl, &count, &shared);
 419                if (ret)
 420                        return ret;
 421
 422                for (i = 0; i < count; i++) {
 423                        timeout = i915_gem_object_wait_fence(shared[i],
 424                                                             flags, timeout,
 425                                                             rps_client);
 426                        if (timeout < 0)
 427                                break;
 428
 429                        dma_fence_put(shared[i]);
 430                }
 431
 432                for (; i < count; i++)
 433                        dma_fence_put(shared[i]);
 434                kfree(shared);
 435
 436                /*
 437                 * If both shared fences and an exclusive fence exist,
 438                 * then by construction the shared fences must be later
 439                 * than the exclusive fence. If we successfully wait for
 440                 * all the shared fences, we know that the exclusive fence
 441                 * must all be signaled. If all the shared fences are
 442                 * signaled, we can prune the array and recover the
 443                 * floating references on the fences/requests.
 444                 */
 445                prune_fences = count && timeout >= 0;
 446        } else {
 447                excl = reservation_object_get_excl_rcu(resv);
 448        }
 449
 450        if (excl && timeout >= 0)
 451                timeout = i915_gem_object_wait_fence(excl, flags, timeout,
 452                                                     rps_client);
 453
 454        dma_fence_put(excl);
 455
 456        /*
 457         * Opportunistically prune the fences iff we know they have *all* been
 458         * signaled and that the reservation object has not been changed (i.e.
 459         * no new fences have been added).
 460         */
 461        if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
 462                if (reservation_object_trylock(resv)) {
 463                        if (!__read_seqcount_retry(&resv->seq, seq))
 464                                reservation_object_add_excl_fence(resv, NULL);
 465                        reservation_object_unlock(resv);
 466                }
 467        }
 468
 469        return timeout;
 470}
 471
 472static void __fence_set_priority(struct dma_fence *fence, int prio)
 473{
 474        struct i915_request *rq;
 475        struct intel_engine_cs *engine;
 476
 477        if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
 478                return;
 479
 480        rq = to_request(fence);
 481        engine = rq->engine;
 482
 483        rcu_read_lock();
 484        if (engine->schedule)
 485                engine->schedule(rq, prio);
 486        rcu_read_unlock();
 487}
 488
 489static void fence_set_priority(struct dma_fence *fence, int prio)
 490{
 491        /* Recurse once into a fence-array */
 492        if (dma_fence_is_array(fence)) {
 493                struct dma_fence_array *array = to_dma_fence_array(fence);
 494                int i;
 495
 496                for (i = 0; i < array->num_fences; i++)
 497                        __fence_set_priority(array->fences[i], prio);
 498        } else {
 499                __fence_set_priority(fence, prio);
 500        }
 501}
 502
 503int
 504i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 505                              unsigned int flags,
 506                              int prio)
 507{
 508        struct dma_fence *excl;
 509
 510        if (flags & I915_WAIT_ALL) {
 511                struct dma_fence **shared;
 512                unsigned int count, i;
 513                int ret;
 514
 515                ret = reservation_object_get_fences_rcu(obj->resv,
 516                                                        &excl, &count, &shared);
 517                if (ret)
 518                        return ret;
 519
 520                for (i = 0; i < count; i++) {
 521                        fence_set_priority(shared[i], prio);
 522                        dma_fence_put(shared[i]);
 523                }
 524
 525                kfree(shared);
 526        } else {
 527                excl = reservation_object_get_excl_rcu(obj->resv);
 528        }
 529
 530        if (excl) {
 531                fence_set_priority(excl, prio);
 532                dma_fence_put(excl);
 533        }
 534        return 0;
 535}
 536
 537/**
 538 * Waits for rendering to the object to be completed
 539 * @obj: i915 gem object
 540 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 541 * @timeout: how long to wait
 542 * @rps_client: client (user process) to charge for any waitboosting
 543 */
 544int
 545i915_gem_object_wait(struct drm_i915_gem_object *obj,
 546                     unsigned int flags,
 547                     long timeout,
 548                     struct intel_rps_client *rps_client)
 549{
 550        might_sleep();
 551#if IS_ENABLED(CONFIG_LOCKDEP)
 552        GEM_BUG_ON(debug_locks &&
 553                   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
 554                   !!(flags & I915_WAIT_LOCKED));
 555#endif
 556        GEM_BUG_ON(timeout < 0);
 557
 558        timeout = i915_gem_object_wait_reservation(obj->resv,
 559                                                   flags, timeout,
 560                                                   rps_client);
 561        return timeout < 0 ? timeout : 0;
 562}
 563
 564static struct intel_rps_client *to_rps_client(struct drm_file *file)
 565{
 566        struct drm_i915_file_private *fpriv = file->driver_priv;
 567
 568        return &fpriv->rps_client;
 569}
 570
 571static int
 572i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 573                     struct drm_i915_gem_pwrite *args,
 574                     struct drm_file *file)
 575{
 576        void *vaddr = obj->phys_handle->vaddr + args->offset;
 577        char __user *user_data = u64_to_user_ptr(args->data_ptr);
 578
 579        /* We manually control the domain here and pretend that it
 580         * remains coherent i.e. in the GTT domain, like shmem_pwrite.
 581         */
 582        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 583        if (copy_from_user(vaddr, user_data, args->size))
 584                return -EFAULT;
 585
 586        drm_clflush_virt_range(vaddr, args->size);
 587        i915_gem_chipset_flush(to_i915(obj->base.dev));
 588
 589        intel_fb_obj_flush(obj, ORIGIN_CPU);
 590        return 0;
 591}
 592
 593void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
 594{
 595        return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
 596}
 597
 598void i915_gem_object_free(struct drm_i915_gem_object *obj)
 599{
 600        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 601        kmem_cache_free(dev_priv->objects, obj);
 602}
 603
 604static int
 605i915_gem_create(struct drm_file *file,
 606                struct drm_i915_private *dev_priv,
 607                uint64_t size,
 608                uint32_t *handle_p)
 609{
 610        struct drm_i915_gem_object *obj;
 611        int ret;
 612        u32 handle;
 613
 614        size = roundup(size, PAGE_SIZE);
 615        if (size == 0)
 616                return -EINVAL;
 617
 618        /* Allocate the new object */
 619        obj = i915_gem_object_create(dev_priv, size);
 620        if (IS_ERR(obj))
 621                return PTR_ERR(obj);
 622
 623        ret = drm_gem_handle_create(file, &obj->base, &handle);
 624        /* drop reference from allocate - handle holds it now */
 625        i915_gem_object_put(obj);
 626        if (ret)
 627                return ret;
 628
 629        *handle_p = handle;
 630        return 0;
 631}
 632
 633int
 634i915_gem_dumb_create(struct drm_file *file,
 635                     struct drm_device *dev,
 636                     struct drm_mode_create_dumb *args)
 637{
 638        /* have to work out size/pitch and return them */
 639        args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
 640        args->size = args->pitch * args->height;
 641        return i915_gem_create(file, to_i915(dev),
 642                               args->size, &args->handle);
 643}
 644
 645static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 646{
 647        return !(obj->cache_level == I915_CACHE_NONE ||
 648                 obj->cache_level == I915_CACHE_WT);
 649}
 650
 651/**
 652 * Creates a new mm object and returns a handle to it.
 653 * @dev: drm device pointer
 654 * @data: ioctl data blob
 655 * @file: drm file pointer
 656 */
 657int
 658i915_gem_create_ioctl(struct drm_device *dev, void *data,
 659                      struct drm_file *file)
 660{
 661        struct drm_i915_private *dev_priv = to_i915(dev);
 662        struct drm_i915_gem_create *args = data;
 663
 664        i915_gem_flush_free_objects(dev_priv);
 665
 666        return i915_gem_create(file, dev_priv,
 667                               args->size, &args->handle);
 668}
 669
 670static inline enum fb_op_origin
 671fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 672{
 673        return (domain == I915_GEM_DOMAIN_GTT ?
 674                obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 675}
 676
 677void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 678{
 679        /*
 680         * No actual flushing is required for the GTT write domain for reads
 681         * from the GTT domain. Writes to it "immediately" go to main memory
 682         * as far as we know, so there's no chipset flush. It also doesn't
 683         * land in the GPU render cache.
 684         *
 685         * However, we do have to enforce the order so that all writes through
 686         * the GTT land before any writes to the device, such as updates to
 687         * the GATT itself.
 688         *
 689         * We also have to wait a bit for the writes to land from the GTT.
 690         * An uncached read (i.e. mmio) seems to be ideal for the round-trip
 691         * timing. This issue has only been observed when switching quickly
 692         * between GTT writes and CPU reads from inside the kernel on recent hw,
 693         * and it appears to only affect discrete GTT blocks (i.e. on LLC
 694         * system agents we cannot reproduce this behaviour, until Cannonlake
 695         * that was!).
 696         */
 697
 698        wmb();
 699
 700        intel_runtime_pm_get(dev_priv);
 701        spin_lock_irq(&dev_priv->uncore.lock);
 702
 703        POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
 704
 705        spin_unlock_irq(&dev_priv->uncore.lock);
 706        intel_runtime_pm_put(dev_priv);
 707}
 708
 709static void
 710flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 711{
 712        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 713        struct i915_vma *vma;
 714
 715        if (!(obj->write_domain & flush_domains))
 716                return;
 717
 718        switch (obj->write_domain) {
 719        case I915_GEM_DOMAIN_GTT:
 720                i915_gem_flush_ggtt_writes(dev_priv);
 721
 722                intel_fb_obj_flush(obj,
 723                                   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
 724
 725                for_each_ggtt_vma(vma, obj) {
 726                        if (vma->iomap)
 727                                continue;
 728
 729                        i915_vma_unset_ggtt_write(vma);
 730                }
 731                break;
 732
 733        case I915_GEM_DOMAIN_CPU:
 734                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 735                break;
 736
 737        case I915_GEM_DOMAIN_RENDER:
 738                if (gpu_write_needs_clflush(obj))
 739                        obj->cache_dirty = true;
 740                break;
 741        }
 742
 743        obj->write_domain = 0;
 744}
 745
 746static inline int
 747__copy_to_user_swizzled(char __user *cpu_vaddr,
 748                        const char *gpu_vaddr, int gpu_offset,
 749                        int length)
 750{
 751        int ret, cpu_offset = 0;
 752
 753        while (length > 0) {
 754                int cacheline_end = ALIGN(gpu_offset + 1, 64);
 755                int this_length = min(cacheline_end - gpu_offset, length);
 756                int swizzled_gpu_offset = gpu_offset ^ 64;
 757
 758                ret = __copy_to_user(cpu_vaddr + cpu_offset,
 759                                     gpu_vaddr + swizzled_gpu_offset,
 760                                     this_length);
 761                if (ret)
 762                        return ret + length;
 763
 764                cpu_offset += this_length;
 765                gpu_offset += this_length;
 766                length -= this_length;
 767        }
 768
 769        return 0;
 770}
 771
 772static inline int
 773__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
 774                          const char __user *cpu_vaddr,
 775                          int length)
 776{
 777        int ret, cpu_offset = 0;
 778
 779        while (length > 0) {
 780                int cacheline_end = ALIGN(gpu_offset + 1, 64);
 781                int this_length = min(cacheline_end - gpu_offset, length);
 782                int swizzled_gpu_offset = gpu_offset ^ 64;
 783
 784                ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
 785                                       cpu_vaddr + cpu_offset,
 786                                       this_length);
 787                if (ret)
 788                        return ret + length;
 789
 790                cpu_offset += this_length;
 791                gpu_offset += this_length;
 792                length -= this_length;
 793        }
 794
 795        return 0;
 796}
 797
 798/*
 799 * Pins the specified object's pages and synchronizes the object with
 800 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 801 * flush the object from the CPU cache.
 802 */
 803int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 804                                    unsigned int *needs_clflush)
 805{
 806        int ret;
 807
 808        lockdep_assert_held(&obj->base.dev->struct_mutex);
 809
 810        *needs_clflush = 0;
 811        if (!i915_gem_object_has_struct_page(obj))
 812                return -ENODEV;
 813
 814        ret = i915_gem_object_wait(obj,
 815                                   I915_WAIT_INTERRUPTIBLE |
 816                                   I915_WAIT_LOCKED,
 817                                   MAX_SCHEDULE_TIMEOUT,
 818                                   NULL);
 819        if (ret)
 820                return ret;
 821
 822        ret = i915_gem_object_pin_pages(obj);
 823        if (ret)
 824                return ret;
 825
 826        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
 827            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 828                ret = i915_gem_object_set_to_cpu_domain(obj, false);
 829                if (ret)
 830                        goto err_unpin;
 831                else
 832                        goto out;
 833        }
 834
 835        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 836
 837        /* If we're not in the cpu read domain, set ourself into the gtt
 838         * read domain and manually flush cachelines (if required). This
 839         * optimizes for the case when the gpu will dirty the data
 840         * anyway again before the next pread happens.
 841         */
 842        if (!obj->cache_dirty &&
 843            !(obj->read_domains & I915_GEM_DOMAIN_CPU))
 844                *needs_clflush = CLFLUSH_BEFORE;
 845
 846out:
 847        /* return with the pages pinned */
 848        return 0;
 849
 850err_unpin:
 851        i915_gem_object_unpin_pages(obj);
 852        return ret;
 853}
 854
 855int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 856                                     unsigned int *needs_clflush)
 857{
 858        int ret;
 859
 860        lockdep_assert_held(&obj->base.dev->struct_mutex);
 861
 862        *needs_clflush = 0;
 863        if (!i915_gem_object_has_struct_page(obj))
 864                return -ENODEV;
 865
 866        ret = i915_gem_object_wait(obj,
 867                                   I915_WAIT_INTERRUPTIBLE |
 868                                   I915_WAIT_LOCKED |
 869                                   I915_WAIT_ALL,
 870                                   MAX_SCHEDULE_TIMEOUT,
 871                                   NULL);
 872        if (ret)
 873                return ret;
 874
 875        ret = i915_gem_object_pin_pages(obj);
 876        if (ret)
 877                return ret;
 878
 879        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
 880            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 881                ret = i915_gem_object_set_to_cpu_domain(obj, true);
 882                if (ret)
 883                        goto err_unpin;
 884                else
 885                        goto out;
 886        }
 887
 888        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 889
 890        /* If we're not in the cpu write domain, set ourself into the
 891         * gtt write domain and manually flush cachelines (as required).
 892         * This optimizes for the case when the gpu will use the data
 893         * right away and we therefore have to clflush anyway.
 894         */
 895        if (!obj->cache_dirty) {
 896                *needs_clflush |= CLFLUSH_AFTER;
 897
 898                /*
 899                 * Same trick applies to invalidate partially written
 900                 * cachelines read before writing.
 901                 */
 902                if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
 903                        *needs_clflush |= CLFLUSH_BEFORE;
 904        }
 905
 906out:
 907        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 908        obj->mm.dirty = true;
 909        /* return with the pages pinned */
 910        return 0;
 911
 912err_unpin:
 913        i915_gem_object_unpin_pages(obj);
 914        return ret;
 915}
 916
 917static void
 918shmem_clflush_swizzled_range(char *addr, unsigned long length,
 919                             bool swizzled)
 920{
 921        if (unlikely(swizzled)) {
 922                unsigned long start = (unsigned long) addr;
 923                unsigned long end = (unsigned long) addr + length;
 924
 925                /* For swizzling simply ensure that we always flush both
 926                 * channels. Lame, but simple and it works. Swizzled
 927                 * pwrite/pread is far from a hotpath - current userspace
 928                 * doesn't use it at all. */
 929                start = round_down(start, 128);
 930                end = round_up(end, 128);
 931
 932                drm_clflush_virt_range((void *)start, end - start);
 933        } else {
 934                drm_clflush_virt_range(addr, length);
 935        }
 936
 937}
 938
 939/* Only difference to the fast-path function is that this can handle bit17
 940 * and uses non-atomic copy and kmap functions. */
 941static int
 942shmem_pread_slow(struct page *page, int offset, int length,
 943                 char __user *user_data,
 944                 bool page_do_bit17_swizzling, bool needs_clflush)
 945{
 946        char *vaddr;
 947        int ret;
 948
 949        vaddr = kmap(page);
 950        if (needs_clflush)
 951                shmem_clflush_swizzled_range(vaddr + offset, length,
 952                                             page_do_bit17_swizzling);
 953
 954        if (page_do_bit17_swizzling)
 955                ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
 956        else
 957                ret = __copy_to_user(user_data, vaddr + offset, length);
 958        kunmap(page);
 959
 960        return ret ? - EFAULT : 0;
 961}
 962
 963static int
 964shmem_pread(struct page *page, int offset, int length, char __user *user_data,
 965            bool page_do_bit17_swizzling, bool needs_clflush)
 966{
 967        int ret;
 968
 969        ret = -ENODEV;
 970        if (!page_do_bit17_swizzling) {
 971                char *vaddr = kmap_atomic(page);
 972
 973                if (needs_clflush)
 974                        drm_clflush_virt_range(vaddr + offset, length);
 975                ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
 976                kunmap_atomic(vaddr);
 977        }
 978        if (ret == 0)
 979                return 0;
 980
 981        return shmem_pread_slow(page, offset, length, user_data,
 982                                page_do_bit17_swizzling, needs_clflush);
 983}
 984
 985static int
 986i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 987                     struct drm_i915_gem_pread *args)
 988{
 989        char __user *user_data;
 990        u64 remain;
 991        unsigned int obj_do_bit17_swizzling;
 992        unsigned int needs_clflush;
 993        unsigned int idx, offset;
 994        int ret;
 995
 996        obj_do_bit17_swizzling = 0;
 997        if (i915_gem_object_needs_bit17_swizzle(obj))
 998                obj_do_bit17_swizzling = BIT(17);
 999
1000        ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
1001        if (ret)
1002                return ret;
1003
1004        ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
1005        mutex_unlock(&obj->base.dev->struct_mutex);
1006        if (ret)
1007                return ret;
1008
1009        remain = args->size;
1010        user_data = u64_to_user_ptr(args->data_ptr);
1011        offset = offset_in_page(args->offset);
1012        for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1013                struct page *page = i915_gem_object_get_page(obj, idx);
1014                int length;
1015
1016                length = remain;
1017                if (offset + length > PAGE_SIZE)
1018                        length = PAGE_SIZE - offset;
1019
1020                ret = shmem_pread(page, offset, length, user_data,
1021                                  page_to_phys(page) & obj_do_bit17_swizzling,
1022                                  needs_clflush);
1023                if (ret)
1024                        break;
1025
1026                remain -= length;
1027                user_data += length;
1028                offset = 0;
1029        }
1030
1031        i915_gem_obj_finish_shmem_access(obj);
1032        return ret;
1033}
1034
1035static inline bool
1036gtt_user_read(struct io_mapping *mapping,
1037              loff_t base, int offset,
1038              char __user *user_data, int length)
1039{
1040        void __iomem *vaddr;
1041        unsigned long unwritten;
1042
1043        /* We can use the cpu mem copy function because this is X86. */
1044        vaddr = io_mapping_map_atomic_wc(mapping, base);
1045        unwritten = __copy_to_user_inatomic(user_data,
1046                                            (void __force *)vaddr + offset,
1047                                            length);
1048        io_mapping_unmap_atomic(vaddr);
1049        if (unwritten) {
1050                vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1051                unwritten = copy_to_user(user_data,
1052                                         (void __force *)vaddr + offset,
1053                                         length);
1054                io_mapping_unmap(vaddr);
1055        }
1056        return unwritten;
1057}
1058
1059static int
1060i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
1061                   const struct drm_i915_gem_pread *args)
1062{
1063        struct drm_i915_private *i915 = to_i915(obj->base.dev);
1064        struct i915_ggtt *ggtt = &i915->ggtt;
1065        struct drm_mm_node node;
1066        struct i915_vma *vma;
1067        void __user *user_data;
1068        u64 remain, offset;
1069        int ret;
1070
1071        ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1072        if (ret)
1073                return ret;
1074
1075        intel_runtime_pm_get(i915);
1076        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1077                                       PIN_MAPPABLE |
1078                                       PIN_NONFAULT |
1079                                       PIN_NONBLOCK);
1080        if (!IS_ERR(vma)) {
1081                node.start = i915_ggtt_offset(vma);
1082                node.allocated = false;
1083                ret = i915_vma_put_fence(vma);
1084                if (ret) {
1085                        i915_vma_unpin(vma);
1086                        vma = ERR_PTR(ret);
1087                }
1088        }
1089        if (IS_ERR(vma)) {
1090                ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1091                if (ret)
1092                        goto out_unlock;
1093                GEM_BUG_ON(!node.allocated);
1094        }
1095
1096        ret = i915_gem_object_set_to_gtt_domain(obj, false);
1097        if (ret)
1098                goto out_unpin;
1099
1100        mutex_unlock(&i915->drm.struct_mutex);
1101
1102        user_data = u64_to_user_ptr(args->data_ptr);
1103        remain = args->size;
1104        offset = args->offset;
1105
1106        while (remain > 0) {
1107                /* Operation in this page
1108                 *
1109                 * page_base = page offset within aperture
1110                 * page_offset = offset within page
1111                 * page_length = bytes to copy for this page
1112                 */
1113                u32 page_base = node.start;
1114                unsigned page_offset = offset_in_page(offset);
1115                unsigned page_length = PAGE_SIZE - page_offset;
1116                page_length = remain < page_length ? remain : page_length;
1117                if (node.allocated) {
1118                        wmb();
1119                        ggtt->base.insert_page(&ggtt->base,
1120                                               i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1121                                               node.start, I915_CACHE_NONE, 0);
1122                        wmb();
1123                } else {
1124                        page_base += offset & PAGE_MASK;
1125                }
1126
1127                if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
1128                                  user_data, page_length)) {
1129                        ret = -EFAULT;
1130                        break;
1131                }
1132
1133                remain -= page_length;
1134                user_data += page_length;
1135                offset += page_length;
1136        }
1137
1138        mutex_lock(&i915->drm.struct_mutex);
1139out_unpin:
1140        if (node.allocated) {
1141                wmb();
1142                ggtt->base.clear_range(&ggtt->base,
1143                                       node.start, node.size);
1144                remove_mappable_node(&node);
1145        } else {
1146                i915_vma_unpin(vma);
1147        }
1148out_unlock:
1149        intel_runtime_pm_put(i915);
1150        mutex_unlock(&i915->drm.struct_mutex);
1151
1152        return ret;
1153}
1154
1155/**
1156 * Reads data from the object referenced by handle.
1157 * @dev: drm device pointer
1158 * @data: ioctl data blob
1159 * @file: drm file pointer
1160 *
1161 * On error, the contents of *data are undefined.
1162 */
1163int
1164i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1165                     struct drm_file *file)
1166{
1167        struct drm_i915_gem_pread *args = data;
1168        struct drm_i915_gem_object *obj;
1169        int ret;
1170
1171        if (args->size == 0)
1172                return 0;
1173
1174        if (!access_ok(VERIFY_WRITE,
1175                       u64_to_user_ptr(args->data_ptr),
1176                       args->size))
1177                return -EFAULT;
1178
1179        obj = i915_gem_object_lookup(file, args->handle);
1180        if (!obj)
1181                return -ENOENT;
1182
1183        /* Bounds check source.  */
1184        if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1185                ret = -EINVAL;
1186                goto out;
1187        }
1188
1189        trace_i915_gem_object_pread(obj, args->offset, args->size);
1190
1191        ret = i915_gem_object_wait(obj,
1192                                   I915_WAIT_INTERRUPTIBLE,
1193                                   MAX_SCHEDULE_TIMEOUT,
1194                                   to_rps_client(file));
1195        if (ret)
1196                goto out;
1197
1198        ret = i915_gem_object_pin_pages(obj);
1199        if (ret)
1200                goto out;
1201
1202        ret = i915_gem_shmem_pread(obj, args);
1203        if (ret == -EFAULT || ret == -ENODEV)
1204                ret = i915_gem_gtt_pread(obj, args);
1205
1206        i915_gem_object_unpin_pages(obj);
1207out:
1208        i915_gem_object_put(obj);
1209        return ret;
1210}
1211
1212/* This is the fast write path which cannot handle
1213 * page faults in the source data
1214 */
1215
1216static inline bool
1217ggtt_write(struct io_mapping *mapping,
1218           loff_t base, int offset,
1219           char __user *user_data, int length)
1220{
1221        void __iomem *vaddr;
1222        unsigned long unwritten;
1223
1224        /* We can use the cpu mem copy function because this is X86. */
1225        vaddr = io_mapping_map_atomic_wc(mapping, base);
1226        unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1227                                                      user_data, length);
1228        io_mapping_unmap_atomic(vaddr);
1229        if (unwritten) {
1230                vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1231                unwritten = copy_from_user((void __force *)vaddr + offset,
1232                                           user_data, length);
1233                io_mapping_unmap(vaddr);
1234        }
1235
1236        return unwritten;
1237}
1238
1239/**
1240 * This is the fast pwrite path, where we copy the data directly from the
1241 * user into the GTT, uncached.
1242 * @obj: i915 GEM object
1243 * @args: pwrite arguments structure
1244 */
1245static int
1246i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1247                         const struct drm_i915_gem_pwrite *args)
1248{
1249        struct drm_i915_private *i915 = to_i915(obj->base.dev);
1250        struct i915_ggtt *ggtt = &i915->ggtt;
1251        struct drm_mm_node node;
1252        struct i915_vma *vma;
1253        u64 remain, offset;
1254        void __user *user_data;
1255        int ret;
1256
1257        ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1258        if (ret)
1259                return ret;
1260
1261        if (i915_gem_object_has_struct_page(obj)) {
1262                /*
1263                 * Avoid waking the device up if we can fallback, as
1264                 * waking/resuming is very slow (worst-case 10-100 ms
1265                 * depending on PCI sleeps and our own resume time).
1266                 * This easily dwarfs any performance advantage from
1267                 * using the cache bypass of indirect GGTT access.
1268                 */
1269                if (!intel_runtime_pm_get_if_in_use(i915)) {
1270                        ret = -EFAULT;
1271                        goto out_unlock;
1272                }
1273        } else {
1274                /* No backing pages, no fallback, we must force GGTT access */
1275                intel_runtime_pm_get(i915);
1276        }
1277
1278        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1279                                       PIN_MAPPABLE |
1280                                       PIN_NONFAULT |
1281                                       PIN_NONBLOCK);
1282        if (!IS_ERR(vma)) {
1283                node.start = i915_ggtt_offset(vma);
1284                node.allocated = false;
1285                ret = i915_vma_put_fence(vma);
1286                if (ret) {
1287                        i915_vma_unpin(vma);
1288                        vma = ERR_PTR(ret);
1289                }
1290        }
1291        if (IS_ERR(vma)) {
1292                ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1293                if (ret)
1294                        goto out_rpm;
1295                GEM_BUG_ON(!node.allocated);
1296        }
1297
1298        ret = i915_gem_object_set_to_gtt_domain(obj, true);
1299        if (ret)
1300                goto out_unpin;
1301
1302        mutex_unlock(&i915->drm.struct_mutex);
1303
1304        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1305
1306        user_data = u64_to_user_ptr(args->data_ptr);
1307        offset = args->offset;
1308        remain = args->size;
1309        while (remain) {
1310                /* Operation in this page
1311                 *
1312                 * page_base = page offset within aperture
1313                 * page_offset = offset within page
1314                 * page_length = bytes to copy for this page
1315                 */
1316                u32 page_base = node.start;
1317                unsigned int page_offset = offset_in_page(offset);
1318                unsigned int page_length = PAGE_SIZE - page_offset;
1319                page_length = remain < page_length ? remain : page_length;
1320                if (node.allocated) {
1321                        wmb(); /* flush the write before we modify the GGTT */
1322                        ggtt->base.insert_page(&ggtt->base,
1323                                               i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1324                                               node.start, I915_CACHE_NONE, 0);
1325                        wmb(); /* flush modifications to the GGTT (insert_page) */
1326                } else {
1327                        page_base += offset & PAGE_MASK;
1328                }
1329                /* If we get a fault while copying data, then (presumably) our
1330                 * source page isn't available.  Return the error and we'll
1331                 * retry in the slow path.
1332                 * If the object is non-shmem backed, we retry again with the
1333                 * path that handles page fault.
1334                 */
1335                if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1336                               user_data, page_length)) {
1337                        ret = -EFAULT;
1338                        break;
1339                }
1340
1341                remain -= page_length;
1342                user_data += page_length;
1343                offset += page_length;
1344        }
1345        intel_fb_obj_flush(obj, ORIGIN_CPU);
1346
1347        mutex_lock(&i915->drm.struct_mutex);
1348out_unpin:
1349        if (node.allocated) {
1350                wmb();
1351                ggtt->base.clear_range(&ggtt->base,
1352                                       node.start, node.size);
1353                remove_mappable_node(&node);
1354        } else {
1355                i915_vma_unpin(vma);
1356        }
1357out_rpm:
1358        intel_runtime_pm_put(i915);
1359out_unlock:
1360        mutex_unlock(&i915->drm.struct_mutex);
1361        return ret;
1362}
1363
1364static int
1365shmem_pwrite_slow(struct page *page, int offset, int length,
1366                  char __user *user_data,
1367                  bool page_do_bit17_swizzling,
1368                  bool needs_clflush_before,
1369                  bool needs_clflush_after)
1370{
1371        char *vaddr;
1372        int ret;
1373
1374        vaddr = kmap(page);
1375        if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1376                shmem_clflush_swizzled_range(vaddr + offset, length,
1377                                             page_do_bit17_swizzling);
1378        if (page_do_bit17_swizzling)
1379                ret = __copy_from_user_swizzled(vaddr, offset, user_data,
1380                                                length);
1381        else
1382                ret = __copy_from_user(vaddr + offset, user_data, length);
1383        if (needs_clflush_after)
1384                shmem_clflush_swizzled_range(vaddr + offset, length,
1385                                             page_do_bit17_swizzling);
1386        kunmap(page);
1387
1388        return ret ? -EFAULT : 0;
1389}
1390
1391/* Per-page copy function for the shmem pwrite fastpath.
1392 * Flushes invalid cachelines before writing to the target if
1393 * needs_clflush_before is set and flushes out any written cachelines after
1394 * writing if needs_clflush is set.
1395 */
1396static int
1397shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1398             bool page_do_bit17_swizzling,
1399             bool needs_clflush_before,
1400             bool needs_clflush_after)
1401{
1402        int ret;
1403
1404        ret = -ENODEV;
1405        if (!page_do_bit17_swizzling) {
1406                char *vaddr = kmap_atomic(page);
1407
1408                if (needs_clflush_before)
1409                        drm_clflush_virt_range(vaddr + offset, len);
1410                ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
1411                if (needs_clflush_after)
1412                        drm_clflush_virt_range(vaddr + offset, len);
1413
1414                kunmap_atomic(vaddr);
1415        }
1416        if (ret == 0)
1417                return ret;
1418
1419        return shmem_pwrite_slow(page, offset, len, user_data,
1420                                 page_do_bit17_swizzling,
1421                                 needs_clflush_before,
1422                                 needs_clflush_after);
1423}
1424
1425static int
1426i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1427                      const struct drm_i915_gem_pwrite *args)
1428{
1429        struct drm_i915_private *i915 = to_i915(obj->base.dev);
1430        void __user *user_data;
1431        u64 remain;
1432        unsigned int obj_do_bit17_swizzling;
1433        unsigned int partial_cacheline_write;
1434        unsigned int needs_clflush;
1435        unsigned int offset, idx;
1436        int ret;
1437
1438        ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1439        if (ret)
1440                return ret;
1441
1442        ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1443        mutex_unlock(&i915->drm.struct_mutex);
1444        if (ret)
1445                return ret;
1446
1447        obj_do_bit17_swizzling = 0;
1448        if (i915_gem_object_needs_bit17_swizzle(obj))
1449                obj_do_bit17_swizzling = BIT(17);
1450
1451        /* If we don't overwrite a cacheline completely we need to be
1452         * careful to have up-to-date data by first clflushing. Don't
1453         * overcomplicate things and flush the entire patch.
1454         */
1455        partial_cacheline_write = 0;
1456        if (needs_clflush & CLFLUSH_BEFORE)
1457                partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1458
1459        user_data = u64_to_user_ptr(args->data_ptr);
1460        remain = args->size;
1461        offset = offset_in_page(args->offset);
1462        for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1463                struct page *page = i915_gem_object_get_page(obj, idx);
1464                int length;
1465
1466                length = remain;
1467                if (offset + length > PAGE_SIZE)
1468                        length = PAGE_SIZE - offset;
1469
1470                ret = shmem_pwrite(page, offset, length, user_data,
1471                                   page_to_phys(page) & obj_do_bit17_swizzling,
1472                                   (offset | length) & partial_cacheline_write,
1473                                   needs_clflush & CLFLUSH_AFTER);
1474                if (ret)
1475                        break;
1476
1477                remain -= length;
1478                user_data += length;
1479                offset = 0;
1480        }
1481
1482        intel_fb_obj_flush(obj, ORIGIN_CPU);
1483        i915_gem_obj_finish_shmem_access(obj);
1484        return ret;
1485}
1486
1487/**
1488 * Writes data to the object referenced by handle.
1489 * @dev: drm device
1490 * @data: ioctl data blob
1491 * @file: drm file
1492 *
1493 * On error, the contents of the buffer that were to be modified are undefined.
1494 */
1495int
1496i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1497                      struct drm_file *file)
1498{
1499        struct drm_i915_gem_pwrite *args = data;
1500        struct drm_i915_gem_object *obj;
1501        int ret;
1502
1503        if (args->size == 0)
1504                return 0;
1505
1506        if (!access_ok(VERIFY_READ,
1507                       u64_to_user_ptr(args->data_ptr),
1508                       args->size))
1509                return -EFAULT;
1510
1511        obj = i915_gem_object_lookup(file, args->handle);
1512        if (!obj)
1513                return -ENOENT;
1514
1515        /* Bounds check destination. */
1516        if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1517                ret = -EINVAL;
1518                goto err;
1519        }
1520
1521        trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1522
1523        ret = -ENODEV;
1524        if (obj->ops->pwrite)
1525                ret = obj->ops->pwrite(obj, args);
1526        if (ret != -ENODEV)
1527                goto err;
1528
1529        ret = i915_gem_object_wait(obj,
1530                                   I915_WAIT_INTERRUPTIBLE |
1531                                   I915_WAIT_ALL,
1532                                   MAX_SCHEDULE_TIMEOUT,
1533                                   to_rps_client(file));
1534        if (ret)
1535                goto err;
1536
1537        ret = i915_gem_object_pin_pages(obj);
1538        if (ret)
1539                goto err;
1540
1541        ret = -EFAULT;
1542        /* We can only do the GTT pwrite on untiled buffers, as otherwise
1543         * it would end up going through the fenced access, and we'll get
1544         * different detiling behavior between reading and writing.
1545         * pread/pwrite currently are reading and writing from the CPU
1546         * perspective, requiring manual detiling by the client.
1547         */
1548        if (!i915_gem_object_has_struct_page(obj) ||
1549            cpu_write_needs_clflush(obj))
1550                /* Note that the gtt paths might fail with non-page-backed user
1551                 * pointers (e.g. gtt mappings when moving data between
1552                 * textures). Fallback to the shmem path in that case.
1553                 */
1554                ret = i915_gem_gtt_pwrite_fast(obj, args);
1555
1556        if (ret == -EFAULT || ret == -ENOSPC) {
1557                if (obj->phys_handle)
1558                        ret = i915_gem_phys_pwrite(obj, args, file);
1559                else
1560                        ret = i915_gem_shmem_pwrite(obj, args);
1561        }
1562
1563        i915_gem_object_unpin_pages(obj);
1564err:
1565        i915_gem_object_put(obj);
1566        return ret;
1567}
1568
1569static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1570{
1571        struct drm_i915_private *i915;
1572        struct list_head *list;
1573        struct i915_vma *vma;
1574
1575        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1576
1577        for_each_ggtt_vma(vma, obj) {
1578                if (i915_vma_is_active(vma))
1579                        continue;
1580
1581                if (!drm_mm_node_allocated(&vma->node))
1582                        continue;
1583
1584                list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
1585        }
1586
1587        i915 = to_i915(obj->base.dev);
1588        spin_lock(&i915->mm.obj_lock);
1589        list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1590        list_move_tail(&obj->mm.link, list);
1591        spin_unlock(&i915->mm.obj_lock);
1592}
1593
1594/**
1595 * Called when user space prepares to use an object with the CPU, either
1596 * through the mmap ioctl's mapping or a GTT mapping.
1597 * @dev: drm device
1598 * @data: ioctl data blob
1599 * @file: drm file
1600 */
1601int
1602i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1603                          struct drm_file *file)
1604{
1605        struct drm_i915_gem_set_domain *args = data;
1606        struct drm_i915_gem_object *obj;
1607        uint32_t read_domains = args->read_domains;
1608        uint32_t write_domain = args->write_domain;
1609        int err;
1610
1611        /* Only handle setting domains to types used by the CPU. */
1612        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1613                return -EINVAL;
1614
1615        /* Having something in the write domain implies it's in the read
1616         * domain, and only that read domain.  Enforce that in the request.
1617         */
1618        if (write_domain != 0 && read_domains != write_domain)
1619                return -EINVAL;
1620
1621        obj = i915_gem_object_lookup(file, args->handle);
1622        if (!obj)
1623                return -ENOENT;
1624
1625        /* Try to flush the object off the GPU without holding the lock.
1626         * We will repeat the flush holding the lock in the normal manner
1627         * to catch cases where we are gazumped.
1628         */
1629        err = i915_gem_object_wait(obj,
1630                                   I915_WAIT_INTERRUPTIBLE |
1631                                   (write_domain ? I915_WAIT_ALL : 0),
1632                                   MAX_SCHEDULE_TIMEOUT,
1633                                   to_rps_client(file));
1634        if (err)
1635                goto out;
1636
1637        /*
1638         * Proxy objects do not control access to the backing storage, ergo
1639         * they cannot be used as a means to manipulate the cache domain
1640         * tracking for that backing storage. The proxy object is always
1641         * considered to be outside of any cache domain.
1642         */
1643        if (i915_gem_object_is_proxy(obj)) {
1644                err = -ENXIO;
1645                goto out;
1646        }
1647
1648        /*
1649         * Flush and acquire obj->pages so that we are coherent through
1650         * direct access in memory with previous cached writes through
1651         * shmemfs and that our cache domain tracking remains valid.
1652         * For example, if the obj->filp was moved to swap without us
1653         * being notified and releasing the pages, we would mistakenly
1654         * continue to assume that the obj remained out of the CPU cached
1655         * domain.
1656         */
1657        err = i915_gem_object_pin_pages(obj);
1658        if (err)
1659                goto out;
1660
1661        err = i915_mutex_lock_interruptible(dev);
1662        if (err)
1663                goto out_unpin;
1664
1665        if (read_domains & I915_GEM_DOMAIN_WC)
1666                err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1667        else if (read_domains & I915_GEM_DOMAIN_GTT)
1668                err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1669        else
1670                err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1671
1672        /* And bump the LRU for this access */
1673        i915_gem_object_bump_inactive_ggtt(obj);
1674
1675        mutex_unlock(&dev->struct_mutex);
1676
1677        if (write_domain != 0)
1678                intel_fb_obj_invalidate(obj,
1679                                        fb_write_origin(obj, write_domain));
1680
1681out_unpin:
1682        i915_gem_object_unpin_pages(obj);
1683out:
1684        i915_gem_object_put(obj);
1685        return err;
1686}
1687
1688/**
1689 * Called when user space has done writes to this buffer
1690 * @dev: drm device
1691 * @data: ioctl data blob
1692 * @file: drm file
1693 */
1694int
1695i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1696                         struct drm_file *file)
1697{
1698        struct drm_i915_gem_sw_finish *args = data;
1699        struct drm_i915_gem_object *obj;
1700
1701        obj = i915_gem_object_lookup(file, args->handle);
1702        if (!obj)
1703                return -ENOENT;
1704
1705        /*
1706         * Proxy objects are barred from CPU access, so there is no
1707         * need to ban sw_finish as it is a nop.
1708         */
1709
1710        /* Pinned buffers may be scanout, so flush the cache */
1711        i915_gem_object_flush_if_display(obj);
1712        i915_gem_object_put(obj);
1713
1714        return 0;
1715}
1716
1717/**
1718 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1719 *                       it is mapped to.
1720 * @dev: drm device
1721 * @data: ioctl data blob
1722 * @file: drm file
1723 *
1724 * While the mapping holds a reference on the contents of the object, it doesn't
1725 * imply a ref on the object itself.
1726 *
1727 * IMPORTANT:
1728 *
1729 * DRM driver writers who look a this function as an example for how to do GEM
1730 * mmap support, please don't implement mmap support like here. The modern way
1731 * to implement DRM mmap support is with an mmap offset ioctl (like
1732 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1733 * That way debug tooling like valgrind will understand what's going on, hiding
1734 * the mmap call in a driver private ioctl will break that. The i915 driver only
1735 * does cpu mmaps this way because we didn't know better.
1736 */
1737int
1738i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1739                    struct drm_file *file)
1740{
1741        struct drm_i915_gem_mmap *args = data;
1742        struct drm_i915_gem_object *obj;
1743        unsigned long addr;
1744
1745        if (args->flags & ~(I915_MMAP_WC))
1746                return -EINVAL;
1747
1748        if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1749                return -ENODEV;
1750
1751        obj = i915_gem_object_lookup(file, args->handle);
1752        if (!obj)
1753                return -ENOENT;
1754
1755        /* prime objects have no backing filp to GEM mmap
1756         * pages from.
1757         */
1758        if (!obj->base.filp) {
1759                i915_gem_object_put(obj);
1760                return -ENXIO;
1761        }
1762
1763        addr = vm_mmap(obj->base.filp, 0, args->size,
1764                       PROT_READ | PROT_WRITE, MAP_SHARED,
1765                       args->offset);
1766        if (args->flags & I915_MMAP_WC) {
1767                struct mm_struct *mm = current->mm;
1768                struct vm_area_struct *vma;
1769
1770                if (down_write_killable(&mm->mmap_sem)) {
1771                        i915_gem_object_put(obj);
1772                        return -EINTR;
1773                }
1774                vma = find_vma(mm, addr);
1775                if (vma)
1776                        vma->vm_page_prot =
1777                                pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1778                else
1779                        addr = -ENOMEM;
1780                up_write(&mm->mmap_sem);
1781
1782                /* This may race, but that's ok, it only gets set */
1783                WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1784        }
1785        i915_gem_object_put(obj);
1786        if (IS_ERR((void *)addr))
1787                return addr;
1788
1789        args->addr_ptr = (uint64_t) addr;
1790
1791        return 0;
1792}
1793
1794static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1795{
1796        return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1797}
1798
1799/**
1800 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1801 *
1802 * A history of the GTT mmap interface:
1803 *
1804 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1805 *     aligned and suitable for fencing, and still fit into the available
1806 *     mappable space left by the pinned display objects. A classic problem
1807 *     we called the page-fault-of-doom where we would ping-pong between
1808 *     two objects that could not fit inside the GTT and so the memcpy
1809 *     would page one object in at the expense of the other between every
1810 *     single byte.
1811 *
1812 * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
1813 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1814 *     object is too large for the available space (or simply too large
1815 *     for the mappable aperture!), a view is created instead and faulted
1816 *     into userspace. (This view is aligned and sized appropriately for
1817 *     fenced access.)
1818 *
1819 * 2 - Recognise WC as a separate cache domain so that we can flush the
1820 *     delayed writes via GTT before performing direct access via WC.
1821 *
1822 * Restrictions:
1823 *
1824 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1825 *    hangs on some architectures, corruption on others. An attempt to service
1826 *    a GTT page fault from a snoopable object will generate a SIGBUS.
1827 *
1828 *  * the object must be able to fit into RAM (physical memory, though no
1829 *    limited to the mappable aperture).
1830 *
1831 *
1832 * Caveats:
1833 *
1834 *  * a new GTT page fault will synchronize rendering from the GPU and flush
1835 *    all data to system memory. Subsequent access will not be synchronized.
1836 *
1837 *  * all mappings are revoked on runtime device suspend.
1838 *
1839 *  * there are only 8, 16 or 32 fence registers to share between all users
1840 *    (older machines require fence register for display and blitter access
1841 *    as well). Contention of the fence registers will cause the previous users
1842 *    to be unmapped and any new access will generate new page faults.
1843 *
1844 *  * running out of memory while servicing a fault may generate a SIGBUS,
1845 *    rather than the expected SIGSEGV.
1846 */
1847int i915_gem_mmap_gtt_version(void)
1848{
1849        return 2;
1850}
1851
1852static inline struct i915_ggtt_view
1853compute_partial_view(struct drm_i915_gem_object *obj,
1854                     pgoff_t page_offset,
1855                     unsigned int chunk)
1856{
1857        struct i915_ggtt_view view;
1858
1859        if (i915_gem_object_is_tiled(obj))
1860                chunk = roundup(chunk, tile_row_pages(obj));
1861
1862        view.type = I915_GGTT_VIEW_PARTIAL;
1863        view.partial.offset = rounddown(page_offset, chunk);
1864        view.partial.size =
1865                min_t(unsigned int, chunk,
1866                      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1867
1868        /* If the partial covers the entire object, just create a normal VMA. */
1869        if (chunk >= obj->base.size >> PAGE_SHIFT)
1870                view.type = I915_GGTT_VIEW_NORMAL;
1871
1872        return view;
1873}
1874
1875/**
1876 * i915_gem_fault - fault a page into the GTT
1877 * @vmf: fault info
1878 *
1879 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1880 * from userspace.  The fault handler takes care of binding the object to
1881 * the GTT (if needed), allocating and programming a fence register (again,
1882 * only if needed based on whether the old reg is still valid or the object
1883 * is tiled) and inserting a new PTE into the faulting process.
1884 *
1885 * Note that the faulting process may involve evicting existing objects
1886 * from the GTT and/or fence registers to make room.  So performance may
1887 * suffer if the GTT working set is large or there are few fence registers
1888 * left.
1889 *
1890 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1891 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1892 */
1893int i915_gem_fault(struct vm_fault *vmf)
1894{
1895#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
1896        struct vm_area_struct *area = vmf->vma;
1897        struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1898        struct drm_device *dev = obj->base.dev;
1899        struct drm_i915_private *dev_priv = to_i915(dev);
1900        struct i915_ggtt *ggtt = &dev_priv->ggtt;
1901        bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1902        struct i915_vma *vma;
1903        pgoff_t page_offset;
1904        unsigned int flags;
1905        int ret;
1906
1907        /* We don't use vmf->pgoff since that has the fake offset */
1908        page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1909
1910        trace_i915_gem_object_fault(obj, page_offset, true, write);
1911
1912        /* Try to flush the object off the GPU first without holding the lock.
1913         * Upon acquiring the lock, we will perform our sanity checks and then
1914         * repeat the flush holding the lock in the normal manner to catch cases
1915         * where we are gazumped.
1916         */
1917        ret = i915_gem_object_wait(obj,
1918                                   I915_WAIT_INTERRUPTIBLE,
1919                                   MAX_SCHEDULE_TIMEOUT,
1920                                   NULL);
1921        if (ret)
1922                goto err;
1923
1924        ret = i915_gem_object_pin_pages(obj);
1925        if (ret)
1926                goto err;
1927
1928        intel_runtime_pm_get(dev_priv);
1929
1930        ret = i915_mutex_lock_interruptible(dev);
1931        if (ret)
1932                goto err_rpm;
1933
1934        /* Access to snoopable pages through the GTT is incoherent. */
1935        if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1936                ret = -EFAULT;
1937                goto err_unlock;
1938        }
1939
1940        /* If the object is smaller than a couple of partial vma, it is
1941         * not worth only creating a single partial vma - we may as well
1942         * clear enough space for the full object.
1943         */
1944        flags = PIN_MAPPABLE;
1945        if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
1946                flags |= PIN_NONBLOCK | PIN_NONFAULT;
1947
1948        /* Now pin it into the GTT as needed */
1949        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
1950        if (IS_ERR(vma)) {
1951                /* Use a partial view if it is bigger than available space */
1952                struct i915_ggtt_view view =
1953                        compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1954
1955                /* Userspace is now writing through an untracked VMA, abandon
1956                 * all hope that the hardware is able to track future writes.
1957                 */
1958                obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1959
1960                vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1961        }
1962        if (IS_ERR(vma)) {
1963                ret = PTR_ERR(vma);
1964                goto err_unlock;
1965        }
1966
1967        ret = i915_gem_object_set_to_gtt_domain(obj, write);
1968        if (ret)
1969                goto err_unpin;
1970
1971        ret = i915_vma_pin_fence(vma);
1972        if (ret)
1973                goto err_unpin;
1974
1975        /* Finally, remap it using the new GTT offset */
1976        ret = remap_io_mapping(area,
1977                               area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1978                               (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1979                               min_t(u64, vma->size, area->vm_end - area->vm_start),
1980                               &ggtt->iomap);
1981        if (ret)
1982                goto err_fence;
1983
1984        /* Mark as being mmapped into userspace for later revocation */
1985        assert_rpm_wakelock_held(dev_priv);
1986        if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1987                list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1988        GEM_BUG_ON(!obj->userfault_count);
1989
1990        i915_vma_set_ggtt_write(vma);
1991
1992err_fence:
1993        i915_vma_unpin_fence(vma);
1994err_unpin:
1995        __i915_vma_unpin(vma);
1996err_unlock:
1997        mutex_unlock(&dev->struct_mutex);
1998err_rpm:
1999        intel_runtime_pm_put(dev_priv);
2000        i915_gem_object_unpin_pages(obj);
2001err:
2002        switch (ret) {
2003        case -EIO:
2004                /*
2005                 * We eat errors when the gpu is terminally wedged to avoid
2006                 * userspace unduly crashing (gl has no provisions for mmaps to
2007                 * fail). But any other -EIO isn't ours (e.g. swap in failure)
2008                 * and so needs to be reported.
2009                 */
2010                if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
2011                        ret = VM_FAULT_SIGBUS;
2012                        break;
2013                }
2014        case -EAGAIN:
2015                /*
2016                 * EAGAIN means the gpu is hung and we'll wait for the error
2017                 * handler to reset everything when re-faulting in
2018                 * i915_mutex_lock_interruptible.
2019                 */
2020        case 0:
2021        case -ERESTARTSYS:
2022        case -EINTR:
2023        case -EBUSY:
2024                /*
2025                 * EBUSY is ok: this just means that another thread
2026                 * already did the job.
2027                 */
2028                ret = VM_FAULT_NOPAGE;
2029                break;
2030        case -ENOMEM:
2031                ret = VM_FAULT_OOM;
2032                break;
2033        case -ENOSPC:
2034        case -EFAULT:
2035                ret = VM_FAULT_SIGBUS;
2036                break;
2037        default:
2038                WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2039                ret = VM_FAULT_SIGBUS;
2040                break;
2041        }
2042        return ret;
2043}
2044
2045static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
2046{
2047        struct i915_vma *vma;
2048
2049        GEM_BUG_ON(!obj->userfault_count);
2050
2051        obj->userfault_count = 0;
2052        list_del(&obj->userfault_link);
2053        drm_vma_node_unmap(&obj->base.vma_node,
2054                           obj->base.dev->anon_inode->i_mapping);
2055
2056        for_each_ggtt_vma(vma, obj)
2057                i915_vma_unset_userfault(vma);
2058}
2059
2060/**
2061 * i915_gem_release_mmap - remove physical page mappings
2062 * @obj: obj in question
2063 *
2064 * Preserve the reservation of the mmapping with the DRM core code, but
2065 * relinquish ownership of the pages back to the system.
2066 *
2067 * It is vital that we remove the page mapping if we have mapped a tiled
2068 * object through the GTT and then lose the fence register due to
2069 * resource pressure. Similarly if the object has been moved out of the
2070 * aperture, than pages mapped into userspace must be revoked. Removing the
2071 * mapping will then trigger a page fault on the next user access, allowing
2072 * fixup by i915_gem_fault().
2073 */
2074void
2075i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2076{
2077        struct drm_i915_private *i915 = to_i915(obj->base.dev);
2078
2079        /* Serialisation between user GTT access and our code depends upon
2080         * revoking the CPU's PTE whilst the mutex is held. The next user
2081         * pagefault then has to wait until we release the mutex.
2082         *
2083         * Note that RPM complicates somewhat by adding an additional
2084         * requirement that operations to the GGTT be made holding the RPM
2085         * wakeref.
2086         */
2087        lockdep_assert_held(&i915->drm.struct_mutex);
2088        intel_runtime_pm_get(i915);
2089
2090        if (!obj->userfault_count)
2091                goto out;
2092
2093        __i915_gem_object_release_mmap(obj);
2094
2095        /* Ensure that the CPU's PTE are revoked and there are not outstanding
2096         * memory transactions from userspace before we return. The TLB
2097         * flushing implied above by changing the PTE above *should* be
2098         * sufficient, an extra barrier here just provides us with a bit
2099         * of paranoid documentation about our requirement to serialise
2100         * memory writes before touching registers / GSM.
2101         */
2102        wmb();
2103
2104out:
2105        intel_runtime_pm_put(i915);
2106}
2107
2108void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
2109{
2110        struct drm_i915_gem_object *obj, *on;
2111        int i;
2112
2113        /*
2114         * Only called during RPM suspend. All users of the userfault_list
2115         * must be holding an RPM wakeref to ensure that this can not
2116         * run concurrently with themselves (and use the struct_mutex for
2117         * protection between themselves).
2118         */
2119
2120        list_for_each_entry_safe(obj, on,
2121                                 &dev_priv->mm.userfault_list, userfault_link)
2122                __i915_gem_object_release_mmap(obj);
2123
2124        /* The fence will be lost when the device powers down. If any were
2125         * in use by hardware (i.e. they are pinned), we should not be powering
2126         * down! All other fences will be reacquired by the user upon waking.
2127         */
2128        for (i = 0; i < dev_priv->num_fence_regs; i++) {
2129                struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2130
2131                /* Ideally we want to assert that the fence register is not
2132                 * live at this point (i.e. that no piece of code will be
2133                 * trying to write through fence + GTT, as that both violates
2134                 * our tracking of activity and associated locking/barriers,
2135                 * but also is illegal given that the hw is powered down).
2136                 *
2137                 * Previously we used reg->pin_count as a "liveness" indicator.
2138                 * That is not sufficient, and we need a more fine-grained
2139                 * tool if we want to have a sanity check here.
2140                 */
2141
2142                if (!reg->vma)
2143                        continue;
2144
2145                GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
2146                reg->dirty = true;
2147        }
2148}
2149
2150static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2151{
2152        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2153        int err;
2154
2155        err = drm_gem_create_mmap_offset(&obj->base);
2156        if (likely(!err))
2157                return 0;
2158
2159        /* Attempt to reap some mmap space from dead objects */
2160        do {
2161                err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
2162                if (err)
2163                        break;
2164
2165                i915_gem_drain_freed_objects(dev_priv);
2166                err = drm_gem_create_mmap_offset(&obj->base);
2167                if (!err)
2168                        break;
2169
2170        } while (flush_delayed_work(&dev_priv->gt.retire_work));
2171
2172        return err;
2173}
2174
2175static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2176{
2177        drm_gem_free_mmap_offset(&obj->base);
2178}
2179
2180int
2181i915_gem_mmap_gtt(struct drm_file *file,
2182                  struct drm_device *dev,
2183                  uint32_t handle,
2184                  uint64_t *offset)
2185{
2186        struct drm_i915_gem_object *obj;
2187        int ret;
2188
2189        obj = i915_gem_object_lookup(file, handle);
2190        if (!obj)
2191                return -ENOENT;
2192
2193        ret = i915_gem_object_create_mmap_offset(obj);
2194        if (ret == 0)
2195                *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2196
2197        i915_gem_object_put(obj);
2198        return ret;
2199}
2200
2201/**
2202 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2203 * @dev: DRM device
2204 * @data: GTT mapping ioctl data
2205 * @file: GEM object info
2206 *
2207 * Simply returns the fake offset to userspace so it can mmap it.
2208 * The mmap call will end up in drm_gem_mmap(), which will set things
2209 * up so we can get faults in the handler above.
2210 *
2211 * The fault handler will take care of binding the object into the GTT
2212 * (since it may have been evicted to make room for something), allocating
2213 * a fence register, and mapping the appropriate aperture address into
2214 * userspace.
2215 */
2216int
2217i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2218                        struct drm_file *file)
2219{
2220        struct drm_i915_gem_mmap_gtt *args = data;
2221
2222        return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2223}
2224
2225/* Immediately discard the backing storage */
2226static void
2227i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2228{
2229        i915_gem_object_free_mmap_offset(obj);
2230
2231        if (obj->base.filp == NULL)
2232                return;
2233
2234        /* Our goal here is to return as much of the memory as
2235         * is possible back to the system as we are called from OOM.
2236         * To do this we must instruct the shmfs to drop all of its
2237         * backing pages, *now*.
2238         */
2239        shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2240        obj->mm.madv = __I915_MADV_PURGED;
2241        obj->mm.pages = ERR_PTR(-EFAULT);
2242}
2243
2244/* Try to discard unwanted pages */
2245void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2246{
2247        struct address_space *mapping;
2248
2249        lockdep_assert_held(&obj->mm.lock);
2250        GEM_BUG_ON(i915_gem_object_has_pages(obj));
2251
2252        switch (obj->mm.madv) {
2253        case I915_MADV_DONTNEED:
2254                i915_gem_object_truncate(obj);
2255        case __I915_MADV_PURGED:
2256                return;
2257        }
2258
2259        if (obj->base.filp == NULL)
2260                return;
2261
2262        mapping = obj->base.filp->f_mapping,
2263        invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2264}
2265
2266static void
2267i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2268                              struct sg_table *pages)
2269{
2270        struct sgt_iter sgt_iter;
2271        struct page *page;
2272
2273        __i915_gem_object_release_shmem(obj, pages, true);
2274
2275        i915_gem_gtt_finish_pages(obj, pages);
2276
2277        if (i915_gem_object_needs_bit17_swizzle(obj))
2278                i915_gem_object_save_bit_17_swizzle(obj, pages);
2279
2280        for_each_sgt_page(page, sgt_iter, pages) {
2281                if (obj->mm.dirty)
2282                        set_page_dirty(page);
2283
2284                if (obj->mm.madv == I915_MADV_WILLNEED)
2285                        mark_page_accessed(page);
2286
2287                put_page(page);
2288        }
2289        obj->mm.dirty = false;
2290
2291        sg_free_table(pages);
2292        kfree(pages);
2293}
2294
2295static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2296{
2297        struct radix_tree_iter iter;
2298        void __rcu **slot;
2299
2300        rcu_read_lock();
2301        radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2302                radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2303        rcu_read_unlock();
2304}
2305
2306void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2307                                 enum i915_mm_subclass subclass)
2308{
2309        struct drm_i915_private *i915 = to_i915(obj->base.dev);
2310        struct sg_table *pages;
2311
2312        if (i915_gem_object_has_pinned_pages(obj))
2313                return;
2314
2315        GEM_BUG_ON(obj->bind_count);
2316        if (!i915_gem_object_has_pages(obj))
2317                return;
2318
2319        /* May be called by shrinker from within get_pages() (on another bo) */
2320        mutex_lock_nested(&obj->mm.lock, subclass);
2321        if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
2322                goto unlock;
2323
2324        /* ->put_pages might need to allocate memory for the bit17 swizzle
2325         * array, hence protect them from being reaped by removing them from gtt
2326         * lists early. */
2327        pages = fetch_and_zero(&obj->mm.pages);
2328        GEM_BUG_ON(!pages);
2329
2330        spin_lock(&i915->mm.obj_lock);
2331        list_del(&obj->mm.link);
2332        spin_unlock(&i915->mm.obj_lock);
2333
2334        if (obj->mm.mapping) {
2335                void *ptr;
2336
2337                ptr = page_mask_bits(obj->mm.mapping);
2338                if (is_vmalloc_addr(ptr))
2339                        vunmap(ptr);
2340                else
2341                        kunmap(kmap_to_page(ptr));
2342
2343                obj->mm.mapping = NULL;
2344        }
2345
2346        __i915_gem_object_reset_page_iter(obj);
2347
2348        if (!IS_ERR(pages))
2349                obj->ops->put_pages(obj, pages);
2350
2351        obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2352
2353unlock:
2354        mutex_unlock(&obj->mm.lock);
2355}
2356
2357static bool i915_sg_trim(struct sg_table *orig_st)
2358{
2359        struct sg_table new_st;
2360        struct scatterlist *sg, *new_sg;
2361        unsigned int i;
2362
2363        if (orig_st->nents == orig_st->orig_nents)
2364                return false;
2365
2366        if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2367                return false;
2368
2369        new_sg = new_st.sgl;
2370        for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2371                sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2372                /* called before being DMA mapped, no need to copy sg->dma_* */
2373                new_sg = sg_next(new_sg);
2374        }
2375        GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2376
2377        sg_free_table(orig_st);
2378
2379        *orig_st = new_st;
2380        return true;
2381}
2382
2383static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2384{
2385        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2386        const unsigned long page_count = obj->base.size / PAGE_SIZE;
2387        unsigned long i;
2388        struct address_space *mapping;
2389        struct sg_table *st;
2390        struct scatterlist *sg;
2391        struct sgt_iter sgt_iter;
2392        struct page *page;
2393        unsigned long last_pfn = 0;     /* suppress gcc warning */
2394        unsigned int max_segment = i915_sg_segment_size();
2395        unsigned int sg_page_sizes;
2396        gfp_t noreclaim;
2397        int ret;
2398
2399        /* Assert that the object is not currently in any GPU domain. As it
2400         * wasn't in the GTT, there shouldn't be any way it could have been in
2401         * a GPU cache
2402         */
2403        GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2404        GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2405
2406        st = kmalloc(sizeof(*st), GFP_KERNEL);
2407        if (st == NULL)
2408                return -ENOMEM;
2409
2410rebuild_st:
2411        if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2412                kfree(st);
2413                return -ENOMEM;
2414        }
2415
2416        /* Get the list of pages out of our struct file.  They'll be pinned
2417         * at this point until we release them.
2418         *
2419         * Fail silently without starting the shrinker
2420         */
2421        mapping = obj->base.filp->f_mapping;
2422        noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2423        noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2424
2425        sg = st->sgl;
2426        st->nents = 0;
2427        sg_page_sizes = 0;
2428        for (i = 0; i < page_count; i++) {
2429                const unsigned int shrink[] = {
2430                        I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2431                        0,
2432                }, *s = shrink;
2433                gfp_t gfp = noreclaim;
2434
2435                do {
2436                        page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2437                        if (likely(!IS_ERR(page)))
2438                                break;
2439
2440                        if (!*s) {
2441                                ret = PTR_ERR(page);
2442                                goto err_sg;
2443                        }
2444
2445                        i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2446                        cond_resched();
2447
2448                        /* We've tried hard to allocate the memory by reaping
2449                         * our own buffer, now let the real VM do its job and
2450                         * go down in flames if truly OOM.
2451                         *
2452                         * However, since graphics tend to be disposable,
2453                         * defer the oom here by reporting the ENOMEM back
2454                         * to userspace.
2455                         */
2456                        if (!*s) {
2457                                /* reclaim and warn, but no oom */
2458                                gfp = mapping_gfp_mask(mapping);
2459
2460                                /* Our bo are always dirty and so we require
2461                                 * kswapd to reclaim our pages (direct reclaim
2462                                 * does not effectively begin pageout of our
2463                                 * buffers on its own). However, direct reclaim
2464                                 * only waits for kswapd when under allocation
2465                                 * congestion. So as a result __GFP_RECLAIM is
2466                                 * unreliable and fails to actually reclaim our
2467                                 * dirty pages -- unless you try over and over
2468                                 * again with !__GFP_NORETRY. However, we still
2469                                 * want to fail this allocation rather than
2470                                 * trigger the out-of-memory killer and for
2471                                 * this we want __GFP_RETRY_MAYFAIL.
2472                                 */
2473                                gfp |= __GFP_RETRY_MAYFAIL;
2474                        }
2475                } while (1);
2476
2477                if (!i ||
2478                    sg->length >= max_segment ||
2479                    page_to_pfn(page) != last_pfn + 1) {
2480                        if (i) {
2481                                sg_page_sizes |= sg->length;
2482                                sg = sg_next(sg);
2483                        }
2484                        st->nents++;
2485                        sg_set_page(sg, page, PAGE_SIZE, 0);
2486                } else {
2487                        sg->length += PAGE_SIZE;
2488                }
2489                last_pfn = page_to_pfn(page);
2490
2491                /* Check that the i965g/gm workaround works. */
2492                WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2493        }
2494        if (sg) { /* loop terminated early; short sg table */
2495                sg_page_sizes |= sg->length;
2496                sg_mark_end(sg);
2497        }
2498
2499        /* Trim unused sg entries to avoid wasting memory. */
2500        i915_sg_trim(st);
2501
2502        ret = i915_gem_gtt_prepare_pages(obj, st);
2503        if (ret) {
2504                /* DMA remapping failed? One possible cause is that
2505                 * it could not reserve enough large entries, asking
2506                 * for PAGE_SIZE chunks instead may be helpful.
2507                 */
2508                if (max_segment > PAGE_SIZE) {
2509                        for_each_sgt_page(page, sgt_iter, st)
2510                                put_page(page);
2511                        sg_free_table(st);
2512
2513                        max_segment = PAGE_SIZE;
2514                        goto rebuild_st;
2515                } else {
2516                        dev_warn(&dev_priv->drm.pdev->dev,
2517                                 "Failed to DMA remap %lu pages\n",
2518                                 page_count);
2519                        goto err_pages;
2520                }
2521        }
2522
2523        if (i915_gem_object_needs_bit17_swizzle(obj))
2524                i915_gem_object_do_bit_17_swizzle(obj, st);
2525
2526        __i915_gem_object_set_pages(obj, st, sg_page_sizes);
2527
2528        return 0;
2529
2530err_sg:
2531        sg_mark_end(sg);
2532err_pages:
2533        for_each_sgt_page(page, sgt_iter, st)
2534                put_page(page);
2535        sg_free_table(st);
2536        kfree(st);
2537
2538        /* shmemfs first checks if there is enough memory to allocate the page
2539         * and reports ENOSPC should there be insufficient, along with the usual
2540         * ENOMEM for a genuine allocation failure.
2541         *
2542         * We use ENOSPC in our driver to mean that we have run out of aperture
2543         * space and so want to translate the error from shmemfs back to our
2544         * usual understanding of ENOMEM.
2545         */
2546        if (ret == -ENOSPC)
2547                ret = -ENOMEM;
2548
2549        return ret;
2550}
2551
2552void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2553                                 struct sg_table *pages,
2554                                 unsigned int sg_page_sizes)
2555{
2556        struct drm_i915_private *i915 = to_i915(obj->base.dev);
2557        unsigned long supported = INTEL_INFO(i915)->page_sizes;
2558        int i;
2559
2560        lockdep_assert_held(&obj->mm.lock);
2561
2562        obj->mm.get_page.sg_pos = pages->sgl;
2563        obj->mm.get_page.sg_idx = 0;
2564
2565        obj->mm.pages = pages;
2566
2567        if (i915_gem_object_is_tiled(obj) &&
2568            i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2569                GEM_BUG_ON(obj->mm.quirked);
2570                __i915_gem_object_pin_pages(obj);
2571                obj->mm.quirked = true;
2572        }
2573
2574        GEM_BUG_ON(!sg_page_sizes);
2575        obj->mm.page_sizes.phys = sg_page_sizes;
2576
2577        /*
2578         * Calculate the supported page-sizes which fit into the given
2579         * sg_page_sizes. This will give us the page-sizes which we may be able
2580         * to use opportunistically when later inserting into the GTT. For
2581         * example if phys=2G, then in theory we should be able to use 1G, 2M,
2582         * 64K or 4K pages, although in practice this will depend on a number of
2583         * other factors.
2584         */
2585        obj->mm.page_sizes.sg = 0;
2586        for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2587                if (obj->mm.page_sizes.phys & ~0u << i)
2588                        obj->mm.page_sizes.sg |= BIT(i);
2589        }
2590        GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2591
2592        spin_lock(&i915->mm.obj_lock);
2593        list_add(&obj->mm.link, &i915->mm.unbound_list);
2594        spin_unlock(&i915->mm.obj_lock);
2595}
2596
2597static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2598{
2599        int err;
2600
2601        if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2602                DRM_DEBUG("Attempting to obtain a purgeable object\n");
2603                return -EFAULT;
2604        }
2605
2606        err = obj->ops->get_pages(obj);
2607        GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2608
2609        return err;
2610}
2611
2612/* Ensure that the associated pages are gathered from the backing storage
2613 * and pinned into our object. i915_gem_object_pin_pages() may be called
2614 * multiple times before they are released by a single call to
2615 * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2616 * either as a result of memory pressure (reaping pages under the shrinker)
2617 * or as the object is itself released.
2618 */
2619int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2620{
2621        int err;
2622
2623        err = mutex_lock_interruptible(&obj->mm.lock);
2624        if (err)
2625                return err;
2626
2627        if (unlikely(!i915_gem_object_has_pages(obj))) {
2628                GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2629
2630                err = ____i915_gem_object_get_pages(obj);
2631                if (err)
2632                        goto unlock;
2633
2634                smp_mb__before_atomic();
2635        }
2636        atomic_inc(&obj->mm.pages_pin_count);
2637
2638unlock:
2639        mutex_unlock(&obj->mm.lock);
2640        return err;
2641}
2642
2643/* The 'mapping' part of i915_gem_object_pin_map() below */
2644static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2645                                 enum i915_map_type type)
2646{
2647        unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2648        struct sg_table *sgt = obj->mm.pages;
2649        struct sgt_iter sgt_iter;
2650        struct page *page;
2651        struct page *stack_pages[32];
2652        struct page **pages = stack_pages;
2653        unsigned long i = 0;
2654        pgprot_t pgprot;
2655        void *addr;
2656
2657        /* A single page can always be kmapped */
2658        if (n_pages == 1 && type == I915_MAP_WB)
2659                return kmap(sg_page(sgt->sgl));
2660
2661        if (n_pages > ARRAY_SIZE(stack_pages)) {
2662                /* Too big for stack -- allocate temporary array instead */
2663                pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2664                if (!pages)
2665                        return NULL;
2666        }
2667
2668        for_each_sgt_page(page, sgt_iter, sgt)
2669                pages[i++] = page;
2670
2671        /* Check that we have the expected number of pages */
2672        GEM_BUG_ON(i != n_pages);
2673
2674        switch (type) {
2675        default:
2676                MISSING_CASE(type);
2677                /* fallthrough to use PAGE_KERNEL anyway */
2678        case I915_MAP_WB:
2679                pgprot = PAGE_KERNEL;
2680                break;
2681        case I915_MAP_WC:
2682                pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2683                break;
2684        }
2685        addr = vmap(pages, n_pages, 0, pgprot);
2686
2687        if (pages != stack_pages)
2688                kvfree(pages);
2689
2690        return addr;
2691}
2692
2693/* get, pin, and map the pages of the object into kernel space */
2694void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2695                              enum i915_map_type type)
2696{
2697        enum i915_map_type has_type;
2698        bool pinned;
2699        void *ptr;
2700        int ret;
2701
2702        if (unlikely(!i915_gem_object_has_struct_page(obj)))
2703                return ERR_PTR(-ENXIO);
2704
2705        ret = mutex_lock_interruptible(&obj->mm.lock);
2706        if (ret)
2707                return ERR_PTR(ret);
2708
2709        pinned = !(type & I915_MAP_OVERRIDE);
2710        type &= ~I915_MAP_OVERRIDE;
2711
2712        if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2713                if (unlikely(!i915_gem_object_has_pages(obj))) {
2714                        GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2715
2716                        ret = ____i915_gem_object_get_pages(obj);
2717                        if (ret)
2718                                goto err_unlock;
2719
2720                        smp_mb__before_atomic();
2721                }
2722                atomic_inc(&obj->mm.pages_pin_count);
2723                pinned = false;
2724        }
2725        GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2726
2727        ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2728        if (ptr && has_type != type) {
2729                if (pinned) {
2730                        ret = -EBUSY;
2731                        goto err_unpin;
2732                }
2733
2734                if (is_vmalloc_addr(ptr))
2735                        vunmap(ptr);
2736                else
2737                        kunmap(kmap_to_page(ptr));
2738
2739                ptr = obj->mm.mapping = NULL;
2740        }
2741
2742        if (!ptr) {
2743                ptr = i915_gem_object_map(obj, type);
2744                if (!ptr) {
2745                        ret = -ENOMEM;
2746                        goto err_unpin;
2747                }
2748
2749                obj->mm.mapping = page_pack_bits(ptr, type);
2750        }
2751
2752out_unlock:
2753        mutex_unlock(&obj->mm.lock);
2754        return ptr;
2755
2756err_unpin:
2757        atomic_dec(&obj->mm.pages_pin_count);
2758err_unlock:
2759        ptr = ERR_PTR(ret);
2760        goto out_unlock;
2761}
2762
2763static int
2764i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2765                           const struct drm_i915_gem_pwrite *arg)
2766{
2767        struct address_space *mapping = obj->base.filp->f_mapping;
2768        char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2769        u64 remain, offset;
2770        unsigned int pg;
2771
2772        /* Before we instantiate/pin the backing store for our use, we
2773         * can prepopulate the shmemfs filp efficiently using a write into
2774         * the pagecache. We avoid the penalty of instantiating all the
2775         * pages, important if the user is just writing to a few and never
2776         * uses the object on the GPU, and using a direct write into shmemfs
2777         * allows it to avoid the cost of retrieving a page (either swapin
2778         * or clearing-before-use) before it is overwritten.
2779         */
2780        if (i915_gem_object_has_pages(obj))
2781                return -ENODEV;
2782
2783        if (obj->mm.madv != I915_MADV_WILLNEED)
2784                return -EFAULT;
2785
2786        /* Before the pages are instantiated the object is treated as being
2787         * in the CPU domain. The pages will be clflushed as required before
2788         * use, and we can freely write into the pages directly. If userspace
2789         * races pwrite with any other operation; corruption will ensue -
2790         * that is userspace's prerogative!
2791         */
2792
2793        remain = arg->size;
2794        offset = arg->offset;
2795        pg = offset_in_page(offset);
2796
2797        do {
2798                unsigned int len, unwritten;
2799                struct page *page;
2800                void *data, *vaddr;
2801                int err;
2802
2803                len = PAGE_SIZE - pg;
2804                if (len > remain)
2805                        len = remain;
2806
2807                err = pagecache_write_begin(obj->base.filp, mapping,
2808                                            offset, len, 0,
2809                                            &page, &data);
2810                if (err < 0)
2811                        return err;
2812
2813                vaddr = kmap(page);
2814                unwritten = copy_from_user(vaddr + pg, user_data, len);
2815                kunmap(page);
2816
2817                err = pagecache_write_end(obj->base.filp, mapping,
2818                                          offset, len, len - unwritten,
2819                                          page, data);
2820                if (err < 0)
2821                        return err;
2822
2823                if (unwritten)
2824                        return -EFAULT;
2825
2826                remain -= len;
2827                user_data += len;
2828                offset += len;
2829                pg = 0;
2830        } while (remain);
2831
2832        return 0;
2833}
2834
2835static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
2836{
2837        bool banned;
2838
2839        atomic_inc(&ctx->guilty_count);
2840
2841        banned = false;
2842        if (i915_gem_context_is_bannable(ctx)) {
2843                unsigned int score;
2844
2845                score = atomic_add_return(CONTEXT_SCORE_GUILTY,
2846                                          &ctx->ban_score);
2847                banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
2848
2849                DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
2850                                 ctx->name, score, yesno(banned));
2851        }
2852        if (!banned)
2853                return;
2854
2855        i915_gem_context_set_banned(ctx);
2856        if (!IS_ERR_OR_NULL(ctx->file_priv)) {
2857                atomic_inc(&ctx->file_priv->context_bans);
2858                DRM_DEBUG_DRIVER("client %s has had %d context banned\n",
2859                                 ctx->name, atomic_read(&ctx->file_priv->context_bans));
2860        }
2861}
2862
2863static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
2864{
2865        atomic_inc(&ctx->active_count);
2866}
2867
2868struct i915_request *
2869i915_gem_find_active_request(struct intel_engine_cs *engine)
2870{
2871        struct i915_request *request, *active = NULL;
2872        unsigned long flags;
2873
2874        /* We are called by the error capture and reset at a random
2875         * point in time. In particular, note that neither is crucially
2876         * ordered with an interrupt. After a hang, the GPU is dead and we
2877         * assume that no more writes can happen (we waited long enough for
2878         * all writes that were in transaction to be flushed) - adding an
2879         * extra delay for a recent interrupt is pointless. Hence, we do
2880         * not need an engine->irq_seqno_barrier() before the seqno reads.
2881         */
2882        spin_lock_irqsave(&engine->timeline->lock, flags);
2883        list_for_each_entry(request, &engine->timeline->requests, link) {
2884                if (__i915_request_completed(request, request->global_seqno))
2885                        continue;
2886
2887                GEM_BUG_ON(request->engine != engine);
2888                GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
2889                                    &request->fence.flags));
2890
2891                active = request;
2892                break;
2893        }
2894        spin_unlock_irqrestore(&engine->timeline->lock, flags);
2895
2896        return active;
2897}
2898
2899static bool engine_stalled(struct intel_engine_cs *engine)
2900{
2901        if (!engine->hangcheck.stalled)
2902                return false;
2903
2904        /* Check for possible seqno movement after hang declaration */
2905        if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) {
2906                DRM_DEBUG_DRIVER("%s pardoned\n", engine->name);
2907                return false;
2908        }
2909
2910        return true;
2911}
2912
2913/*
2914 * Ensure irq handler finishes, and not run again.
2915 * Also return the active request so that we only search for it once.
2916 */
2917struct i915_request *
2918i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
2919{
2920        struct i915_request *request = NULL;
2921
2922        /*
2923         * During the reset sequence, we must prevent the engine from
2924         * entering RC6. As the context state is undefined until we restart
2925         * the engine, if it does enter RC6 during the reset, the state
2926         * written to the powercontext is undefined and so we may lose
2927         * GPU state upon resume, i.e. fail to restart after a reset.
2928         */
2929        intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
2930
2931        /*
2932         * Prevent the signaler thread from updating the request
2933         * state (by calling dma_fence_signal) as we are processing
2934         * the reset. The write from the GPU of the seqno is
2935         * asynchronous and the signaler thread may see a different
2936         * value to us and declare the request complete, even though
2937         * the reset routine have picked that request as the active
2938         * (incomplete) request. This conflict is not handled
2939         * gracefully!
2940         */
2941        kthread_park(engine->breadcrumbs.signaler);
2942
2943        /*
2944         * Prevent request submission to the hardware until we have
2945         * completed the reset in i915_gem_reset_finish(). If a request
2946         * is completed by one engine, it may then queue a request
2947         * to a second via its execlists->tasklet *just* as we are
2948         * calling engine->init_hw() and also writing the ELSP.
2949         * Turning off the execlists->tasklet until the reset is over
2950         * prevents the race.
2951         *
2952         * Note that this needs to be a single atomic operation on the
2953         * tasklet (flush existing tasks, prevent new tasks) to prevent
2954         * a race between reset and set-wedged. It is not, so we do the best
2955         * we can atm and make sure we don't lock the machine up in the more
2956         * common case of recursively being called from set-wedged from inside
2957         * i915_reset.
2958         */
2959        if (!atomic_read(&engine->execlists.tasklet.count))
2960                tasklet_kill(&engine->execlists.tasklet);
2961        tasklet_disable(&engine->execlists.tasklet);
2962
2963        /*
2964         * We're using worker to queue preemption requests from the tasklet in
2965         * GuC submission mode.
2966         * Even though tasklet was disabled, we may still have a worker queued.
2967         * Let's make sure that all workers scheduled before disabling the
2968         * tasklet are completed before continuing with the reset.
2969         */
2970        if (engine->i915->guc.preempt_wq)
2971                flush_workqueue(engine->i915->guc.preempt_wq);
2972
2973        if (engine->irq_seqno_barrier)
2974                engine->irq_seqno_barrier(engine);
2975
2976        request = i915_gem_find_active_request(engine);
2977        if (request && request->fence.error == -EIO)
2978                request = ERR_PTR(-EIO); /* Previous reset failed! */
2979
2980        return request;
2981}
2982
2983int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
2984{
2985        struct intel_engine_cs *engine;
2986        struct i915_request *request;
2987        enum intel_engine_id id;
2988        int err = 0;
2989
2990        for_each_engine(engine, dev_priv, id) {
2991                request = i915_gem_reset_prepare_engine(engine);
2992                if (IS_ERR(request)) {
2993                        err = PTR_ERR(request);
2994                        continue;
2995                }
2996
2997                engine->hangcheck.active_request = request;
2998        }
2999
3000        i915_gem_revoke_fences(dev_priv);
3001
3002        return err;
3003}
3004
3005static void skip_request(struct i915_request *request)
3006{
3007        void *vaddr = request->ring->vaddr;
3008        u32 head;
3009
3010        /* As this request likely depends on state from the lost
3011         * context, clear out all the user operations leaving the
3012         * breadcrumb at the end (so we get the fence notifications).
3013         */
3014        head = request->head;
3015        if (request->postfix < head) {
3016                memset(vaddr + head, 0, request->ring->size - head);
3017                head = 0;
3018        }
3019        memset(vaddr + head, 0, request->postfix - head);
3020
3021        dma_fence_set_error(&request->fence, -EIO);
3022}
3023
3024static void engine_skip_context(struct i915_request *request)
3025{
3026        struct intel_engine_cs *engine = request->engine;
3027        struct i915_gem_context *hung_ctx = request->ctx;
3028        struct intel_timeline *timeline;
3029        unsigned long flags;
3030
3031        timeline = i915_gem_context_lookup_timeline(hung_ctx, engine);
3032
3033        spin_lock_irqsave(&engine->timeline->lock, flags);
3034        spin_lock(&timeline->lock);
3035
3036        list_for_each_entry_continue(request, &engine->timeline->requests, link)
3037                if (request->ctx == hung_ctx)
3038                        skip_request(request);
3039
3040        list_for_each_entry(request, &timeline->requests, link)
3041                skip_request(request);
3042
3043        spin_unlock(&timeline->lock);
3044        spin_unlock_irqrestore(&engine->timeline->lock, flags);
3045}
3046
3047/* Returns the request if it was guilty of the hang */
3048static struct i915_request *
3049i915_gem_reset_request(struct intel_engine_cs *engine,
3050                       struct i915_request *request)
3051{
3052        /* The guilty request will get skipped on a hung engine.
3053         *
3054         * Users of client default contexts do not rely on logical
3055         * state preserved between batches so it is safe to execute
3056         * queued requests following the hang. Non default contexts
3057         * rely on preserved state, so skipping a batch loses the
3058         * evolution of the state and it needs to be considered corrupted.
3059         * Executing more queued batches on top of corrupted state is
3060         * risky. But we take the risk by trying to advance through
3061         * the queued requests in order to make the client behaviour
3062         * more predictable around resets, by not throwing away random
3063         * amount of batches it has prepared for execution. Sophisticated
3064         * clients can use gem_reset_stats_ioctl and dma fence status
3065         * (exported via sync_file info ioctl on explicit fences) to observe
3066         * when it loses the context state and should rebuild accordingly.
3067         *
3068         * The context ban, and ultimately the client ban, mechanism are safety
3069         * valves if client submission ends up resulting in nothing more than
3070         * subsequent hangs.
3071         */
3072
3073        if (engine_stalled(engine)) {
3074                i915_gem_context_mark_guilty(request->ctx);
3075                skip_request(request);
3076
3077                /* If this context is now banned, skip all pending requests. */
3078                if (i915_gem_context_is_banned(request->ctx))
3079                        engine_skip_context(request);
3080        } else {
3081                /*
3082                 * Since this is not the hung engine, it may have advanced
3083                 * since the hang declaration. Double check by refinding
3084                 * the active request at the time of the reset.
3085                 */
3086                request = i915_gem_find_active_request(engine);
3087                if (request) {
3088                        i915_gem_context_mark_innocent(request->ctx);
3089                        dma_fence_set_error(&request->fence, -EAGAIN);
3090
3091                        /* Rewind the engine to replay the incomplete rq */
3092                        spin_lock_irq(&engine->timeline->lock);
3093                        request = list_prev_entry(request, link);
3094                        if (&request->link == &engine->timeline->requests)
3095                                request = NULL;
3096                        spin_unlock_irq(&engine->timeline->lock);
3097                }
3098        }
3099
3100        return request;
3101}
3102
3103void i915_gem_reset_engine(struct intel_engine_cs *engine,
3104                           struct i915_request *request)
3105{
3106        /*
3107         * Make sure this write is visible before we re-enable the interrupt
3108         * handlers on another CPU, as tasklet_enable() resolves to just
3109         * a compiler barrier which is insufficient for our purpose here.
3110         */
3111        smp_store_mb(engine->irq_posted, 0);
3112
3113        if (request)
3114                request = i915_gem_reset_request(engine, request);
3115
3116        if (request) {
3117                DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
3118                                 engine->name, request->global_seqno);
3119        }
3120
3121        /* Setup the CS to resume from the breadcrumb of the hung request */
3122        engine->reset_hw(engine, request);
3123}
3124
3125void i915_gem_reset(struct drm_i915_private *dev_priv)
3126{
3127        struct intel_engine_cs *engine;
3128        enum intel_engine_id id;
3129
3130        lockdep_assert_held(&dev_priv->drm.struct_mutex);
3131
3132        i915_retire_requests(dev_priv);
3133
3134        for_each_engine(engine, dev_priv, id) {
3135                struct i915_gem_context *ctx;
3136
3137                i915_gem_reset_engine(engine, engine->hangcheck.active_request);
3138                ctx = fetch_and_zero(&engine->last_retired_context);
3139                if (ctx)
3140                        engine->context_unpin(engine, ctx);
3141
3142                /*
3143                 * Ostensibily, we always want a context loaded for powersaving,
3144                 * so if the engine is idle after the reset, send a request
3145                 * to load our scratch kernel_context.
3146                 *
3147                 * More mysteriously, if we leave the engine idle after a reset,
3148                 * the next userspace batch may hang, with what appears to be
3149                 * an incoherent read by the CS (presumably stale TLB). An
3150                 * empty request appears sufficient to paper over the glitch.
3151                 */
3152                if (intel_engine_is_idle(engine)) {
3153                        struct i915_request *rq;
3154
3155                        rq = i915_request_alloc(engine,
3156                                                dev_priv->kernel_context);
3157                        if (!IS_ERR(rq))
3158                                __i915_request_add(rq, false);
3159                }
3160        }
3161
3162        i915_gem_restore_fences(dev_priv);
3163
3164        if (dev_priv->gt.awake) {
3165                intel_sanitize_gt_powersave(dev_priv);
3166                intel_enable_gt_powersave(dev_priv);
3167                if (INTEL_GEN(dev_priv) >= 6)
3168                        gen6_rps_busy(dev_priv);
3169        }
3170}
3171
3172void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
3173{
3174        tasklet_enable(&engine->execlists.tasklet);
3175        kthread_unpark(engine->breadcrumbs.signaler);
3176
3177        intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
3178}
3179
3180void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
3181{
3182        struct intel_engine_cs *engine;
3183        enum intel_engine_id id;
3184
3185        lockdep_assert_held(&dev_priv->drm.struct_mutex);
3186
3187        for_each_engine(engine, dev_priv, id) {
3188                engine->hangcheck.active_request = NULL;
3189                i915_gem_reset_finish_engine(engine);
3190        }
3191}
3192
3193static void nop_submit_request(struct i915_request *request)
3194{
3195        dma_fence_set_error(&request->fence, -EIO);
3196
3197        i915_request_submit(request);
3198}
3199
3200static void nop_complete_submit_request(struct i915_request *request)
3201{
3202        unsigned long flags;
3203
3204        dma_fence_set_error(&request->fence, -EIO);
3205
3206        spin_lock_irqsave(&request->engine->timeline->lock, flags);
3207        __i915_request_submit(request);
3208        intel_engine_init_global_seqno(request->engine, request->global_seqno);
3209        spin_unlock_irqrestore(&request->engine->timeline->lock, flags);
3210}
3211
3212void i915_gem_set_wedged(struct drm_i915_private *i915)
3213{
3214        struct intel_engine_cs *engine;
3215        enum intel_engine_id id;
3216
3217        if (drm_debug & DRM_UT_DRIVER) {
3218                struct drm_printer p = drm_debug_printer(__func__);
3219
3220                for_each_engine(engine, i915, id)
3221                        intel_engine_dump(engine, &p, "%s\n", engine->name);
3222        }
3223
3224        set_bit(I915_WEDGED, &i915->gpu_error.flags);
3225        smp_mb__after_atomic();
3226
3227        /*
3228         * First, stop submission to hw, but do not yet complete requests by
3229         * rolling the global seqno forward (since this would complete requests
3230         * for which we haven't set the fence error to EIO yet).
3231         */
3232        for_each_engine(engine, i915, id) {
3233                i915_gem_reset_prepare_engine(engine);
3234
3235                engine->submit_request = nop_submit_request;
3236                engine->schedule = NULL;
3237        }
3238        i915->caps.scheduler = 0;
3239
3240        /*
3241         * Make sure no one is running the old callback before we proceed with
3242         * cancelling requests and resetting the completion tracking. Otherwise
3243         * we might submit a request to the hardware which never completes.
3244         */
3245        synchronize_rcu();
3246
3247        for_each_engine(engine, i915, id) {
3248                /* Mark all executing requests as skipped */
3249                engine->cancel_requests(engine);
3250
3251                /*
3252                 * Only once we've force-cancelled all in-flight requests can we
3253                 * start to complete all requests.
3254                 */
3255                engine->submit_request = nop_complete_submit_request;
3256        }
3257
3258        /*
3259         * Make sure no request can slip through without getting completed by
3260         * either this call here to intel_engine_init_global_seqno, or the one
3261         * in nop_complete_submit_request.
3262         */
3263        synchronize_rcu();
3264
3265        for_each_engine(engine, i915, id) {
3266                unsigned long flags;
3267
3268                /*
3269                 * Mark all pending requests as complete so that any concurrent
3270                 * (lockless) lookup doesn't try and wait upon the request as we
3271                 * reset it.
3272                 */
3273                spin_lock_irqsave(&engine->timeline->lock, flags);
3274                intel_engine_init_global_seqno(engine,
3275                                               intel_engine_last_submit(engine));
3276                spin_unlock_irqrestore(&engine->timeline->lock, flags);
3277
3278                i915_gem_reset_finish_engine(engine);
3279        }
3280
3281        wake_up_all(&i915->gpu_error.reset_queue);
3282}
3283
3284bool i915_gem_unset_wedged(struct drm_i915_private *i915)
3285{
3286        struct i915_gem_timeline *tl;
3287        int i;
3288
3289        lockdep_assert_held(&i915->drm.struct_mutex);
3290        if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
3291                return true;
3292
3293        /* Before unwedging, make sure that all pending operations
3294         * are flushed and errored out - we may have requests waiting upon
3295         * third party fences. We marked all inflight requests as EIO, and
3296         * every execbuf since returned EIO, for consistency we want all
3297         * the currently pending requests to also be marked as EIO, which
3298         * is done inside our nop_submit_request - and so we must wait.
3299         *
3300         * No more can be submitted until we reset the wedged bit.
3301         */
3302        list_for_each_entry(tl, &i915->gt.timelines, link) {
3303                for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
3304                        struct i915_request *rq;
3305
3306                        rq = i915_gem_active_peek(&tl->engine[i].last_request,
3307                                                  &i915->drm.struct_mutex);
3308                        if (!rq)
3309                                continue;
3310
3311                        /* We can't use our normal waiter as we want to
3312                         * avoid recursively trying to handle the current
3313                         * reset. The basic dma_fence_default_wait() installs
3314                         * a callback for dma_fence_signal(), which is
3315                         * triggered by our nop handler (indirectly, the
3316                         * callback enables the signaler thread which is
3317                         * woken by the nop_submit_request() advancing the seqno
3318                         * and when the seqno passes the fence, the signaler
3319                         * then signals the fence waking us up).
3320                         */
3321                        if (dma_fence_default_wait(&rq->fence, true,
3322                                                   MAX_SCHEDULE_TIMEOUT) < 0)
3323                                return false;
3324                }
3325        }
3326
3327        /* Undo nop_submit_request. We prevent all new i915 requests from
3328         * being queued (by disallowing execbuf whilst wedged) so having
3329         * waited for all active requests above, we know the system is idle
3330         * and do not have to worry about a thread being inside
3331         * engine->submit_request() as we swap over. So unlike installing
3332         * the nop_submit_request on reset, we can do this from normal
3333         * context and do not require stop_machine().
3334         */
3335        intel_engines_reset_default_submission(i915);
3336        i915_gem_contexts_lost(i915);
3337
3338        smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
3339        clear_bit(I915_WEDGED, &i915->gpu_error.flags);
3340
3341        return true;
3342}
3343
3344static void
3345i915_gem_retire_work_handler(struct work_struct *work)
3346{
3347        struct drm_i915_private *dev_priv =
3348                container_of(work, typeof(*dev_priv), gt.retire_work.work);
3349        struct drm_device *dev = &dev_priv->drm;
3350
3351        /* Come back later if the device is busy... */
3352        if (mutex_trylock(&dev->struct_mutex)) {
3353                i915_retire_requests(dev_priv);
3354                mutex_unlock(&dev->struct_mutex);
3355        }
3356
3357        /*
3358         * Keep the retire handler running until we are finally idle.
3359         * We do not need to do this test under locking as in the worst-case
3360         * we queue the retire worker once too often.
3361         */
3362        if (READ_ONCE(dev_priv->gt.awake))
3363                queue_delayed_work(dev_priv->wq,
3364                                   &dev_priv->gt.retire_work,
3365                                   round_jiffies_up_relative(HZ));
3366}
3367
3368static void shrink_caches(struct drm_i915_private *i915)
3369{
3370        /*
3371         * kmem_cache_shrink() discards empty slabs and reorders partially
3372         * filled slabs to prioritise allocating from the mostly full slabs,
3373         * with the aim of reducing fragmentation.
3374         */
3375        kmem_cache_shrink(i915->priorities);
3376        kmem_cache_shrink(i915->dependencies);
3377        kmem_cache_shrink(i915->requests);
3378        kmem_cache_shrink(i915->luts);
3379        kmem_cache_shrink(i915->vmas);
3380        kmem_cache_shrink(i915->objects);
3381}
3382
3383struct sleep_rcu_work {
3384        union {
3385                struct rcu_head rcu;
3386                struct work_struct work;
3387        };
3388        struct drm_i915_private *i915;
3389        unsigned int epoch;
3390};
3391
3392static inline bool
3393same_epoch(struct drm_i915_private *i915, unsigned int epoch)
3394{
3395        /*
3396         * There is a small chance that the epoch wrapped since we started
3397         * sleeping. If we assume that epoch is at least a u32, then it will
3398         * take at least 2^32 * 100ms for it to wrap, or about 326 years.
3399         */
3400        return epoch == READ_ONCE(i915->gt.epoch);
3401}
3402
3403static void __sleep_work(struct work_struct *work)
3404{
3405        struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
3406        struct drm_i915_private *i915 = s->i915;
3407        unsigned int epoch = s->epoch;
3408
3409        kfree(s);
3410        if (same_epoch(i915, epoch))
3411                shrink_caches(i915);
3412}
3413
3414static void __sleep_rcu(struct rcu_head *rcu)
3415{
3416        struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
3417        struct drm_i915_private *i915 = s->i915;
3418
3419        if (same_epoch(i915, s->epoch)) {
3420                INIT_WORK(&s->work, __sleep_work);
3421                queue_work(i915->wq, &s->work);
3422        } else {
3423                kfree(s);
3424        }
3425}
3426
3427static inline bool
3428new_requests_since_last_retire(const struct drm_i915_private *i915)
3429{
3430        return (READ_ONCE(i915->gt.active_requests) ||
3431                work_pending(&i915->gt.idle_work.work));
3432}
3433
3434static void
3435i915_gem_idle_work_handler(struct work_struct *work)
3436{
3437        struct drm_i915_private *dev_priv =
3438                container_of(work, typeof(*dev_priv), gt.idle_work.work);
3439        unsigned int epoch = I915_EPOCH_INVALID;
3440        bool rearm_hangcheck;
3441
3442        if (!READ_ONCE(dev_priv->gt.awake))
3443                return;
3444
3445        /*
3446         * Wait for last execlists context complete, but bail out in case a
3447         * new request is submitted. As we don't trust the hardware, we
3448         * continue on if the wait times out. This is necessary to allow
3449         * the machine to suspend even if the hardware dies, and we will
3450         * try to recover in resume (after depriving the hardware of power,
3451         * it may be in a better mmod).
3452         */
3453        __wait_for(if (new_requests_since_last_retire(dev_priv)) return,
3454                   intel_engines_are_idle(dev_priv),
3455                   I915_IDLE_ENGINES_TIMEOUT * 1000,
3456                   10, 500);
3457
3458        rearm_hangcheck =
3459                cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3460
3461        if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
3462                /* Currently busy, come back later */
3463                mod_delayed_work(dev_priv->wq,
3464                                 &dev_priv->gt.idle_work,
3465                                 msecs_to_jiffies(50));
3466                goto out_rearm;
3467        }
3468
3469        /*
3470         * New request retired after this work handler started, extend active
3471         * period until next instance of the work.
3472         */
3473        if (new_requests_since_last_retire(dev_priv))
3474                goto out_unlock;
3475
3476        /*
3477         * Be paranoid and flush a concurrent interrupt to make sure
3478         * we don't reactivate any irq tasklets after parking.
3479         *
3480         * FIXME: Note that even though we have waited for execlists to be idle,
3481         * there may still be an in-flight interrupt even though the CSB
3482         * is now empty. synchronize_irq() makes sure that a residual interrupt
3483         * is completed before we continue, but it doesn't prevent the HW from
3484         * raising a spurious interrupt later. To complete the shield we should
3485         * coordinate disabling the CS irq with flushing the interrupts.
3486         */
3487        synchronize_irq(dev_priv->drm.irq);
3488
3489        intel_engines_park(dev_priv);
3490        i915_gem_timelines_park(dev_priv);
3491
3492        i915_pmu_gt_parked(dev_priv);
3493
3494        GEM_BUG_ON(!dev_priv->gt.awake);
3495        dev_priv->gt.awake = false;
3496        epoch = dev_priv->gt.epoch;
3497        GEM_BUG_ON(epoch == I915_EPOCH_INVALID);
3498        rearm_hangcheck = false;
3499
3500        if (INTEL_GEN(dev_priv) >= 6)
3501                gen6_rps_idle(dev_priv);
3502
3503        intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
3504
3505        intel_runtime_pm_put(dev_priv);
3506out_unlock:
3507        mutex_unlock(&dev_priv->drm.struct_mutex);
3508
3509out_rearm:
3510        if (rearm_hangcheck) {
3511                GEM_BUG_ON(!dev_priv->gt.awake);
3512                i915_queue_hangcheck(dev_priv);
3513        }
3514
3515        /*
3516         * When we are idle, it is an opportune time to reap our caches.
3517         * However, we have many objects that utilise RCU and the ordered
3518         * i915->wq that this work is executing on. To try and flush any
3519         * pending frees now we are idle, we first wait for an RCU grace
3520         * period, and then queue a task (that will run last on the wq) to
3521         * shrink and re-optimize the caches.
3522         */
3523        if (same_epoch(dev_priv, epoch)) {
3524                struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
3525                if (s) {
3526                        s->i915 = dev_priv;
3527                        s->epoch = epoch;
3528                        call_rcu(&s->rcu, __sleep_rcu);
3529                }
3530        }
3531}
3532
3533void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
3534{
3535        struct drm_i915_private *i915 = to_i915(gem->dev);
3536        struct drm_i915_gem_object *obj = to_intel_bo(gem);
3537        struct drm_i915_file_private *fpriv = file->driver_priv;
3538        struct i915_lut_handle *lut, *ln;
3539
3540        mutex_lock(&i915->drm.struct_mutex);
3541
3542        list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
3543                struct i915_gem_context *ctx = lut->ctx;
3544                struct i915_vma *vma;
3545
3546                GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
3547                if (ctx->file_priv != fpriv)
3548                        continue;
3549
3550                vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
3551                GEM_BUG_ON(vma->obj != obj);
3552
3553                /* We allow the process to have multiple handles to the same
3554                 * vma, in the same fd namespace, by virtue of flink/open.
3555                 */
3556                GEM_BUG_ON(!vma->open_count);
3557                if (!--vma->open_count && !i915_vma_is_ggtt(vma))
3558                        i915_vma_close(vma);
3559
3560                list_del(&lut->obj_link);
3561                list_del(&lut->ctx_link);
3562
3563                kmem_cache_free(i915->luts, lut);
3564                __i915_gem_object_release_unless_active(obj);
3565        }
3566
3567        mutex_unlock(&i915->drm.struct_mutex);
3568}
3569
3570static unsigned long to_wait_timeout(s64 timeout_ns)
3571{
3572        if (timeout_ns < 0)
3573                return MAX_SCHEDULE_TIMEOUT;
3574
3575        if (timeout_ns == 0)
3576                return 0;
3577
3578        return nsecs_to_jiffies_timeout(timeout_ns);
3579}
3580
3581/**
3582 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3583 * @dev: drm device pointer
3584 * @data: ioctl data blob
3585 * @file: drm file pointer
3586 *
3587 * Returns 0 if successful, else an error is returned with the remaining time in
3588 * the timeout parameter.
3589 *  -ETIME: object is still busy after timeout
3590 *  -ERESTARTSYS: signal interrupted the wait
3591 *  -ENONENT: object doesn't exist
3592 * Also possible, but rare:
3593 *  -EAGAIN: incomplete, restart syscall
3594 *  -ENOMEM: damn
3595 *  -ENODEV: Internal IRQ fail
3596 *  -E?: The add request failed
3597 *
3598 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3599 * non-zero timeout parameter the wait ioctl will wait for the given number of
3600 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3601 * without holding struct_mutex the object may become re-busied before this
3602 * function completes. A similar but shorter * race condition exists in the busy
3603 * ioctl
3604 */
3605int
3606i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3607{
3608        struct drm_i915_gem_wait *args = data;
3609        struct drm_i915_gem_object *obj;
3610        ktime_t start;
3611        long ret;
3612
3613        if (args->flags != 0)
3614                return -EINVAL;
3615
3616        obj = i915_gem_object_lookup(file, args->bo_handle);
3617        if (!obj)
3618                return -ENOENT;
3619
3620        start = ktime_get();
3621
3622        ret = i915_gem_object_wait(obj,
3623                                   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
3624                                   to_wait_timeout(args->timeout_ns),
3625                                   to_rps_client(file));
3626
3627        if (args->timeout_ns > 0) {
3628                args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3629                if (args->timeout_ns < 0)
3630                        args->timeout_ns = 0;
3631
3632                /*
3633                 * Apparently ktime isn't accurate enough and occasionally has a
3634                 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3635                 * things up to make the test happy. We allow up to 1 jiffy.
3636                 *
3637                 * This is a regression from the timespec->ktime conversion.
3638                 */
3639                if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3640                        args->timeout_ns = 0;
3641
3642                /* Asked to wait beyond the jiffie/scheduler precision? */
3643                if (ret == -ETIME && args->timeout_ns)
3644                        ret = -EAGAIN;
3645        }
3646
3647        i915_gem_object_put(obj);
3648        return ret;
3649}
3650
3651static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
3652{
3653        int ret, i;
3654
3655        for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
3656                ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
3657                if (ret)
3658                        return ret;
3659        }
3660
3661        return 0;
3662}
3663
3664static int wait_for_engines(struct drm_i915_private *i915)
3665{
3666        if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
3667                dev_err(i915->drm.dev,
3668                        "Failed to idle engines, declaring wedged!\n");
3669                if (drm_debug & DRM_UT_DRIVER) {
3670                        struct drm_printer p = drm_debug_printer(__func__);
3671                        struct intel_engine_cs *engine;
3672                        enum intel_engine_id id;
3673
3674                        for_each_engine(engine, i915, id)
3675                                intel_engine_dump(engine, &p,
3676                                                  "%s\n", engine->name);
3677                }
3678
3679                i915_gem_set_wedged(i915);
3680                return -EIO;
3681        }
3682
3683        return 0;
3684}
3685
3686int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
3687{
3688        int ret;
3689
3690        /* If the device is asleep, we have no requests outstanding */
3691        if (!READ_ONCE(i915->gt.awake))
3692                return 0;
3693
3694        if (flags & I915_WAIT_LOCKED) {
3695                struct i915_gem_timeline *tl;
3696
3697                lockdep_assert_held(&i915->drm.struct_mutex);
3698
3699                list_for_each_entry(tl, &i915->gt.timelines, link) {
3700                        ret = wait_for_timeline(tl, flags);
3701                        if (ret)
3702                                return ret;
3703                }
3704                i915_retire_requests(i915);
3705
3706                ret = wait_for_engines(i915);
3707        } else {
3708                ret = wait_for_timeline(&i915->gt.global_timeline, flags);
3709        }
3710
3711        return ret;
3712}
3713
3714static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
3715{
3716        /*
3717         * We manually flush the CPU domain so that we can override and
3718         * force the flush for the display, and perform it asyncrhonously.
3719         */
3720        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3721        if (obj->cache_dirty)
3722                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
3723        obj->write_domain = 0;
3724}
3725
3726void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
3727{
3728        if (!READ_ONCE(obj->pin_global))
3729                return;
3730
3731        mutex_lock(&obj->base.dev->struct_mutex);
3732        __i915_gem_object_flush_for_display(obj);
3733        mutex_unlock(&obj->base.dev->struct_mutex);
3734}
3735
3736/**
3737 * Moves a single object to the WC read, and possibly write domain.
3738 * @obj: object to act on
3739 * @write: ask for write access or read only
3740 *
3741 * This function returns when the move is complete, including waiting on
3742 * flushes to occur.
3743 */
3744int
3745i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3746{
3747        int ret;
3748
3749        lockdep_assert_held(&obj->base.dev->struct_mutex);
3750
3751        ret = i915_gem_object_wait(obj,
3752                                   I915_WAIT_INTERRUPTIBLE |
3753                                   I915_WAIT_LOCKED |
3754                                   (write ? I915_WAIT_ALL : 0),
3755                                   MAX_SCHEDULE_TIMEOUT,
3756                                   NULL);
3757        if (ret)
3758                return ret;
3759
3760        if (obj->write_domain == I915_GEM_DOMAIN_WC)
3761                return 0;
3762
3763        /* Flush and acquire obj->pages so that we are coherent through
3764         * direct access in memory with previous cached writes through
3765         * shmemfs and that our cache domain tracking remains valid.
3766         * For example, if the obj->filp was moved to swap without us
3767         * being notified and releasing the pages, we would mistakenly
3768         * continue to assume that the obj remained out of the CPU cached
3769         * domain.
3770         */
3771        ret = i915_gem_object_pin_pages(obj);
3772        if (ret)
3773                return ret;
3774
3775        flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3776
3777        /* Serialise direct access to this object with the barriers for
3778         * coherent writes from the GPU, by effectively invalidating the
3779         * WC domain upon first access.
3780         */
3781        if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
3782                mb();
3783
3784        /* It should now be out of any other write domains, and we can update
3785         * the domain values for our changes.
3786         */
3787        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
3788        obj->read_domains |= I915_GEM_DOMAIN_WC;
3789        if (write) {
3790                obj->read_domains = I915_GEM_DOMAIN_WC;
3791                obj->write_domain = I915_GEM_DOMAIN_WC;
3792                obj->mm.dirty = true;
3793        }
3794
3795        i915_gem_object_unpin_pages(obj);
3796        return 0;
3797}
3798
3799/**
3800 * Moves a single object to the GTT read, and possibly write domain.
3801 * @obj: object to act on
3802 * @write: ask for write access or read only
3803 *
3804 * This function returns when the move is complete, including waiting on
3805 * flushes to occur.
3806 */
3807int
3808i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3809{
3810        int ret;
3811
3812        lockdep_assert_held(&obj->base.dev->struct_mutex);
3813
3814        ret = i915_gem_object_wait(obj,
3815                                   I915_WAIT_INTERRUPTIBLE |
3816                                   I915_WAIT_LOCKED |
3817                                   (write ? I915_WAIT_ALL : 0),
3818                                   MAX_SCHEDULE_TIMEOUT,
3819                                   NULL);
3820        if (ret)
3821                return ret;
3822
3823        if (obj->write_domain == I915_GEM_DOMAIN_GTT)
3824                return 0;
3825
3826        /* Flush and acquire obj->pages so that we are coherent through
3827         * direct access in memory with previous cached writes through
3828         * shmemfs and that our cache domain tracking remains valid.
3829         * For example, if the obj->filp was moved to swap without us
3830         * being notified and releasing the pages, we would mistakenly
3831         * continue to assume that the obj remained out of the CPU cached
3832         * domain.
3833         */
3834        ret = i915_gem_object_pin_pages(obj);
3835        if (ret)
3836                return ret;
3837
3838        flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3839
3840        /* Serialise direct access to this object with the barriers for
3841         * coherent writes from the GPU, by effectively invalidating the
3842         * GTT domain upon first access.
3843         */
3844        if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
3845                mb();
3846
3847        /* It should now be out of any other write domains, and we can update
3848         * the domain values for our changes.
3849         */
3850        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3851        obj->read_domains |= I915_GEM_DOMAIN_GTT;
3852        if (write) {
3853                obj->read_domains = I915_GEM_DOMAIN_GTT;
3854                obj->write_domain = I915_GEM_DOMAIN_GTT;
3855                obj->mm.dirty = true;
3856        }
3857
3858        i915_gem_object_unpin_pages(obj);
3859        return 0;
3860}
3861
3862/**
3863 * Changes the cache-level of an object across all VMA.
3864 * @obj: object to act on
3865 * @cache_level: new cache level to set for the object
3866 *
3867 * After this function returns, the object will be in the new cache-level
3868 * across all GTT and the contents of the backing storage will be coherent,
3869 * with respect to the new cache-level. In order to keep the backing storage
3870 * coherent for all users, we only allow a single cache level to be set
3871 * globally on the object and prevent it from being changed whilst the
3872 * hardware is reading from the object. That is if the object is currently
3873 * on the scanout it will be set to uncached (or equivalent display
3874 * cache coherency) and all non-MOCS GPU access will also be uncached so
3875 * that all direct access to the scanout remains coherent.
3876 */
3877int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3878                                    enum i915_cache_level cache_level)
3879{
3880        struct i915_vma *vma;
3881        int ret;
3882
3883        lockdep_assert_held(&obj->base.dev->struct_mutex);
3884
3885        if (obj->cache_level == cache_level)
3886                return 0;
3887
3888        /* Inspect the list of currently bound VMA and unbind any that would
3889         * be invalid given the new cache-level. This is principally to
3890         * catch the issue of the CS prefetch crossing page boundaries and
3891         * reading an invalid PTE on older architectures.
3892         */
3893restart:
3894        list_for_each_entry(vma, &obj->vma_list, obj_link) {
3895                if (!drm_mm_node_allocated(&vma->node))
3896                        continue;
3897
3898                if (i915_vma_is_pinned(vma)) {
3899                        DRM_DEBUG("can not change the cache level of pinned objects\n");
3900                        return -EBUSY;
3901                }
3902
3903                if (!i915_vma_is_closed(vma) &&
3904                    i915_gem_valid_gtt_space(vma, cache_level))
3905                        continue;
3906
3907                ret = i915_vma_unbind(vma);
3908                if (ret)
3909                        return ret;
3910
3911                /* As unbinding may affect other elements in the
3912                 * obj->vma_list (due to side-effects from retiring
3913                 * an active vma), play safe and restart the iterator.
3914                 */
3915                goto restart;
3916        }
3917
3918        /* We can reuse the existing drm_mm nodes but need to change the
3919         * cache-level on the PTE. We could simply unbind them all and
3920         * rebind with the correct cache-level on next use. However since
3921         * we already have a valid slot, dma mapping, pages etc, we may as
3922         * rewrite the PTE in the belief that doing so tramples upon less
3923         * state and so involves less work.
3924         */
3925        if (obj->bind_count) {
3926                /* Before we change the PTE, the GPU must not be accessing it.
3927                 * If we wait upon the object, we know that all the bound
3928                 * VMA are no longer active.
3929                 */
3930                ret = i915_gem_object_wait(obj,
3931                                           I915_WAIT_INTERRUPTIBLE |
3932                                           I915_WAIT_LOCKED |
3933                                           I915_WAIT_ALL,
3934                                           MAX_SCHEDULE_TIMEOUT,
3935                                           NULL);
3936                if (ret)
3937                        return ret;
3938
3939                if (!HAS_LLC(to_i915(obj->base.dev)) &&
3940                    cache_level != I915_CACHE_NONE) {
3941                        /* Access to snoopable pages through the GTT is
3942                         * incoherent and on some machines causes a hard
3943                         * lockup. Relinquish the CPU mmaping to force
3944                         * userspace to refault in the pages and we can
3945                         * then double check if the GTT mapping is still
3946                         * valid for that pointer access.
3947                         */
3948                        i915_gem_release_mmap(obj);
3949
3950                        /* As we no longer need a fence for GTT access,
3951                         * we can relinquish it now (and so prevent having
3952                         * to steal a fence from someone else on the next
3953                         * fence request). Note GPU activity would have
3954                         * dropped the fence as all snoopable access is
3955                         * supposed to be linear.
3956                         */
3957                        for_each_ggtt_vma(vma, obj) {
3958                                ret = i915_vma_put_fence(vma);
3959                                if (ret)
3960                                        return ret;
3961                        }
3962                } else {
3963                        /* We either have incoherent backing store and
3964                         * so no GTT access or the architecture is fully
3965                         * coherent. In such cases, existing GTT mmaps
3966                         * ignore the cache bit in the PTE and we can
3967                         * rewrite it without confusing the GPU or having
3968                         * to force userspace to fault back in its mmaps.
3969                         */
3970                }
3971
3972                list_for_each_entry(vma, &obj->vma_list, obj_link) {
3973                        if (!drm_mm_node_allocated(&vma->node))
3974                                continue;
3975
3976                        ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3977                        if (ret)
3978                                return ret;
3979                }
3980        }
3981
3982        list_for_each_entry(vma, &obj->vma_list, obj_link)
3983                vma->node.color = cache_level;
3984        i915_gem_object_set_cache_coherency(obj, cache_level);
3985        obj->cache_dirty = true; /* Always invalidate stale cachelines */
3986
3987        return 0;
3988}
3989
3990int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3991                               struct drm_file *file)
3992{
3993        struct drm_i915_gem_caching *args = data;
3994        struct drm_i915_gem_object *obj;
3995        int err = 0;
3996
3997        rcu_read_lock();
3998        obj = i915_gem_object_lookup_rcu(file, args->handle);
3999        if (!obj) {
4000                err = -ENOENT;
4001                goto out;
4002        }
4003
4004        switch (obj->cache_level) {
4005        case I915_CACHE_LLC:
4006        case I915_CACHE_L3_LLC:
4007                args->caching = I915_CACHING_CACHED;
4008                break;
4009
4010        case I915_CACHE_WT:
4011                args->caching = I915_CACHING_DISPLAY;
4012                break;
4013
4014        default:
4015                args->caching = I915_CACHING_NONE;
4016                break;
4017        }
4018out:
4019        rcu_read_unlock();
4020        return err;
4021}
4022
4023int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4024                               struct drm_file *file)
4025{
4026        struct drm_i915_private *i915 = to_i915(dev);
4027        struct drm_i915_gem_caching *args = data;
4028        struct drm_i915_gem_object *obj;
4029        enum i915_cache_level level;
4030        int ret = 0;
4031
4032        switch (args->caching) {
4033        case I915_CACHING_NONE:
4034                level = I915_CACHE_NONE;
4035                break;
4036        case I915_CACHING_CACHED:
4037                /*
4038                 * Due to a HW issue on BXT A stepping, GPU stores via a
4039                 * snooped mapping may leave stale data in a corresponding CPU
4040                 * cacheline, whereas normally such cachelines would get
4041                 * invalidated.
4042                 */
4043                if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
4044                        return -ENODEV;
4045
4046                level = I915_CACHE_LLC;
4047                break;
4048        case I915_CACHING_DISPLAY:
4049                level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
4050                break;
4051        default:
4052                return -EINVAL;
4053        }
4054
4055        obj = i915_gem_object_lookup(file, args->handle);
4056        if (!obj)
4057                return -ENOENT;
4058
4059        /*
4060         * The caching mode of proxy object is handled by its generator, and
4061         * not allowed to be changed by userspace.
4062         */
4063        if (i915_gem_object_is_proxy(obj)) {
4064                ret = -ENXIO;
4065                goto out;
4066        }
4067
4068        if (obj->cache_level == level)
4069                goto out;
4070
4071        ret = i915_gem_object_wait(obj,
4072                                   I915_WAIT_INTERRUPTIBLE,
4073                                   MAX_SCHEDULE_TIMEOUT,
4074                                   to_rps_client(file));
4075        if (ret)
4076                goto out;
4077
4078        ret = i915_mutex_lock_interruptible(dev);
4079        if (ret)
4080                goto out;
4081
4082        ret = i915_gem_object_set_cache_level(obj, level);
4083        mutex_unlock(&dev->struct_mutex);
4084
4085out:
4086        i915_gem_object_put(obj);
4087        return ret;
4088}
4089
4090/*
4091 * Prepare buffer for display plane (scanout, cursors, etc).
4092 * Can be called from an uninterruptible phase (modesetting) and allows
4093 * any flushes to be pipelined (for pageflips).
4094 */
4095struct i915_vma *
4096i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4097                                     u32 alignment,
4098                                     const struct i915_ggtt_view *view,
4099                                     unsigned int flags)
4100{
4101        struct i915_vma *vma;
4102        int ret;
4103
4104        lockdep_assert_held(&obj->base.dev->struct_mutex);
4105
4106        /* Mark the global pin early so that we account for the
4107         * display coherency whilst setting up the cache domains.
4108         */
4109        obj->pin_global++;
4110
4111        /* The display engine is not coherent with the LLC cache on gen6.  As
4112         * a result, we make sure that the pinning that is about to occur is
4113         * done with uncached PTEs. This is lowest common denominator for all
4114         * chipsets.
4115         *
4116         * However for gen6+, we could do better by using the GFDT bit instead
4117         * of uncaching, which would allow us to flush all the LLC-cached data
4118         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4119         */
4120        ret = i915_gem_object_set_cache_level(obj,
4121                                              HAS_WT(to_i915(obj->base.dev)) ?
4122                                              I915_CACHE_WT : I915_CACHE_NONE);
4123        if (ret) {
4124                vma = ERR_PTR(ret);
4125                goto err_unpin_global;
4126        }
4127
4128        /* As the user may map the buffer once pinned in the display plane
4129         * (e.g. libkms for the bootup splash), we have to ensure that we
4130         * always use map_and_fenceable for all scanout buffers. However,
4131         * it may simply be too big to fit into mappable, in which case
4132         * put it anyway and hope that userspace can cope (but always first
4133         * try to preserve the existing ABI).
4134         */
4135        vma = ERR_PTR(-ENOSPC);
4136        if ((flags & PIN_MAPPABLE) == 0 &&
4137            (!view || view->type == I915_GGTT_VIEW_NORMAL))
4138                vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
4139                                               flags |
4140                                               PIN_MAPPABLE |
4141                                               PIN_NONBLOCK);
4142        if (IS_ERR(vma))
4143                vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
4144        if (IS_ERR(vma))
4145                goto err_unpin_global;
4146
4147        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
4148
4149        /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
4150        __i915_gem_object_flush_for_display(obj);
4151        intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
4152
4153        /* It should now be out of any other write domains, and we can update
4154         * the domain values for our changes.
4155         */
4156        obj->read_domains |= I915_GEM_DOMAIN_GTT;
4157
4158        return vma;
4159
4160err_unpin_global:
4161        obj->pin_global--;
4162        return vma;
4163}
4164
4165void
4166i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
4167{
4168        lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
4169
4170        if (WARN_ON(vma->obj->pin_global == 0))
4171                return;
4172
4173        if (--vma->obj->pin_global == 0)
4174                vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
4175
4176        /* Bump the LRU to try and avoid premature eviction whilst flipping  */
4177        i915_gem_object_bump_inactive_ggtt(vma->obj);
4178
4179        i915_vma_unpin(vma);
4180}
4181
4182/**
4183 * Moves a single object to the CPU read, and possibly write domain.
4184 * @obj: object to act on
4185 * @write: requesting write or read-only access
4186 *
4187 * This function returns when the move is complete, including waiting on
4188 * flushes to occur.
4189 */
4190int
4191i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4192{
4193        int ret;
4194
4195        lockdep_assert_held(&obj->base.dev->struct_mutex);
4196
4197        ret = i915_gem_object_wait(obj,
4198                                   I915_WAIT_INTERRUPTIBLE |
4199                                   I915_WAIT_LOCKED |
4200                                   (write ? I915_WAIT_ALL : 0),
4201                                   MAX_SCHEDULE_TIMEOUT,
4202                                   NULL);
4203        if (ret)
4204                return ret;
4205
4206        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
4207
4208        /* Flush the CPU cache if it's still invalid. */
4209        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4210                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
4211                obj->read_domains |= I915_GEM_DOMAIN_CPU;
4212        }
4213
4214        /* It should now be out of any other write domains, and we can update
4215         * the domain values for our changes.
4216         */
4217        GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
4218
4219        /* If we're writing through the CPU, then the GPU read domains will
4220         * need to be invalidated at next use.
4221         */
4222        if (write)
4223                __start_cpu_write(obj);
4224
4225        return 0;
4226}
4227
4228/* Throttle our rendering by waiting until the ring has completed our requests
4229 * emitted over 20 msec ago.
4230 *
4231 * Note that if we were to use the current jiffies each time around the loop,
4232 * we wouldn't escape the function with any frames outstanding if the time to
4233 * render a frame was over 20ms.
4234 *
4235 * This should get us reasonable parallelism between CPU and GPU but also
4236 * relatively low latency when blocking on a particular request to finish.
4237 */
4238static int
4239i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4240{
4241        struct drm_i915_private *dev_priv = to_i915(dev);
4242        struct drm_i915_file_private *file_priv = file->driver_priv;
4243        unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
4244        struct i915_request *request, *target = NULL;
4245        long ret;
4246
4247        /* ABI: return -EIO if already wedged */
4248        if (i915_terminally_wedged(&dev_priv->gpu_error))
4249                return -EIO;
4250
4251        spin_lock(&file_priv->mm.lock);
4252        list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
4253                if (time_after_eq(request->emitted_jiffies, recent_enough))
4254                        break;
4255
4256                if (target) {
4257                        list_del(&target->client_link);
4258                        target->file_priv = NULL;
4259                }
4260
4261                target = request;
4262        }
4263        if (target)
4264                i915_request_get(target);
4265        spin_unlock(&file_priv->mm.lock);
4266
4267        if (target == NULL)
4268                return 0;
4269
4270        ret = i915_request_wait(target,
4271                                I915_WAIT_INTERRUPTIBLE,
4272                                MAX_SCHEDULE_TIMEOUT);
4273        i915_request_put(target);
4274
4275        return ret < 0 ? ret : 0;
4276}
4277
4278struct i915_vma *
4279i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
4280                         const struct i915_ggtt_view *view,
4281                         u64 size,
4282                         u64 alignment,
4283                         u64 flags)
4284{
4285        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
4286        struct i915_address_space *vm = &dev_priv->ggtt.base;
4287        struct i915_vma *vma;
4288        int ret;
4289
4290        lockdep_assert_held(&obj->base.dev->struct_mutex);
4291
4292        if (flags & PIN_MAPPABLE &&
4293            (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
4294                /* If the required space is larger than the available
4295                 * aperture, we will not able to find a slot for the
4296                 * object and unbinding the object now will be in
4297                 * vain. Worse, doing so may cause us to ping-pong
4298                 * the object in and out of the Global GTT and
4299                 * waste a lot of cycles under the mutex.
4300                 */
4301                if (obj->base.size > dev_priv->ggtt.mappable_end)
4302                        return ERR_PTR(-E2BIG);
4303
4304                /* If NONBLOCK is set the caller is optimistically
4305                 * trying to cache the full object within the mappable
4306                 * aperture, and *must* have a fallback in place for
4307                 * situations where we cannot bind the object. We
4308                 * can be a little more lax here and use the fallback
4309                 * more often to avoid costly migrations of ourselves
4310                 * and other objects within the aperture.
4311                 *
4312                 * Half-the-aperture is used as a simple heuristic.
4313                 * More interesting would to do search for a free
4314                 * block prior to making the commitment to unbind.
4315                 * That caters for the self-harm case, and with a
4316                 * little more heuristics (e.g. NOFAULT, NOEVICT)
4317                 * we could try to minimise harm to others.
4318                 */
4319                if (flags & PIN_NONBLOCK &&
4320                    obj->base.size > dev_priv->ggtt.mappable_end / 2)
4321                        return ERR_PTR(-ENOSPC);
4322        }
4323
4324        vma = i915_vma_instance(obj, vm, view);
4325        if (unlikely(IS_ERR(vma)))
4326                return vma;
4327
4328        if (i915_vma_misplaced(vma, size, alignment, flags)) {
4329                if (flags & PIN_NONBLOCK) {
4330                        if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
4331                                return ERR_PTR(-ENOSPC);
4332
4333                        if (flags & PIN_MAPPABLE &&
4334                            vma->fence_size > dev_priv->ggtt.mappable_end / 2)
4335                                return ERR_PTR(-ENOSPC);
4336                }
4337
4338                WARN(i915_vma_is_pinned(vma),
4339                     "bo is already pinned in ggtt with incorrect alignment:"
4340                     " offset=%08x, req.alignment=%llx,"
4341                     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
4342                     i915_ggtt_offset(vma), alignment,
4343                     !!(flags & PIN_MAPPABLE),
4344                     i915_vma_is_map_and_fenceable(vma));
4345                ret = i915_vma_unbind(vma);
4346                if (ret)
4347                        return ERR_PTR(ret);
4348        }
4349
4350        ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
4351        if (ret)
4352                return ERR_PTR(ret);
4353
4354        return vma;
4355}
4356
4357static __always_inline unsigned int __busy_read_flag(unsigned int id)
4358{
4359        /* Note that we could alias engines in the execbuf API, but
4360         * that would be very unwise as it prevents userspace from
4361         * fine control over engine selection. Ahem.
4362         *
4363         * This should be something like EXEC_MAX_ENGINE instead of
4364         * I915_NUM_ENGINES.
4365         */
4366        BUILD_BUG_ON(I915_NUM_ENGINES > 16);
4367        return 0x10000 << id;
4368}
4369
4370static __always_inline unsigned int __busy_write_id(unsigned int id)
4371{
4372        /* The uABI guarantees an active writer is also amongst the read
4373         * engines. This would be true if we accessed the activity tracking
4374         * under the lock, but as we perform the lookup of the object and
4375         * its activity locklessly we can not guarantee that the last_write
4376         * being active implies that we have set the same engine flag from
4377         * last_read - hence we always set both read and write busy for
4378         * last_write.
4379         */
4380        return id | __busy_read_flag(id);
4381}
4382
4383static __always_inline unsigned int
4384__busy_set_if_active(const struct dma_fence *fence,
4385                     unsigned int (*flag)(unsigned int id))
4386{
4387        struct i915_request *rq;
4388
4389        /* We have to check the current hw status of the fence as the uABI
4390         * guarantees forward progress. We could rely on the idle worker
4391         * to eventually flush us, but to minimise latency just ask the
4392         * hardware.
4393         *
4394         * Note we only report on the status of native fences.
4395         */
4396        if (!dma_fence_is_i915(fence))
4397                return 0;
4398
4399        /* opencode to_request() in order to avoid const warnings */
4400        rq = container_of(fence, struct i915_request, fence);
4401        if (i915_request_completed(rq))
4402                return 0;
4403
4404        return flag(rq->engine->uabi_id);
4405}
4406
4407static __always_inline unsigned int
4408busy_check_reader(const struct dma_fence *fence)
4409{
4410        return __busy_set_if_active(fence, __busy_read_flag);
4411}
4412
4413static __always_inline unsigned int
4414busy_check_writer(const struct dma_fence *fence)
4415{
4416        if (!fence)
4417                return 0;
4418
4419        return __busy_set_if_active(fence, __busy_write_id);
4420}
4421
4422int
4423i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4424                    struct drm_file *file)
4425{
4426        struct drm_i915_gem_busy *args = data;
4427        struct drm_i915_gem_object *obj;
4428        struct reservation_object_list *list;
4429        unsigned int seq;
4430        int err;
4431
4432        err = -ENOENT;
4433        rcu_read_lock();
4434        obj = i915_gem_object_lookup_rcu(file, args->handle);
4435        if (!obj)
4436                goto out;
4437
4438        /* A discrepancy here is that we do not report the status of
4439         * non-i915 fences, i.e. even though we may report the object as idle,
4440         * a call to set-domain may still stall waiting for foreign rendering.
4441         * This also means that wait-ioctl may report an object as busy,
4442         * where busy-ioctl considers it idle.
4443         *
4444         * We trade the ability to warn of foreign fences to report on which
4445         * i915 engines are active for the object.
4446         *
4447         * Alternatively, we can trade that extra information on read/write
4448         * activity with
4449         *      args->busy =
4450         *              !reservation_object_test_signaled_rcu(obj->resv, true);
4451         * to report the overall busyness. This is what the wait-ioctl does.
4452         *
4453         */
4454retry:
4455        seq = raw_read_seqcount(&obj->resv->seq);
4456
4457        /* Translate the exclusive fence to the READ *and* WRITE engine */
4458        args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
4459
4460        /* Translate shared fences to READ set of engines */
4461        list = rcu_dereference(obj->resv->fence);
4462        if (list) {
4463                unsigned int shared_count = list->shared_count, i;
4464
4465                for (i = 0; i < shared_count; ++i) {
4466                        struct dma_fence *fence =
4467                                rcu_dereference(list->shared[i]);
4468
4469                        args->busy |= busy_check_reader(fence);
4470                }
4471        }
4472
4473        if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
4474                goto retry;
4475
4476        err = 0;
4477out:
4478        rcu_read_unlock();
4479        return err;
4480}
4481
4482int
4483i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4484                        struct drm_file *file_priv)
4485{
4486        return i915_gem_ring_throttle(dev, file_priv);
4487}
4488
4489int
4490i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4491                       struct drm_file *file_priv)
4492{
4493        struct drm_i915_private *dev_priv = to_i915(dev);
4494        struct drm_i915_gem_madvise *args = data;
4495        struct drm_i915_gem_object *obj;
4496        int err;
4497
4498        switch (args->madv) {
4499        case I915_MADV_DONTNEED:
4500        case I915_MADV_WILLNEED:
4501            break;
4502        default:
4503            return -EINVAL;
4504        }
4505
4506        obj = i915_gem_object_lookup(file_priv, args->handle);
4507        if (!obj)
4508                return -ENOENT;
4509
4510        err = mutex_lock_interruptible(&obj->mm.lock);
4511        if (err)
4512                goto out;
4513
4514        if (i915_gem_object_has_pages(obj) &&
4515            i915_gem_object_is_tiled(obj) &&
4516            dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4517                if (obj->mm.madv == I915_MADV_WILLNEED) {
4518                        GEM_BUG_ON(!obj->mm.quirked);
4519                        __i915_gem_object_unpin_pages(obj);
4520                        obj->mm.quirked = false;
4521                }
4522                if (args->madv == I915_MADV_WILLNEED) {
4523                        GEM_BUG_ON(obj->mm.quirked);
4524                        __i915_gem_object_pin_pages(obj);
4525                        obj->mm.quirked = true;
4526                }
4527        }
4528
4529        if (obj->mm.madv != __I915_MADV_PURGED)
4530                obj->mm.madv = args->madv;
4531
4532        /* if the object is no longer attached, discard its backing storage */
4533        if (obj->mm.madv == I915_MADV_DONTNEED &&
4534            !i915_gem_object_has_pages(obj))
4535                i915_gem_object_truncate(obj);
4536
4537        args->retained = obj->mm.madv != __I915_MADV_PURGED;
4538        mutex_unlock(&obj->mm.lock);
4539
4540out:
4541        i915_gem_object_put(obj);
4542        return err;
4543}
4544
4545static void
4546frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request)
4547{
4548        struct drm_i915_gem_object *obj =
4549                container_of(active, typeof(*obj), frontbuffer_write);
4550
4551        intel_fb_obj_flush(obj, ORIGIN_CS);
4552}
4553
4554void i915_gem_object_init(struct drm_i915_gem_object *obj,
4555                          const struct drm_i915_gem_object_ops *ops)
4556{
4557        mutex_init(&obj->mm.lock);
4558
4559        INIT_LIST_HEAD(&obj->vma_list);
4560        INIT_LIST_HEAD(&obj->lut_list);
4561        INIT_LIST_HEAD(&obj->batch_pool_link);
4562
4563        obj->ops = ops;
4564
4565        reservation_object_init(&obj->__builtin_resv);
4566        obj->resv = &obj->__builtin_resv;
4567
4568        obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4569        init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
4570
4571        obj->mm.madv = I915_MADV_WILLNEED;
4572        INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
4573        mutex_init(&obj->mm.get_page.lock);
4574
4575        i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4576}
4577
4578static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4579        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
4580                 I915_GEM_OBJECT_IS_SHRINKABLE,
4581
4582        .get_pages = i915_gem_object_get_pages_gtt,
4583        .put_pages = i915_gem_object_put_pages_gtt,
4584
4585        .pwrite = i915_gem_object_pwrite_gtt,
4586};
4587
4588static int i915_gem_object_create_shmem(struct drm_device *dev,
4589                                        struct drm_gem_object *obj,
4590                                        size_t size)
4591{
4592        struct drm_i915_private *i915 = to_i915(dev);
4593        unsigned long flags = VM_NORESERVE;
4594        struct file *filp;
4595
4596        drm_gem_private_object_init(dev, obj, size);
4597
4598        if (i915->mm.gemfs)
4599                filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
4600                                                 flags);
4601        else
4602                filp = shmem_file_setup("i915", size, flags);
4603
4604        if (IS_ERR(filp))
4605                return PTR_ERR(filp);
4606
4607        obj->filp = filp;
4608
4609        return 0;
4610}
4611
4612struct drm_i915_gem_object *
4613i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
4614{
4615        struct drm_i915_gem_object *obj;
4616        struct address_space *mapping;
4617        unsigned int cache_level;
4618        gfp_t mask;
4619        int ret;
4620
4621        /* There is a prevalence of the assumption that we fit the object's
4622         * page count inside a 32bit _signed_ variable. Let's document this and
4623         * catch if we ever need to fix it. In the meantime, if you do spot
4624         * such a local variable, please consider fixing!
4625         */
4626        if (size >> PAGE_SHIFT > INT_MAX)
4627                return ERR_PTR(-E2BIG);
4628
4629        if (overflows_type(size, obj->base.size))
4630                return ERR_PTR(-E2BIG);
4631
4632        obj = i915_gem_object_alloc(dev_priv);
4633        if (obj == NULL)
4634                return ERR_PTR(-ENOMEM);
4635
4636        ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
4637        if (ret)
4638                goto fail;
4639
4640        mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4641        if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
4642                /* 965gm cannot relocate objects above 4GiB. */
4643                mask &= ~__GFP_HIGHMEM;
4644                mask |= __GFP_DMA32;
4645        }
4646
4647        mapping = obj->base.filp->f_mapping;
4648        mapping_set_gfp_mask(mapping, mask);
4649        GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
4650
4651        i915_gem_object_init(obj, &i915_gem_object_ops);
4652
4653        obj->write_domain = I915_GEM_DOMAIN_CPU;
4654        obj->read_domains = I915_GEM_DOMAIN_CPU;
4655
4656        if (HAS_LLC(dev_priv))
4657                /* On some devices, we can have the GPU use the LLC (the CPU
4658                 * cache) for about a 10% performance improvement
4659                 * compared to uncached.  Graphics requests other than
4660                 * display scanout are coherent with the CPU in
4661                 * accessing this cache.  This means in this mode we
4662                 * don't need to clflush on the CPU side, and on the
4663                 * GPU side we only need to flush internal caches to
4664                 * get data visible to the CPU.
4665                 *
4666                 * However, we maintain the display planes as UC, and so
4667                 * need to rebind when first used as such.
4668                 */
4669                cache_level = I915_CACHE_LLC;
4670        else
4671                cache_level = I915_CACHE_NONE;
4672
4673        i915_gem_object_set_cache_coherency(obj, cache_level);
4674
4675        trace_i915_gem_object_create(obj);
4676
4677        return obj;
4678
4679fail:
4680        i915_gem_object_free(obj);
4681        return ERR_PTR(ret);
4682}
4683
4684static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4685{
4686        /* If we are the last user of the backing storage (be it shmemfs
4687         * pages or stolen etc), we know that the pages are going to be
4688         * immediately released. In this case, we can then skip copying
4689         * back the contents from the GPU.
4690         */
4691
4692        if (obj->mm.madv != I915_MADV_WILLNEED)
4693                return false;
4694
4695        if (obj->base.filp == NULL)
4696                return true;
4697
4698        /* At first glance, this looks racy, but then again so would be
4699         * userspace racing mmap against close. However, the first external
4700         * reference to the filp can only be obtained through the
4701         * i915_gem_mmap_ioctl() which safeguards us against the user
4702         * acquiring such a reference whilst we are in the middle of
4703         * freeing the object.
4704         */
4705        return atomic_long_read(&obj->base.filp->f_count) == 1;
4706}
4707
4708static void __i915_gem_free_objects(struct drm_i915_private *i915,
4709                                    struct llist_node *freed)
4710{
4711        struct drm_i915_gem_object *obj, *on;
4712
4713        intel_runtime_pm_get(i915);
4714        llist_for_each_entry_safe(obj, on, freed, freed) {
4715                struct i915_vma *vma, *vn;
4716
4717                trace_i915_gem_object_destroy(obj);
4718
4719                mutex_lock(&i915->drm.struct_mutex);
4720
4721                GEM_BUG_ON(i915_gem_object_is_active(obj));
4722                list_for_each_entry_safe(vma, vn,
4723                                         &obj->vma_list, obj_link) {
4724                        GEM_BUG_ON(i915_vma_is_active(vma));
4725                        vma->flags &= ~I915_VMA_PIN_MASK;
4726                        i915_vma_close(vma);
4727                }
4728                GEM_BUG_ON(!list_empty(&obj->vma_list));
4729                GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
4730
4731                /* This serializes freeing with the shrinker. Since the free
4732                 * is delayed, first by RCU then by the workqueue, we want the
4733                 * shrinker to be able to free pages of unreferenced objects,
4734                 * or else we may oom whilst there are plenty of deferred
4735                 * freed objects.
4736                 */
4737                if (i915_gem_object_has_pages(obj)) {
4738                        spin_lock(&i915->mm.obj_lock);
4739                        list_del_init(&obj->mm.link);
4740                        spin_unlock(&i915->mm.obj_lock);
4741                }
4742
4743                mutex_unlock(&i915->drm.struct_mutex);
4744
4745                GEM_BUG_ON(obj->bind_count);
4746                GEM_BUG_ON(obj->userfault_count);
4747                GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4748                GEM_BUG_ON(!list_empty(&obj->lut_list));
4749
4750                if (obj->ops->release)
4751                        obj->ops->release(obj);
4752
4753                if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4754                        atomic_set(&obj->mm.pages_pin_count, 0);
4755                __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4756                GEM_BUG_ON(i915_gem_object_has_pages(obj));
4757
4758                if (obj->base.import_attach)
4759                        drm_prime_gem_destroy(&obj->base, NULL);
4760
4761                reservation_object_fini(&obj->__builtin_resv);
4762                drm_gem_object_release(&obj->base);
4763                i915_gem_info_remove_obj(i915, obj->base.size);
4764
4765                kfree(obj->bit_17);
4766                i915_gem_object_free(obj);
4767
4768                GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
4769                atomic_dec(&i915->mm.free_count);
4770
4771                if (on)
4772                        cond_resched();
4773        }
4774        intel_runtime_pm_put(i915);
4775}
4776
4777static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4778{
4779        struct llist_node *freed;
4780
4781        /* Free the oldest, most stale object to keep the free_list short */
4782        freed = NULL;
4783        if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
4784                /* Only one consumer of llist_del_first() allowed */
4785                spin_lock(&i915->mm.free_lock);
4786                freed = llist_del_first(&i915->mm.free_list);
4787                spin_unlock(&i915->mm.free_lock);
4788        }
4789        if (unlikely(freed)) {
4790                freed->next = NULL;
4791                __i915_gem_free_objects(i915, freed);
4792        }
4793}
4794
4795static void __i915_gem_free_work(struct work_struct *work)
4796{
4797        struct drm_i915_private *i915 =
4798                container_of(work, struct drm_i915_private, mm.free_work);
4799        struct llist_node *freed;
4800
4801        /*
4802         * All file-owned VMA should have been released by this point through
4803         * i915_gem_close_object(), or earlier by i915_gem_context_close().
4804         * However, the object may also be bound into the global GTT (e.g.
4805         * older GPUs without per-process support, or for direct access through
4806         * the GTT either for the user or for scanout). Those VMA still need to
4807         * unbound now.
4808         */
4809
4810        spin_lock(&i915->mm.free_lock);
4811        while ((freed = llist_del_all(&i915->mm.free_list))) {
4812                spin_unlock(&i915->mm.free_lock);
4813
4814                __i915_gem_free_objects(i915, freed);
4815                if (need_resched())
4816                        return;
4817
4818                spin_lock(&i915->mm.free_lock);
4819        }
4820        spin_unlock(&i915->mm.free_lock);
4821}
4822
4823static void __i915_gem_free_object_rcu(struct rcu_head *head)
4824{
4825        struct drm_i915_gem_object *obj =
4826                container_of(head, typeof(*obj), rcu);
4827        struct drm_i915_private *i915 = to_i915(obj->base.dev);
4828
4829        /*
4830         * Since we require blocking on struct_mutex to unbind the freed
4831         * object from the GPU before releasing resources back to the
4832         * system, we can not do that directly from the RCU callback (which may
4833         * be a softirq context), but must instead then defer that work onto a
4834         * kthread. We use the RCU callback rather than move the freed object
4835         * directly onto the work queue so that we can mix between using the
4836         * worker and performing frees directly from subsequent allocations for
4837         * crude but effective memory throttling.
4838         */
4839        if (llist_add(&obj->freed, &i915->mm.free_list))
4840                queue_work(i915->wq, &i915->mm.free_work);
4841}
4842
4843void i915_gem_free_object(struct drm_gem_object *gem_obj)
4844{
4845        struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4846
4847        if (obj->mm.quirked)
4848                __i915_gem_object_unpin_pages(obj);
4849
4850        if (discard_backing_storage(obj))
4851                obj->mm.madv = I915_MADV_DONTNEED;
4852
4853        /*
4854         * Before we free the object, make sure any pure RCU-only
4855         * read-side critical sections are complete, e.g.
4856         * i915_gem_busy_ioctl(). For the corresponding synchronized
4857         * lookup see i915_gem_object_lookup_rcu().
4858         */
4859        atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
4860        call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4861}
4862
4863void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4864{
4865        lockdep_assert_held(&obj->base.dev->struct_mutex);
4866
4867        if (!i915_gem_object_has_active_reference(obj) &&
4868            i915_gem_object_is_active(obj))
4869                i915_gem_object_set_active_reference(obj);
4870        else
4871                i915_gem_object_put(obj);
4872}
4873
4874static void assert_kernel_context_is_current(struct drm_i915_private *i915)
4875{
4876        struct i915_gem_context *kernel_context = i915->kernel_context;
4877        struct intel_engine_cs *engine;
4878        enum intel_engine_id id;
4879
4880        for_each_engine(engine, i915, id) {
4881                GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request));
4882                GEM_BUG_ON(engine->last_retired_context != kernel_context);
4883        }
4884}
4885
4886void i915_gem_sanitize(struct drm_i915_private *i915)
4887{
4888        if (i915_terminally_wedged(&i915->gpu_error)) {
4889                mutex_lock(&i915->drm.struct_mutex);
4890                i915_gem_unset_wedged(i915);
4891                mutex_unlock(&i915->drm.struct_mutex);
4892        }
4893
4894        /*
4895         * If we inherit context state from the BIOS or earlier occupants
4896         * of the GPU, the GPU may be in an inconsistent state when we
4897         * try to take over. The only way to remove the earlier state
4898         * is by resetting. However, resetting on earlier gen is tricky as
4899         * it may impact the display and we are uncertain about the stability
4900         * of the reset, so this could be applied to even earlier gen.
4901         */
4902        if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
4903                WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
4904}
4905
4906int i915_gem_suspend(struct drm_i915_private *dev_priv)
4907{
4908        struct drm_device *dev = &dev_priv->drm;
4909        int ret;
4910
4911        intel_runtime_pm_get(dev_priv);
4912        intel_suspend_gt_powersave(dev_priv);
4913
4914        mutex_lock(&dev->struct_mutex);
4915
4916        /* We have to flush all the executing contexts to main memory so
4917         * that they can saved in the hibernation image. To ensure the last
4918         * context image is coherent, we have to switch away from it. That
4919         * leaves the dev_priv->kernel_context still active when
4920         * we actually suspend, and its image in memory may not match the GPU
4921         * state. Fortunately, the kernel_context is disposable and we do
4922         * not rely on its state.
4923         */
4924        if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
4925                ret = i915_gem_switch_to_kernel_context(dev_priv);
4926                if (ret)
4927                        goto err_unlock;
4928
4929                ret = i915_gem_wait_for_idle(dev_priv,
4930                                             I915_WAIT_INTERRUPTIBLE |
4931                                             I915_WAIT_LOCKED);
4932                if (ret && ret != -EIO)
4933                        goto err_unlock;
4934
4935                assert_kernel_context_is_current(dev_priv);
4936        }
4937        i915_gem_contexts_lost(dev_priv);
4938        mutex_unlock(&dev->struct_mutex);
4939
4940        intel_uc_suspend(dev_priv);
4941
4942        cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4943        cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4944
4945        /* As the idle_work is rearming if it detects a race, play safe and
4946         * repeat the flush until it is definitely idle.
4947         */
4948        drain_delayed_work(&dev_priv->gt.idle_work);
4949
4950        /* Assert that we sucessfully flushed all the work and
4951         * reset the GPU back to its idle, low power state.
4952         */
4953        WARN_ON(dev_priv->gt.awake);
4954        if (WARN_ON(!intel_engines_are_idle(dev_priv)))
4955                i915_gem_set_wedged(dev_priv); /* no hope, discard everything */
4956
4957        /*
4958         * Neither the BIOS, ourselves or any other kernel
4959         * expects the system to be in execlists mode on startup,
4960         * so we need to reset the GPU back to legacy mode. And the only
4961         * known way to disable logical contexts is through a GPU reset.
4962         *
4963         * So in order to leave the system in a known default configuration,
4964         * always reset the GPU upon unload and suspend. Afterwards we then
4965         * clean up the GEM state tracking, flushing off the requests and
4966         * leaving the system in a known idle state.
4967         *
4968         * Note that is of the upmost importance that the GPU is idle and
4969         * all stray writes are flushed *before* we dismantle the backing
4970         * storage for the pinned objects.
4971         *
4972         * However, since we are uncertain that resetting the GPU on older
4973         * machines is a good idea, we don't - just in case it leaves the
4974         * machine in an unusable condition.
4975         */
4976        i915_gem_sanitize(dev_priv);
4977
4978        intel_runtime_pm_put(dev_priv);
4979        return 0;
4980
4981err_unlock:
4982        mutex_unlock(&dev->struct_mutex);
4983        intel_runtime_pm_put(dev_priv);
4984        return ret;
4985}
4986
4987void i915_gem_resume(struct drm_i915_private *i915)
4988{
4989        WARN_ON(i915->gt.awake);
4990
4991        mutex_lock(&i915->drm.struct_mutex);
4992        intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
4993
4994        i915_gem_restore_gtt_mappings(i915);
4995        i915_gem_restore_fences(i915);
4996
4997        /*
4998         * As we didn't flush the kernel context before suspend, we cannot
4999         * guarantee that the context image is complete. So let's just reset
5000         * it and start again.
5001         */
5002        i915->gt.resume(i915);
5003
5004        if (i915_gem_init_hw(i915))
5005                goto err_wedged;
5006
5007        intel_uc_resume(i915);
5008
5009        /* Always reload a context for powersaving. */
5010        if (i915_gem_switch_to_kernel_context(i915))
5011                goto err_wedged;
5012
5013out_unlock:
5014        intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
5015        mutex_unlock(&i915->drm.struct_mutex);
5016        return;
5017
5018err_wedged:
5019        if (!i915_terminally_wedged(&i915->gpu_error)) {
5020                DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
5021                i915_gem_set_wedged(i915);
5022        }
5023        goto out_unlock;
5024}
5025
5026void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
5027{
5028        if (INTEL_GEN(dev_priv) < 5 ||
5029            dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5030                return;
5031
5032        I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5033                                 DISP_TILE_SURFACE_SWIZZLING);
5034
5035        if (IS_GEN5(dev_priv))
5036                return;
5037
5038        I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5039        if (IS_GEN6(dev_priv))
5040                I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5041        else if (IS_GEN7(dev_priv))
5042                I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5043        else if (IS_GEN8(dev_priv))
5044                I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5045        else
5046                BUG();
5047}
5048
5049static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
5050{
5051        I915_WRITE(RING_CTL(base), 0);
5052        I915_WRITE(RING_HEAD(base), 0);
5053        I915_WRITE(RING_TAIL(base), 0);
5054        I915_WRITE(RING_START(base), 0);
5055}
5056
5057static void init_unused_rings(struct drm_i915_private *dev_priv)
5058{
5059        if (IS_I830(dev_priv)) {
5060                init_unused_ring(dev_priv, PRB1_BASE);
5061                init_unused_ring(dev_priv, SRB0_BASE);
5062                init_unused_ring(dev_priv, SRB1_BASE);
5063                init_unused_ring(dev_priv, SRB2_BASE);
5064                init_unused_ring(dev_priv, SRB3_BASE);
5065        } else if (IS_GEN2(dev_priv)) {
5066                init_unused_ring(dev_priv, SRB0_BASE);
5067                init_unused_ring(dev_priv, SRB1_BASE);
5068        } else if (IS_GEN3(dev_priv)) {
5069                init_unused_ring(dev_priv, PRB1_BASE);
5070                init_unused_ring(dev_priv, PRB2_BASE);
5071        }
5072}
5073
5074static int __i915_gem_restart_engines(void *data)
5075{
5076        struct drm_i915_private *i915 = data;
5077        struct intel_engine_cs *engine;
5078        enum intel_engine_id id;
5079        int err;
5080
5081        for_each_engine(engine, i915, id) {
5082                err = engine->init_hw(engine);
5083                if (err) {
5084                        DRM_ERROR("Failed to restart %s (%d)\n",
5085                                  engine->name, err);
5086                        return err;
5087                }
5088        }
5089
5090        return 0;
5091}
5092
5093int i915_gem_init_hw(struct drm_i915_private *dev_priv)
5094{
5095        int ret;
5096
5097        dev_priv->gt.last_init_time = ktime_get();
5098
5099        /* Double layer security blanket, see i915_gem_init() */
5100        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5101
5102        if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
5103                I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5104
5105        if (IS_HASWELL(dev_priv))
5106                I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
5107                           LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5108
5109        if (HAS_PCH_NOP(dev_priv)) {
5110                if (IS_IVYBRIDGE(dev_priv)) {
5111                        u32 temp = I915_READ(GEN7_MSG_CTL);
5112                        temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5113                        I915_WRITE(GEN7_MSG_CTL, temp);
5114                } else if (INTEL_GEN(dev_priv) >= 7) {
5115                        u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5116                        temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5117                        I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5118                }
5119        }
5120
5121        i915_gem_init_swizzling(dev_priv);
5122
5123        /*
5124         * At least 830 can leave some of the unused rings
5125         * "active" (ie. head != tail) after resume which
5126         * will prevent c3 entry. Makes sure all unused rings
5127         * are totally idle.
5128         */
5129        init_unused_rings(dev_priv);
5130
5131        BUG_ON(!dev_priv->kernel_context);
5132        if (i915_terminally_wedged(&dev_priv->gpu_error)) {
5133                ret = -EIO;
5134                goto out;
5135        }
5136
5137        ret = i915_ppgtt_init_hw(dev_priv);
5138        if (ret) {
5139                DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
5140                goto out;
5141        }
5142
5143        /* We can't enable contexts until all firmware is loaded */
5144        ret = intel_uc_init_hw(dev_priv);
5145        if (ret) {
5146                DRM_ERROR("Enabling uc failed (%d)\n", ret);
5147                goto out;
5148        }
5149
5150        intel_mocs_init_l3cc_table(dev_priv);
5151
5152        /* Only when the HW is re-initialised, can we replay the requests */
5153        ret = __i915_gem_restart_engines(dev_priv);
5154out:
5155        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5156        return ret;
5157}
5158
5159static int __intel_engines_record_defaults(struct drm_i915_private *i915)
5160{
5161        struct i915_gem_context *ctx;
5162        struct intel_engine_cs *engine;
5163        enum intel_engine_id id;
5164        int err;
5165
5166        /*
5167         * As we reset the gpu during very early sanitisation, the current
5168         * register state on the GPU should reflect its defaults values.
5169         * We load a context onto the hw (with restore-inhibit), then switch
5170         * over to a second context to save that default register state. We
5171         * can then prime every new context with that state so they all start
5172         * from the same default HW values.
5173         */
5174
5175        ctx = i915_gem_context_create_kernel(i915, 0);
5176        if (IS_ERR(ctx))
5177                return PTR_ERR(ctx);
5178
5179        for_each_engine(engine, i915, id) {
5180                struct i915_request *rq;
5181
5182                rq = i915_request_alloc(engine, ctx);
5183                if (IS_ERR(rq)) {
5184                        err = PTR_ERR(rq);
5185                        goto out_ctx;
5186                }
5187
5188                err = 0;
5189                if (engine->init_context)
5190                        err = engine->init_context(rq);
5191
5192                __i915_request_add(rq, true);
5193                if (err)
5194                        goto err_active;
5195        }
5196
5197        err = i915_gem_switch_to_kernel_context(i915);
5198        if (err)
5199                goto err_active;
5200
5201        err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
5202        if (err)
5203                goto err_active;
5204
5205        assert_kernel_context_is_current(i915);
5206
5207        for_each_engine(engine, i915, id) {
5208                struct i915_vma *state;
5209
5210                state = ctx->engine[id].state;
5211                if (!state)
5212                        continue;
5213
5214                /*
5215                 * As we will hold a reference to the logical state, it will
5216                 * not be torn down with the context, and importantly the
5217                 * object will hold onto its vma (making it possible for a
5218                 * stray GTT write to corrupt our defaults). Unmap the vma
5219                 * from the GTT to prevent such accidents and reclaim the
5220                 * space.
5221                 */
5222                err = i915_vma_unbind(state);
5223                if (err)
5224                        goto err_active;
5225
5226                err = i915_gem_object_set_to_cpu_domain(state->obj, false);
5227                if (err)
5228                        goto err_active;
5229
5230                engine->default_state = i915_gem_object_get(state->obj);
5231        }
5232
5233        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
5234                unsigned int found = intel_engines_has_context_isolation(i915);
5235
5236                /*
5237                 * Make sure that classes with multiple engine instances all
5238                 * share the same basic configuration.
5239                 */
5240                for_each_engine(engine, i915, id) {
5241                        unsigned int bit = BIT(engine->uabi_class);
5242                        unsigned int expected = engine->default_state ? bit : 0;
5243
5244                        if ((found & bit) != expected) {
5245                                DRM_ERROR("mismatching default context state for class %d on engine %s\n",
5246                                          engine->uabi_class, engine->name);
5247                        }
5248                }
5249        }
5250
5251out_ctx:
5252        i915_gem_context_set_closed(ctx);
5253        i915_gem_context_put(ctx);
5254        return err;
5255
5256err_active:
5257        /*
5258         * If we have to abandon now, we expect the engines to be idle
5259         * and ready to be torn-down. First try to flush any remaining
5260         * request, ensure we are pointing at the kernel context and
5261         * then remove it.
5262         */
5263        if (WARN_ON(i915_gem_switch_to_kernel_context(i915)))
5264                goto out_ctx;
5265
5266        if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED)))
5267                goto out_ctx;
5268
5269        i915_gem_contexts_lost(i915);
5270        goto out_ctx;
5271}
5272
5273int i915_gem_init(struct drm_i915_private *dev_priv)
5274{
5275        int ret;
5276
5277        /*
5278         * We need to fallback to 4K pages since gvt gtt handling doesn't
5279         * support huge page entries - we will need to check either hypervisor
5280         * mm can support huge guest page or just do emulation in gvt.
5281         */
5282        if (intel_vgpu_active(dev_priv))
5283                mkwrite_device_info(dev_priv)->page_sizes =
5284                        I915_GTT_PAGE_SIZE_4K;
5285
5286        dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
5287
5288        if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
5289                dev_priv->gt.resume = intel_lr_context_resume;
5290                dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
5291        } else {
5292                dev_priv->gt.resume = intel_legacy_submission_resume;
5293                dev_priv->gt.cleanup_engine = intel_engine_cleanup;
5294        }
5295
5296        ret = i915_gem_init_userptr(dev_priv);
5297        if (ret)
5298                return ret;
5299
5300        ret = intel_uc_init_misc(dev_priv);
5301        if (ret)
5302                return ret;
5303
5304        /* This is just a security blanket to placate dragons.
5305         * On some systems, we very sporadically observe that the first TLBs
5306         * used by the CS may be stale, despite us poking the TLB reset. If
5307         * we hold the forcewake during initialisation these problems
5308         * just magically go away.
5309         */
5310        mutex_lock(&dev_priv->drm.struct_mutex);
5311        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5312
5313        ret = i915_gem_init_ggtt(dev_priv);
5314        if (ret) {
5315                GEM_BUG_ON(ret == -EIO);
5316                goto err_unlock;
5317        }
5318
5319        ret = i915_gem_contexts_init(dev_priv);
5320        if (ret) {
5321                GEM_BUG_ON(ret == -EIO);
5322                goto err_ggtt;
5323        }
5324
5325        ret = intel_engines_init(dev_priv);
5326        if (ret) {
5327                GEM_BUG_ON(ret == -EIO);
5328                goto err_context;
5329        }
5330
5331        intel_init_gt_powersave(dev_priv);
5332
5333        ret = intel_uc_init(dev_priv);
5334        if (ret)
5335                goto err_pm;
5336
5337        ret = i915_gem_init_hw(dev_priv);
5338        if (ret)
5339                goto err_uc_init;
5340
5341        /*
5342         * Despite its name intel_init_clock_gating applies both display
5343         * clock gating workarounds; GT mmio workarounds and the occasional
5344         * GT power context workaround. Worse, sometimes it includes a context
5345         * register workaround which we need to apply before we record the
5346         * default HW state for all contexts.
5347         *
5348         * FIXME: break up the workarounds and apply them at the right time!
5349         */
5350        intel_init_clock_gating(dev_priv);
5351
5352        ret = __intel_engines_record_defaults(dev_priv);
5353        if (ret)
5354                goto err_init_hw;
5355
5356        if (i915_inject_load_failure()) {
5357                ret = -ENODEV;
5358                goto err_init_hw;
5359        }
5360
5361        if (i915_inject_load_failure()) {
5362                ret = -EIO;
5363                goto err_init_hw;
5364        }
5365
5366        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5367        mutex_unlock(&dev_priv->drm.struct_mutex);
5368
5369        return 0;
5370
5371        /*
5372         * Unwinding is complicated by that we want to handle -EIO to mean
5373         * disable GPU submission but keep KMS alive. We want to mark the
5374         * HW as irrevisibly wedged, but keep enough state around that the
5375         * driver doesn't explode during runtime.
5376         */
5377err_init_hw:
5378        i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
5379        i915_gem_contexts_lost(dev_priv);
5380        intel_uc_fini_hw(dev_priv);
5381err_uc_init:
5382        intel_uc_fini(dev_priv);
5383err_pm:
5384        if (ret != -EIO) {
5385                intel_cleanup_gt_powersave(dev_priv);
5386                i915_gem_cleanup_engines(dev_priv);
5387        }
5388err_context:
5389        if (ret != -EIO)
5390                i915_gem_contexts_fini(dev_priv);
5391err_ggtt:
5392err_unlock:
5393        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5394        mutex_unlock(&dev_priv->drm.struct_mutex);
5395
5396        intel_uc_fini_misc(dev_priv);
5397
5398        if (ret != -EIO)
5399                i915_gem_cleanup_userptr(dev_priv);
5400
5401        if (ret == -EIO) {
5402                /*
5403                 * Allow engine initialisation to fail by marking the GPU as
5404                 * wedged. But we only want to do this where the GPU is angry,
5405                 * for all other failure, such as an allocation failure, bail.
5406                 */
5407                if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
5408                        DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
5409                        i915_gem_set_wedged(dev_priv);
5410                }
5411                ret = 0;
5412        }
5413
5414        i915_gem_drain_freed_objects(dev_priv);
5415        return ret;
5416}
5417
5418void i915_gem_init_mmio(struct drm_i915_private *i915)
5419{
5420        i915_gem_sanitize(i915);
5421}
5422
5423void
5424i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
5425{
5426        struct intel_engine_cs *engine;
5427        enum intel_engine_id id;
5428
5429        for_each_engine(engine, dev_priv, id)
5430                dev_priv->gt.cleanup_engine(engine);
5431}
5432
5433void
5434i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
5435{
5436        int i;
5437
5438        if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
5439            !IS_CHERRYVIEW(dev_priv))
5440                dev_priv->num_fence_regs = 32;
5441        else if (INTEL_GEN(dev_priv) >= 4 ||
5442                 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
5443                 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
5444                dev_priv->num_fence_regs = 16;
5445        else
5446                dev_priv->num_fence_regs = 8;
5447
5448        if (intel_vgpu_active(dev_priv))
5449                dev_priv->num_fence_regs =
5450                                I915_READ(vgtif_reg(avail_rs.fence_num));
5451
5452        /* Initialize fence registers to zero */
5453        for (i = 0; i < dev_priv->num_fence_regs; i++) {
5454                struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
5455
5456                fence->i915 = dev_priv;
5457                fence->id = i;
5458                list_add_tail(&fence->link, &dev_priv->mm.fence_list);
5459        }
5460        i915_gem_restore_fences(dev_priv);
5461
5462        i915_gem_detect_bit_6_swizzle(dev_priv);
5463}
5464
5465static void i915_gem_init__mm(struct drm_i915_private *i915)
5466{
5467        spin_lock_init(&i915->mm.object_stat_lock);
5468        spin_lock_init(&i915->mm.obj_lock);
5469        spin_lock_init(&i915->mm.free_lock);
5470
5471        init_llist_head(&i915->mm.free_list);
5472
5473        INIT_LIST_HEAD(&i915->mm.unbound_list);
5474        INIT_LIST_HEAD(&i915->mm.bound_list);
5475        INIT_LIST_HEAD(&i915->mm.fence_list);
5476        INIT_LIST_HEAD(&i915->mm.userfault_list);
5477
5478        INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
5479}
5480
5481int
5482i915_gem_load_init(struct drm_i915_private *dev_priv)
5483{
5484        int err = -ENOMEM;
5485
5486        dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
5487        if (!dev_priv->objects)
5488                goto err_out;
5489
5490        dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
5491        if (!dev_priv->vmas)
5492                goto err_objects;
5493
5494        dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
5495        if (!dev_priv->luts)
5496                goto err_vmas;
5497
5498        dev_priv->requests = KMEM_CACHE(i915_request,
5499                                        SLAB_HWCACHE_ALIGN |
5500                                        SLAB_RECLAIM_ACCOUNT |
5501                                        SLAB_TYPESAFE_BY_RCU);
5502        if (!dev_priv->requests)
5503                goto err_luts;
5504
5505        dev_priv->dependencies = KMEM_CACHE(i915_dependency,
5506                                            SLAB_HWCACHE_ALIGN |
5507                                            SLAB_RECLAIM_ACCOUNT);
5508        if (!dev_priv->dependencies)
5509                goto err_requests;
5510
5511        dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
5512        if (!dev_priv->priorities)
5513                goto err_dependencies;
5514
5515        mutex_lock(&dev_priv->drm.struct_mutex);
5516        INIT_LIST_HEAD(&dev_priv->gt.timelines);
5517        err = i915_gem_timeline_init__global(dev_priv);
5518        mutex_unlock(&dev_priv->drm.struct_mutex);
5519        if (err)
5520                goto err_priorities;
5521
5522        i915_gem_init__mm(dev_priv);
5523
5524        INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
5525                          i915_gem_retire_work_handler);
5526        INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
5527                          i915_gem_idle_work_handler);
5528        init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
5529        init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5530
5531        atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
5532
5533        spin_lock_init(&dev_priv->fb_tracking.lock);
5534
5535        err = i915_gemfs_init(dev_priv);
5536        if (err)
5537                DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
5538
5539        return 0;
5540
5541err_priorities:
5542        kmem_cache_destroy(dev_priv->priorities);
5543err_dependencies:
5544        kmem_cache_destroy(dev_priv->dependencies);
5545err_requests:
5546        kmem_cache_destroy(dev_priv->requests);
5547err_luts:
5548        kmem_cache_destroy(dev_priv->luts);
5549err_vmas:
5550        kmem_cache_destroy(dev_priv->vmas);
5551err_objects:
5552        kmem_cache_destroy(dev_priv->objects);
5553err_out:
5554        return err;
5555}
5556
5557void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
5558{
5559        i915_gem_drain_freed_objects(dev_priv);
5560        GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
5561        GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
5562        WARN_ON(dev_priv->mm.object_count);
5563
5564        mutex_lock(&dev_priv->drm.struct_mutex);
5565        i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
5566        WARN_ON(!list_empty(&dev_priv->gt.timelines));
5567        mutex_unlock(&dev_priv->drm.struct_mutex);
5568
5569        kmem_cache_destroy(dev_priv->priorities);
5570        kmem_cache_destroy(dev_priv->dependencies);
5571        kmem_cache_destroy(dev_priv->requests);
5572        kmem_cache_destroy(dev_priv->luts);
5573        kmem_cache_destroy(dev_priv->vmas);
5574        kmem_cache_destroy(dev_priv->objects);
5575
5576        /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
5577        rcu_barrier();
5578
5579        i915_gemfs_fini(dev_priv);
5580}
5581
5582int i915_gem_freeze(struct drm_i915_private *dev_priv)
5583{
5584        /* Discard all purgeable objects, let userspace recover those as
5585         * required after resuming.
5586         */
5587        i915_gem_shrink_all(dev_priv);
5588
5589        return 0;
5590}
5591
5592int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
5593{
5594        struct drm_i915_gem_object *obj;
5595        struct list_head *phases[] = {
5596                &dev_priv->mm.unbound_list,
5597                &dev_priv->mm.bound_list,
5598                NULL
5599        }, **p;
5600
5601        /* Called just before we write the hibernation image.
5602         *
5603         * We need to update the domain tracking to reflect that the CPU
5604         * will be accessing all the pages to create and restore from the
5605         * hibernation, and so upon restoration those pages will be in the
5606         * CPU domain.
5607         *
5608         * To make sure the hibernation image contains the latest state,
5609         * we update that state just before writing out the image.
5610         *
5611         * To try and reduce the hibernation image, we manually shrink
5612         * the objects as well, see i915_gem_freeze()
5613         */
5614
5615        i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND);
5616        i915_gem_drain_freed_objects(dev_priv);
5617
5618        spin_lock(&dev_priv->mm.obj_lock);
5619        for (p = phases; *p; p++) {
5620                list_for_each_entry(obj, *p, mm.link)
5621                        __start_cpu_write(obj);
5622        }
5623        spin_unlock(&dev_priv->mm.obj_lock);
5624
5625        return 0;
5626}
5627
5628void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5629{
5630        struct drm_i915_file_private *file_priv = file->driver_priv;
5631        struct i915_request *request;
5632
5633        /* Clean up our request list when the client is going away, so that
5634         * later retire_requests won't dereference our soon-to-be-gone
5635         * file_priv.
5636         */
5637        spin_lock(&file_priv->mm.lock);
5638        list_for_each_entry(request, &file_priv->mm.request_list, client_link)
5639                request->file_priv = NULL;
5640        spin_unlock(&file_priv->mm.lock);
5641}
5642
5643int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
5644{
5645        struct drm_i915_file_private *file_priv;
5646        int ret;
5647
5648        DRM_DEBUG("\n");
5649
5650        file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5651        if (!file_priv)
5652                return -ENOMEM;
5653
5654        file->driver_priv = file_priv;
5655        file_priv->dev_priv = i915;
5656        file_priv->file = file;
5657
5658        spin_lock_init(&file_priv->mm.lock);
5659        INIT_LIST_HEAD(&file_priv->mm.request_list);
5660
5661        file_priv->bsd_engine = -1;
5662
5663        ret = i915_gem_context_open(i915, file);
5664        if (ret)
5665                kfree(file_priv);
5666
5667        return ret;
5668}
5669
5670/**
5671 * i915_gem_track_fb - update frontbuffer tracking
5672 * @old: current GEM buffer for the frontbuffer slots
5673 * @new: new GEM buffer for the frontbuffer slots
5674 * @frontbuffer_bits: bitmask of frontbuffer slots
5675 *
5676 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
5677 * from @old and setting them in @new. Both @old and @new can be NULL.
5678 */
5679void i915_gem_track_fb(struct drm_i915_gem_object *old,
5680                       struct drm_i915_gem_object *new,
5681                       unsigned frontbuffer_bits)
5682{
5683        /* Control of individual bits within the mask are guarded by
5684         * the owning plane->mutex, i.e. we can never see concurrent
5685         * manipulation of individual bits. But since the bitfield as a whole
5686         * is updated using RMW, we need to use atomics in order to update
5687         * the bits.
5688         */
5689        BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
5690                     sizeof(atomic_t) * BITS_PER_BYTE);
5691
5692        if (old) {
5693                WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
5694                atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
5695        }
5696
5697        if (new) {
5698                WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
5699                atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
5700        }
5701}
5702
5703/* Allocate a new GEM object and fill it with the supplied data */
5704struct drm_i915_gem_object *
5705i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
5706                                 const void *data, size_t size)
5707{
5708        struct drm_i915_gem_object *obj;
5709        struct file *file;
5710        size_t offset;
5711        int err;
5712
5713        obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
5714        if (IS_ERR(obj))
5715                return obj;
5716
5717        GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
5718
5719        file = obj->base.filp;
5720        offset = 0;
5721        do {
5722                unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
5723                struct page *page;
5724                void *pgdata, *vaddr;
5725
5726                err = pagecache_write_begin(file, file->f_mapping,
5727                                            offset, len, 0,
5728                                            &page, &pgdata);
5729                if (err < 0)
5730                        goto fail;
5731
5732                vaddr = kmap(page);
5733                memcpy(vaddr, data, len);
5734                kunmap(page);
5735
5736                err = pagecache_write_end(file, file->f_mapping,
5737                                          offset, len, len,
5738                                          page, pgdata);
5739                if (err < 0)
5740                        goto fail;
5741
5742                size -= len;
5743                data += len;
5744                offset += len;
5745        } while (size);
5746
5747        return obj;
5748
5749fail:
5750        i915_gem_object_put(obj);
5751        return ERR_PTR(err);
5752}
5753
5754struct scatterlist *
5755i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
5756                       unsigned int n,
5757                       unsigned int *offset)
5758{
5759        struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
5760        struct scatterlist *sg;
5761        unsigned int idx, count;
5762
5763        might_sleep();
5764        GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
5765        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
5766
5767        /* As we iterate forward through the sg, we record each entry in a
5768         * radixtree for quick repeated (backwards) lookups. If we have seen
5769         * this index previously, we will have an entry for it.
5770         *
5771         * Initial lookup is O(N), but this is amortized to O(1) for
5772         * sequential page access (where each new request is consecutive
5773         * to the previous one). Repeated lookups are O(lg(obj->base.size)),
5774         * i.e. O(1) with a large constant!
5775         */
5776        if (n < READ_ONCE(iter->sg_idx))
5777                goto lookup;
5778
5779        mutex_lock(&iter->lock);
5780
5781        /* We prefer to reuse the last sg so that repeated lookup of this
5782         * (or the subsequent) sg are fast - comparing against the last
5783         * sg is faster than going through the radixtree.
5784         */
5785
5786        sg = iter->sg_pos;
5787        idx = iter->sg_idx;
5788        count = __sg_page_count(sg);
5789
5790        while (idx + count <= n) {
5791                unsigned long exception, i;
5792                int ret;
5793
5794                /* If we cannot allocate and insert this entry, or the
5795                 * individual pages from this range, cancel updating the
5796                 * sg_idx so that on this lookup we are forced to linearly
5797                 * scan onwards, but on future lookups we will try the
5798                 * insertion again (in which case we need to be careful of
5799                 * the error return reporting that we have already inserted
5800                 * this index).
5801                 */
5802                ret = radix_tree_insert(&iter->radix, idx, sg);
5803                if (ret && ret != -EEXIST)
5804                        goto scan;
5805
5806                exception =
5807                        RADIX_TREE_EXCEPTIONAL_ENTRY |
5808                        idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
5809                for (i = 1; i < count; i++) {
5810                        ret = radix_tree_insert(&iter->radix, idx + i,
5811                                                (void *)exception);
5812                        if (ret && ret != -EEXIST)
5813                                goto scan;
5814                }
5815
5816                idx += count;
5817                sg = ____sg_next(sg);
5818                count = __sg_page_count(sg);
5819        }
5820
5821scan:
5822        iter->sg_pos = sg;
5823        iter->sg_idx = idx;
5824
5825        mutex_unlock(&iter->lock);
5826
5827        if (unlikely(n < idx)) /* insertion completed by another thread */
5828                goto lookup;
5829
5830        /* In case we failed to insert the entry into the radixtree, we need
5831         * to look beyond the current sg.
5832         */
5833        while (idx + count <= n) {
5834                idx += count;
5835                sg = ____sg_next(sg);
5836                count = __sg_page_count(sg);
5837        }
5838
5839        *offset = n - idx;
5840        return sg;
5841
5842lookup:
5843        rcu_read_lock();
5844
5845        sg = radix_tree_lookup(&iter->radix, n);
5846        GEM_BUG_ON(!sg);
5847
5848        /* If this index is in the middle of multi-page sg entry,
5849         * the radixtree will contain an exceptional entry that points
5850         * to the start of that range. We will return the pointer to
5851         * the base page and the offset of this page within the
5852         * sg entry's range.
5853         */
5854        *offset = 0;
5855        if (unlikely(radix_tree_exception(sg))) {
5856                unsigned long base =
5857                        (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
5858
5859                sg = radix_tree_lookup(&iter->radix, base);
5860                GEM_BUG_ON(!sg);
5861
5862                *offset = n - base;
5863        }
5864
5865        rcu_read_unlock();
5866
5867        return sg;
5868}
5869
5870struct page *
5871i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
5872{
5873        struct scatterlist *sg;
5874        unsigned int offset;
5875
5876        GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
5877
5878        sg = i915_gem_object_get_sg(obj, n, &offset);
5879        return nth_page(sg_page(sg), offset);
5880}
5881
5882/* Like i915_gem_object_get_page(), but mark the returned page dirty */
5883struct page *
5884i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5885                               unsigned int n)
5886{
5887        struct page *page;
5888
5889        page = i915_gem_object_get_page(obj, n);
5890        if (!obj->mm.dirty)
5891                set_page_dirty(page);
5892
5893        return page;
5894}
5895
5896dma_addr_t
5897i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5898                                unsigned long n)
5899{
5900        struct scatterlist *sg;
5901        unsigned int offset;
5902
5903        sg = i915_gem_object_get_sg(obj, n, &offset);
5904        return sg_dma_address(sg) + (offset << PAGE_SHIFT);
5905}
5906
5907int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
5908{
5909        struct sg_table *pages;
5910        int err;
5911
5912        if (align > obj->base.size)
5913                return -EINVAL;
5914
5915        if (obj->ops == &i915_gem_phys_ops)
5916                return 0;
5917
5918        if (obj->ops != &i915_gem_object_ops)
5919                return -EINVAL;
5920
5921        err = i915_gem_object_unbind(obj);
5922        if (err)
5923                return err;
5924
5925        mutex_lock(&obj->mm.lock);
5926
5927        if (obj->mm.madv != I915_MADV_WILLNEED) {
5928                err = -EFAULT;
5929                goto err_unlock;
5930        }
5931
5932        if (obj->mm.quirked) {
5933                err = -EFAULT;
5934                goto err_unlock;
5935        }
5936
5937        if (obj->mm.mapping) {
5938                err = -EBUSY;
5939                goto err_unlock;
5940        }
5941
5942        pages = fetch_and_zero(&obj->mm.pages);
5943        if (pages) {
5944                struct drm_i915_private *i915 = to_i915(obj->base.dev);
5945
5946                __i915_gem_object_reset_page_iter(obj);
5947
5948                spin_lock(&i915->mm.obj_lock);
5949                list_del(&obj->mm.link);
5950                spin_unlock(&i915->mm.obj_lock);
5951        }
5952
5953        obj->ops = &i915_gem_phys_ops;
5954
5955        err = ____i915_gem_object_get_pages(obj);
5956        if (err)
5957                goto err_xfer;
5958
5959        /* Perma-pin (until release) the physical set of pages */
5960        __i915_gem_object_pin_pages(obj);
5961
5962        if (!IS_ERR_OR_NULL(pages))
5963                i915_gem_object_ops.put_pages(obj, pages);
5964        mutex_unlock(&obj->mm.lock);
5965        return 0;
5966
5967err_xfer:
5968        obj->ops = &i915_gem_object_ops;
5969        obj->mm.pages = pages;
5970err_unlock:
5971        mutex_unlock(&obj->mm.lock);
5972        return err;
5973}
5974
5975#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5976#include "selftests/scatterlist.c"
5977#include "selftests/mock_gem_device.c"
5978#include "selftests/huge_gem_object.c"
5979#include "selftests/huge_pages.c"
5980#include "selftests/i915_gem_object.c"
5981#include "selftests/i915_gem_coherency.c"
5982#endif
5983