linux/drivers/gpu/drm/i915/i915_gem_execbuffer.c
   1/*
   2 * Copyright © 2008,2010 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *    Chris Wilson <chris@chris-wilson.co.uk>
  26 *
  27 */
  28
  29#include <drm/drmP.h>
  30#include <drm/i915_drm.h>
  31#include "i915_drv.h"
  32#include "i915_trace.h"
  33#include "intel_drv.h"
  34#include <linux/dma_remapping.h>
  35
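/*
 * Driver-internal flags stored in the high bits of each exec entry's flags
 * while the execbuffer is being processed; the HAS_PIN/HAS_FENCE bits are
 * cleared again in i915_gem_execbuffer_unreserve_vma(). BATCH_OFFSET_BIAS
 * keeps biased objects (the batch) out of the first 256KiB of the GTT.
 */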
  36#define  __EXEC_OBJECT_HAS_PIN (1<<31)
  37#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
  38#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
  39
  40#define BATCH_OFFSET_BIAS (256*1024)
  41
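/*
 * Handle->vma lookup state for one execbuffer. Depending on how eb_create()
 * sized it, the lookup is either a direct index (lut[], used with
 * I915_EXEC_HANDLE_LUT; signalled by a negative 'and' holding -buffer_count)
 * or a small hash table (buckets[], with 'and' as the bucket mask).
 */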
  42struct eb_vmas {
  43        struct list_head vmas;
  44        int and;
  45        union {
  46                struct i915_vma *lut[0];
  47                struct hlist_head buckets[0];
  48        };
  49};
  50
  51static struct eb_vmas *
  52eb_create(struct drm_i915_gem_execbuffer2 *args)
  53{
  54        struct eb_vmas *eb = NULL;
  55
  56        if (args->flags & I915_EXEC_HANDLE_LUT) {
  57                unsigned size = args->buffer_count;
  58                size *= sizeof(struct i915_vma *);
  59                size += sizeof(struct eb_vmas);
  60                eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
  61        }
  62
  63        if (eb == NULL) {
  64                unsigned size = args->buffer_count;
  65                unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
  66                BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
  67                while (count > 2*size)
  68                        count >>= 1;
  69                eb = kzalloc(count*sizeof(struct hlist_head) +
  70                             sizeof(struct eb_vmas),
  71                             GFP_TEMPORARY);
  72                if (eb == NULL)
  73                        return eb;
  74
  75                eb->and = count - 1;
  76        } else
  77                eb->and = -args->buffer_count;
  78
  79        INIT_LIST_HEAD(&eb->vmas);
  80        return eb;
  81}
  82
  83static void
  84eb_reset(struct eb_vmas *eb)
  85{
  86        if (eb->and >= 0)
  87                memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
  88}
  89
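/*
 * Resolve the user's array of object handles into vmas in the target address
 * space: grab a reference to each object under the file's table_lock, then
 * drop the lock and look up or create the vmas (which may allocate) before
 * adding them to eb->vmas in execbuffer order.
 */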
  90static int
  91eb_lookup_vmas(struct eb_vmas *eb,
  92               struct drm_i915_gem_exec_object2 *exec,
  93               const struct drm_i915_gem_execbuffer2 *args,
  94               struct i915_address_space *vm,
  95               struct drm_file *file)
  96{
  97        struct drm_i915_private *dev_priv = vm->dev->dev_private;
  98        struct drm_i915_gem_object *obj;
  99        struct list_head objects;
 100        int i, ret;
 101
 102        INIT_LIST_HEAD(&objects);
 103        spin_lock(&file->table_lock);
  104        /* Grab a reference to the object and release the lock so we can
  105         * look up or create the VMA without using GFP_ATOMIC. */
 106        for (i = 0; i < args->buffer_count; i++) {
 107                obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
 108                if (obj == NULL) {
 109                        spin_unlock(&file->table_lock);
 110                        DRM_DEBUG("Invalid object handle %d at index %d\n",
 111                                   exec[i].handle, i);
 112                        ret = -ENOENT;
 113                        goto err;
 114                }
 115
 116                if (!list_empty(&obj->obj_exec_link)) {
 117                        spin_unlock(&file->table_lock);
 118                        DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
 119                                   obj, exec[i].handle, i);
 120                        ret = -EINVAL;
 121                        goto err;
 122                }
 123
 124                drm_gem_object_reference(&obj->base);
 125                list_add_tail(&obj->obj_exec_link, &objects);
 126        }
 127        spin_unlock(&file->table_lock);
 128
 129        i = 0;
 130        while (!list_empty(&objects)) {
 131                struct i915_vma *vma;
 132                struct i915_address_space *bind_vm = vm;
 133
 134                if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
 135                    USES_FULL_PPGTT(vm->dev)) {
 136                        ret = -EINVAL;
 137                        goto err;
 138                }
 139
  140                /* If we have secure dispatch, the batch buffer (the last
  141                 * object in the execbuffer list) must use the GGTT VM.
  142                 */
 143                if (((args->flags & I915_EXEC_SECURE) &&
 144                    (i == (args->buffer_count - 1))))
 145                        bind_vm = &dev_priv->gtt.base;
 146
 147                obj = list_first_entry(&objects,
 148                                       struct drm_i915_gem_object,
 149                                       obj_exec_link);
 150
 151                /*
 152                 * NOTE: We can leak any vmas created here when something fails
 153                 * later on. But that's no issue since vma_unbind can deal with
 154                 * vmas which are not actually bound. And since only
 155                 * lookup_or_create exists as an interface to get at the vma
 156                 * from the (obj, vm) we don't run the risk of creating
 157                 * duplicated vmas for the same vm.
 158                 */
 159                vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
 160                if (IS_ERR(vma)) {
 161                        DRM_DEBUG("Failed to lookup VMA\n");
 162                        ret = PTR_ERR(vma);
 163                        goto err;
 164                }
 165
 166                /* Transfer ownership from the objects list to the vmas list. */
 167                list_add_tail(&vma->exec_list, &eb->vmas);
 168                list_del_init(&obj->obj_exec_link);
 169
 170                vma->exec_entry = &exec[i];
 171                if (eb->and < 0) {
 172                        eb->lut[i] = vma;
 173                } else {
 174                        uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
 175                        vma->exec_handle = handle;
 176                        hlist_add_head(&vma->exec_node,
 177                                       &eb->buckets[handle & eb->and]);
 178                }
 179                ++i;
 180        }
 181
 182        return 0;
 183
 184
 185err:
 186        while (!list_empty(&objects)) {
 187                obj = list_first_entry(&objects,
 188                                       struct drm_i915_gem_object,
 189                                       obj_exec_link);
 190                list_del_init(&obj->obj_exec_link);
 191                drm_gem_object_unreference(&obj->base);
 192        }
 193        /*
  194         * Objects already transferred to the vmas list will be unreferenced by
 195         * eb_destroy.
 196         */
 197
 198        return ret;
 199}
 200
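/*
 * Map an execbuffer handle back to its vma using whichever lookup layout
 * eb_create() picked (direct index or hash bucket).
 */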
 201static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
 202{
 203        if (eb->and < 0) {
 204                if (handle >= -eb->and)
 205                        return NULL;
 206                return eb->lut[handle];
 207        } else {
 208                struct hlist_head *head;
 209                struct hlist_node *node;
 210
 211                head = &eb->buckets[handle & eb->and];
 212                hlist_for_each(node, head) {
 213                        struct i915_vma *vma;
 214
 215                        vma = hlist_entry(node, struct i915_vma, exec_node);
 216                        if (vma->exec_handle == handle)
 217                                return vma;
 218                }
 219                return NULL;
 220        }
 221}
 222
 223static void
 224i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 225{
 226        struct drm_i915_gem_exec_object2 *entry;
 227        struct drm_i915_gem_object *obj = vma->obj;
 228
 229        if (!drm_mm_node_allocated(&vma->node))
 230                return;
 231
 232        entry = vma->exec_entry;
 233
 234        if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
 235                i915_gem_object_unpin_fence(obj);
 236
 237        if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 238                vma->pin_count--;
 239
 240        entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 241}
 242
 243static void eb_destroy(struct eb_vmas *eb)
 244{
 245        while (!list_empty(&eb->vmas)) {
 246                struct i915_vma *vma;
 247
 248                vma = list_first_entry(&eb->vmas,
 249                                       struct i915_vma,
 250                                       exec_list);
 251                list_del_init(&vma->exec_list);
 252                i915_gem_execbuffer_unreserve_vma(vma);
 253                drm_gem_object_unreference(&vma->obj->base);
 254        }
 255        kfree(eb);
 256}
 257
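/*
 * Decide whether a relocation should be written through a CPU kmapping
 * rather than the GTT: true on LLC platforms, for objects already in the
 * CPU write domain, for objects that are not map_and_fenceable, and for
 * objects with a cacheable cache_level.
 */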
 258static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 259{
 260        return (HAS_LLC(obj->base.dev) ||
 261                obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
 262                !obj->map_and_fenceable ||
 263                obj->cache_level != I915_CACHE_NONE);
 264}
 265
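/*
 * Write the relocation value through a CPU kmapping of the object. On gen8+
 * the relocation is 64 bits wide and the upper half may fall on the
 * following page, requiring a second kmap.
 */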
 266static int
 267relocate_entry_cpu(struct drm_i915_gem_object *obj,
 268                   struct drm_i915_gem_relocation_entry *reloc,
 269                   uint64_t target_offset)
 270{
 271        struct drm_device *dev = obj->base.dev;
 272        uint32_t page_offset = offset_in_page(reloc->offset);
 273        uint64_t delta = reloc->delta + target_offset;
 274        char *vaddr;
 275        int ret;
 276
 277        ret = i915_gem_object_set_to_cpu_domain(obj, true);
 278        if (ret)
 279                return ret;
 280
 281        vaddr = kmap_atomic(i915_gem_object_get_page(obj,
 282                                reloc->offset >> PAGE_SHIFT));
 283        *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
 284
 285        if (INTEL_INFO(dev)->gen >= 8) {
 286                page_offset = offset_in_page(page_offset + sizeof(uint32_t));
 287
 288                if (page_offset == 0) {
 289                        kunmap_atomic(vaddr);
 290                        vaddr = kmap_atomic(i915_gem_object_get_page(obj,
 291                            (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
 292                }
 293
 294                *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
 295        }
 296
 297        kunmap_atomic(vaddr);
 298
 299        return 0;
 300}
 301
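/*
 * Write the relocation value through an atomic WC mapping of the object's
 * GGTT range. As with the CPU path, gen8+ needs a second 32-bit write that
 * may land on the next page of the mapping.
 */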
 302static int
 303relocate_entry_gtt(struct drm_i915_gem_object *obj,
 304                   struct drm_i915_gem_relocation_entry *reloc,
 305                   uint64_t target_offset)
 306{
 307        struct drm_device *dev = obj->base.dev;
 308        struct drm_i915_private *dev_priv = dev->dev_private;
 309        uint64_t delta = reloc->delta + target_offset;
 310        uint32_t __iomem *reloc_entry;
 311        void __iomem *reloc_page;
 312        int ret;
 313
 314        ret = i915_gem_object_set_to_gtt_domain(obj, true);
 315        if (ret)
 316                return ret;
 317
 318        ret = i915_gem_object_put_fence(obj);
 319        if (ret)
 320                return ret;
 321
 322        /* Map the page containing the relocation we're going to perform.  */
 323        reloc->offset += i915_gem_obj_ggtt_offset(obj);
 324        reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
 325                        reloc->offset & PAGE_MASK);
 326        reloc_entry = (uint32_t __iomem *)
 327                (reloc_page + offset_in_page(reloc->offset));
 328        iowrite32(lower_32_bits(delta), reloc_entry);
 329
 330        if (INTEL_INFO(dev)->gen >= 8) {
 331                reloc_entry += 1;
 332
 333                if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) {
 334                        io_mapping_unmap_atomic(reloc_page);
 335                        reloc_page = io_mapping_map_atomic_wc(
 336                                        dev_priv->gtt.mappable,
 337                                        reloc->offset + sizeof(uint32_t));
 338                        reloc_entry = reloc_page;
 339                }
 340
 341                iowrite32(upper_32_bits(delta), reloc_entry);
 342        }
 343
 344        io_mapping_unmap_atomic(reloc_page);
 345
 346        return 0;
 347}
 348
 349static int
 350i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 351                                   struct eb_vmas *eb,
 352                                   struct drm_i915_gem_relocation_entry *reloc)
 353{
 354        struct drm_device *dev = obj->base.dev;
 355        struct drm_gem_object *target_obj;
 356        struct drm_i915_gem_object *target_i915_obj;
 357        struct i915_vma *target_vma;
 358        uint64_t target_offset;
 359        int ret;
 360
  361        /* we already hold a reference to all valid objects */
 362        target_vma = eb_get_vma(eb, reloc->target_handle);
 363        if (unlikely(target_vma == NULL))
 364                return -ENOENT;
 365        target_i915_obj = target_vma->obj;
 366        target_obj = &target_vma->obj->base;
 367
 368        target_offset = target_vma->node.start;
 369
 370        /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
 371         * pipe_control writes because the gpu doesn't properly redirect them
  372         * through the ppgtt for non-secure batchbuffers. */
 373        if (unlikely(IS_GEN6(dev) &&
 374            reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 375            !target_i915_obj->has_global_gtt_mapping)) {
 376                struct i915_vma *vma =
 377                        list_first_entry(&target_i915_obj->vma_list,
 378                                         typeof(*vma), vma_link);
 379                vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
 380        }
 381
 382        /* Validate that the target is in a valid r/w GPU domain */
 383        if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
 384                DRM_DEBUG("reloc with multiple write domains: "
 385                          "obj %p target %d offset %d "
 386                          "read %08x write %08x",
 387                          obj, reloc->target_handle,
 388                          (int) reloc->offset,
 389                          reloc->read_domains,
 390                          reloc->write_domain);
 391                return -EINVAL;
 392        }
 393        if (unlikely((reloc->write_domain | reloc->read_domains)
 394                     & ~I915_GEM_GPU_DOMAINS)) {
 395                DRM_DEBUG("reloc with read/write non-GPU domains: "
 396                          "obj %p target %d offset %d "
 397                          "read %08x write %08x",
 398                          obj, reloc->target_handle,
 399                          (int) reloc->offset,
 400                          reloc->read_domains,
 401                          reloc->write_domain);
 402                return -EINVAL;
 403        }
 404
 405        target_obj->pending_read_domains |= reloc->read_domains;
 406        target_obj->pending_write_domain |= reloc->write_domain;
 407
 408        /* If the relocation already has the right value in it, no
 409         * more work needs to be done.
 410         */
 411        if (target_offset == reloc->presumed_offset)
 412                return 0;
 413
 414        /* Check that the relocation address is valid... */
 415        if (unlikely(reloc->offset >
 416                obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
 417                DRM_DEBUG("Relocation beyond object bounds: "
 418                          "obj %p target %d offset %d size %d.\n",
 419                          obj, reloc->target_handle,
 420                          (int) reloc->offset,
 421                          (int) obj->base.size);
 422                return -EINVAL;
 423        }
 424        if (unlikely(reloc->offset & 3)) {
 425                DRM_DEBUG("Relocation not 4-byte aligned: "
 426                          "obj %p target %d offset %d.\n",
 427                          obj, reloc->target_handle,
 428                          (int) reloc->offset);
 429                return -EINVAL;
 430        }
 431
 432        /* We can't wait for rendering with pagefaults disabled */
 433        if (obj->active && in_atomic())
 434                return -EFAULT;
 435
 436        if (use_cpu_reloc(obj))
 437                ret = relocate_entry_cpu(obj, reloc, target_offset);
 438        else
 439                ret = relocate_entry_gtt(obj, reloc, target_offset);
 440
 441        if (ret)
 442                return ret;
 443
 444        /* and update the user's relocation entry */
 445        reloc->presumed_offset = target_offset;
 446
 447        return 0;
 448}
 449
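/*
 * Fast-path relocation processing for one vma: copy the user's relocation
 * entries onto the stack in chunks and apply them. This runs with pagefaults
 * disabled (see i915_gem_execbuffer_relocate()), so any fault becomes -EFAULT
 * and pushes us into the slow path.
 */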
 450static int
 451i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 452                                 struct eb_vmas *eb)
 453{
 454#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 455        struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
 456        struct drm_i915_gem_relocation_entry __user *user_relocs;
 457        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 458        int remain, ret;
 459
 460        user_relocs = to_user_ptr(entry->relocs_ptr);
 461
 462        remain = entry->relocation_count;
 463        while (remain) {
 464                struct drm_i915_gem_relocation_entry *r = stack_reloc;
 465                int count = remain;
 466                if (count > ARRAY_SIZE(stack_reloc))
 467                        count = ARRAY_SIZE(stack_reloc);
 468                remain -= count;
 469
 470                if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
 471                        return -EFAULT;
 472
 473                do {
 474                        u64 offset = r->presumed_offset;
 475
 476                        ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
 477                        if (ret)
 478                                return ret;
 479
 480                        if (r->presumed_offset != offset &&
 481                            __copy_to_user_inatomic(&user_relocs->presumed_offset,
 482                                                    &r->presumed_offset,
 483                                                    sizeof(r->presumed_offset))) {
 484                                return -EFAULT;
 485                        }
 486
 487                        user_relocs++;
 488                        r++;
 489                } while (--count);
 490        }
 491
 492        return 0;
 493#undef N_RELOC
 494}
 495
 496static int
 497i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 498                                      struct eb_vmas *eb,
 499                                      struct drm_i915_gem_relocation_entry *relocs)
 500{
 501        const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 502        int i, ret;
 503
 504        for (i = 0; i < entry->relocation_count; i++) {
 505                ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
 506                if (ret)
 507                        return ret;
 508        }
 509
 510        return 0;
 511}
 512
 513static int
 514i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 515{
 516        struct i915_vma *vma;
 517        int ret = 0;
 518
 519        /* This is the fast path and we cannot handle a pagefault whilst
 520         * holding the struct mutex lest the user pass in the relocations
  521         * contained within a mmapped bo. In such a case the page fault
  522         * handler would call i915_gem_fault() and we would try to
 523         * acquire the struct mutex again. Obviously this is bad and so
 524         * lockdep complains vehemently.
 525         */
 526        pagefault_disable();
 527        list_for_each_entry(vma, &eb->vmas, exec_list) {
 528                ret = i915_gem_execbuffer_relocate_vma(vma, eb);
 529                if (ret)
 530                        break;
 531        }
 532        pagefault_enable();
 533
 534        return ret;
 535}
 536
 537static int
 538need_reloc_mappable(struct i915_vma *vma)
 539{
 540        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 541        return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
 542                i915_is_ggtt(vma->vm);
 543}
 544
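/*
 * Pin one vma for execution, deriving the pin flags from its exec entry
 * (mappable for fenced or GTT relocations, global for NEEDS_GTT, an offset
 * bias for the batch) and reserving a fence register when one is needed.
 */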
 545static int
 546i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 547                                struct intel_engine_cs *ring,
 548                                bool *need_reloc)
 549{
 550        struct drm_i915_gem_object *obj = vma->obj;
 551        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 552        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 553        bool need_fence;
 554        uint64_t flags;
 555        int ret;
 556
 557        flags = 0;
 558
 559        need_fence =
 560                has_fenced_gpu_access &&
 561                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 562                obj->tiling_mode != I915_TILING_NONE;
 563        if (need_fence || need_reloc_mappable(vma))
 564                flags |= PIN_MAPPABLE;
 565
 566        if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
 567                flags |= PIN_GLOBAL;
 568        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 569                flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 570
 571        ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
 572        if (ret)
 573                return ret;
 574
 575        entry->flags |= __EXEC_OBJECT_HAS_PIN;
 576
 577        if (has_fenced_gpu_access) {
 578                if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
 579                        ret = i915_gem_object_get_fence(obj);
 580                        if (ret)
 581                                return ret;
 582
 583                        if (i915_gem_object_pin_fence(obj))
 584                                entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 585
 586                        obj->pending_fenced_gpu_access = true;
 587                }
 588        }
 589
 590        if (entry->offset != vma->node.start) {
 591                entry->offset = vma->node.start;
 592                *need_reloc = true;
 593        }
 594
 595        if (entry->flags & EXEC_OBJECT_WRITE) {
 596                obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
 597                obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
 598        }
 599
 600        return 0;
 601}
 602
 603static bool
 604eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
 605{
 606        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 607        struct drm_i915_gem_object *obj = vma->obj;
 608        bool need_fence, need_mappable;
 609
 610        need_fence =
 611                has_fenced_gpu_access &&
 612                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 613                obj->tiling_mode != I915_TILING_NONE;
 614        need_mappable = need_fence || need_reloc_mappable(vma);
 615
 616        WARN_ON((need_mappable || need_fence) &&
 617               !i915_is_ggtt(vma->vm));
 618
 619        if (entry->alignment &&
 620            vma->node.start & (entry->alignment - 1))
 621                return true;
 622
 623        if (need_mappable && !obj->map_and_fenceable)
 624                return true;
 625
 626        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 627            vma->node.start < BATCH_OFFSET_BIAS)
 628                return true;
 629
 630        return false;
 631}
 632
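/*
 * Reserve GTT space for every vma in the execbuffer. Objects that need to be
 * mappable are sorted to the front of the list so they get first pick of the
 * aperture; the pin/unbind/evict retry scheme is described below.
 */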
 633static int
 634i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 635                            struct list_head *vmas,
 636                            bool *need_relocs)
 637{
 638        struct drm_i915_gem_object *obj;
 639        struct i915_vma *vma;
 640        struct i915_address_space *vm;
 641        struct list_head ordered_vmas;
 642        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 643        int retry;
 644
 645        if (list_empty(vmas))
 646                return 0;
 647
 648        i915_gem_retire_requests_ring(ring);
 649
 650        vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 651
 652        INIT_LIST_HEAD(&ordered_vmas);
 653        while (!list_empty(vmas)) {
 654                struct drm_i915_gem_exec_object2 *entry;
 655                bool need_fence, need_mappable;
 656
 657                vma = list_first_entry(vmas, struct i915_vma, exec_list);
 658                obj = vma->obj;
 659                entry = vma->exec_entry;
 660
 661                need_fence =
 662                        has_fenced_gpu_access &&
 663                        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 664                        obj->tiling_mode != I915_TILING_NONE;
 665                need_mappable = need_fence || need_reloc_mappable(vma);
 666
 667                if (need_mappable)
 668                        list_move(&vma->exec_list, &ordered_vmas);
 669                else
 670                        list_move_tail(&vma->exec_list, &ordered_vmas);
 671
 672                obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
 673                obj->base.pending_write_domain = 0;
 674                obj->pending_fenced_gpu_access = false;
 675        }
 676        list_splice(&ordered_vmas, vmas);
 677
 678        /* Attempt to pin all of the buffers into the GTT.
 679         * This is done in 3 phases:
 680         *
 681         * 1a. Unbind all objects that do not match the GTT constraints for
 682         *     the execbuffer (fenceable, mappable, alignment etc).
 683         * 1b. Increment pin count for already bound objects.
 684         * 2.  Bind new objects.
 685         * 3.  Decrement pin count.
 686         *
  687         * This avoids unnecessary unbinding of later objects in order to make
 688         * room for the earlier objects *unless* we need to defragment.
 689         */
 690        retry = 0;
 691        do {
 692                int ret = 0;
 693
 694                /* Unbind any ill-fitting objects or pin. */
 695                list_for_each_entry(vma, vmas, exec_list) {
 696                        if (!drm_mm_node_allocated(&vma->node))
 697                                continue;
 698
 699                        if (eb_vma_misplaced(vma, has_fenced_gpu_access))
 700                                ret = i915_vma_unbind(vma);
 701                        else
 702                                ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
 703                        if (ret)
 704                                goto err;
 705                }
 706
 707                /* Bind fresh objects */
 708                list_for_each_entry(vma, vmas, exec_list) {
 709                        if (drm_mm_node_allocated(&vma->node))
 710                                continue;
 711
 712                        ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
 713                        if (ret)
 714                                goto err;
 715                }
 716
 717err:
 718                if (ret != -ENOSPC || retry++)
 719                        return ret;
 720
 721                /* Decrement pin count for bound objects */
 722                list_for_each_entry(vma, vmas, exec_list)
 723                        i915_gem_execbuffer_unreserve_vma(vma);
 724
 725                ret = i915_gem_evict_vm(vm, true);
 726                if (ret)
 727                        return ret;
 728        } while (1);
 729}
 730
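/*
 * Slow-path relocation processing, entered when the fast path faulted: drop
 * struct_mutex, copy all relocation arrays from userspace with faults allowed
 * (invalidating the user's presumed offsets along the way), then retake the
 * lock, re-lookup and re-reserve the vmas and apply the relocations from the
 * kernel copy.
 */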
 731static int
 732i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 733                                  struct drm_i915_gem_execbuffer2 *args,
 734                                  struct drm_file *file,
 735                                  struct intel_engine_cs *ring,
 736                                  struct eb_vmas *eb,
 737                                  struct drm_i915_gem_exec_object2 *exec)
 738{
 739        struct drm_i915_gem_relocation_entry *reloc;
 740        struct i915_address_space *vm;
 741        struct i915_vma *vma;
 742        bool need_relocs;
 743        int *reloc_offset;
 744        int i, total, ret;
 745        unsigned count = args->buffer_count;
 746
 747        if (WARN_ON(list_empty(&eb->vmas)))
 748                return 0;
 749
 750        vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
 751
 752        /* We may process another execbuffer during the unlock... */
 753        while (!list_empty(&eb->vmas)) {
 754                vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
 755                list_del_init(&vma->exec_list);
 756                i915_gem_execbuffer_unreserve_vma(vma);
 757                drm_gem_object_unreference(&vma->obj->base);
 758        }
 759
 760        mutex_unlock(&dev->struct_mutex);
 761
 762        total = 0;
 763        for (i = 0; i < count; i++)
 764                total += exec[i].relocation_count;
 765
 766        reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
 767        reloc = drm_malloc_ab(total, sizeof(*reloc));
 768        if (reloc == NULL || reloc_offset == NULL) {
 769                drm_free_large(reloc);
 770                drm_free_large(reloc_offset);
 771                mutex_lock(&dev->struct_mutex);
 772                return -ENOMEM;
 773        }
 774
 775        total = 0;
 776        for (i = 0; i < count; i++) {
 777                struct drm_i915_gem_relocation_entry __user *user_relocs;
 778                u64 invalid_offset = (u64)-1;
 779                int j;
 780
 781                user_relocs = to_user_ptr(exec[i].relocs_ptr);
 782
 783                if (copy_from_user(reloc+total, user_relocs,
 784                                   exec[i].relocation_count * sizeof(*reloc))) {
 785                        ret = -EFAULT;
 786                        mutex_lock(&dev->struct_mutex);
 787                        goto err;
 788                }
 789
 790                /* As we do not update the known relocation offsets after
 791                 * relocating (due to the complexities in lock handling),
 792                 * we need to mark them as invalid now so that we force the
  793                 * relocation processing next time. This guards against the
  794                 * target object being evicted and then rebound into its old
  795                 * presumed_offset before the next execbuffer - if that
  796                 * happened we would mistakenly assume that the relocations
  797                 * were still valid.
 798                 */
 799                for (j = 0; j < exec[i].relocation_count; j++) {
 800                        if (__copy_to_user(&user_relocs[j].presumed_offset,
 801                                           &invalid_offset,
 802                                           sizeof(invalid_offset))) {
 803                                ret = -EFAULT;
 804                                mutex_lock(&dev->struct_mutex);
 805                                goto err;
 806                        }
 807                }
 808
 809                reloc_offset[i] = total;
 810                total += exec[i].relocation_count;
 811        }
 812
 813        ret = i915_mutex_lock_interruptible(dev);
 814        if (ret) {
 815                mutex_lock(&dev->struct_mutex);
 816                goto err;
 817        }
 818
 819        /* reacquire the objects */
 820        eb_reset(eb);
 821        ret = eb_lookup_vmas(eb, exec, args, vm, file);
 822        if (ret)
 823                goto err;
 824
 825        need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
 826        ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
 827        if (ret)
 828                goto err;
 829
 830        list_for_each_entry(vma, &eb->vmas, exec_list) {
 831                int offset = vma->exec_entry - exec;
 832                ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
 833                                                            reloc + reloc_offset[offset]);
 834                if (ret)
 835                        goto err;
 836        }
 837
  838        /* Leave the user relocations as they are; this is the painfully slow path,
 839         * and we want to avoid the complication of dropping the lock whilst
 840         * having buffers reserved in the aperture and so causing spurious
 841         * ENOSPC for random operations.
 842         */
 843
 844err:
 845        drm_free_large(reloc);
 846        drm_free_large(reloc_offset);
 847        return ret;
 848}
 849
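/*
 * Make every object coherent and ordered for the GPU: synchronise against
 * other rings, clflush objects with pending CPU writes, flush the chipset and
 * GTT write buffers as needed, and finally invalidate the GPU caches.
 */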
 850static int
 851i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 852                                struct list_head *vmas)
 853{
 854        struct i915_vma *vma;
 855        uint32_t flush_domains = 0;
 856        bool flush_chipset = false;
 857        int ret;
 858
 859        list_for_each_entry(vma, vmas, exec_list) {
 860                struct drm_i915_gem_object *obj = vma->obj;
 861                ret = i915_gem_object_sync(obj, ring);
 862                if (ret)
 863                        return ret;
 864
 865                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 866                        flush_chipset |= i915_gem_clflush_object(obj, false);
 867
 868                flush_domains |= obj->base.write_domain;
 869        }
 870
 871        if (flush_chipset)
 872                i915_gem_chipset_flush(ring->dev);
 873
 874        if (flush_domains & I915_GEM_DOMAIN_GTT)
 875                wmb();
 876
 877        /* Unconditionally invalidate gpu caches and ensure that we do flush
 878         * any residual writes from the previous batch.
 879         */
 880        return intel_ring_invalidate_all_caches(ring);
 881}
 882
 883static bool
 884i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 885{
 886        if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
 887                return false;
 888
 889        return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
 890}
 891
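/*
 * Sanity-check the user's exec object array before taking any locks: reject
 * unknown flags, guard against relocation-count overflow, and verify that
 * each relocation array is writable since presumed offsets are written back.
 */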
 892static int
 893validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 894                   int count)
 895{
 896        int i;
 897        unsigned relocs_total = 0;
 898        unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
 899
 900        for (i = 0; i < count; i++) {
 901                char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
 902                int length; /* limited by fault_in_pages_readable() */
 903
 904                if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
 905                        return -EINVAL;
 906
 907                /* First check for malicious input causing overflow in
 908                 * the worst case where we need to allocate the entire
 909                 * relocation tree as a single array.
 910                 */
 911                if (exec[i].relocation_count > relocs_max - relocs_total)
 912                        return -EINVAL;
 913                relocs_total += exec[i].relocation_count;
 914
 915                length = exec[i].relocation_count *
 916                        sizeof(struct drm_i915_gem_relocation_entry);
 917                /*
 918                 * We must check that the entire relocation array is safe
 919                 * to read, but since we may need to update the presumed
 920                 * offsets during execution, check for full write access.
 921                 */
 922                if (!access_ok(VERIFY_WRITE, ptr, length))
 923                        return -EFAULT;
 924
 925                if (likely(!i915.prefault_disable)) {
 926                        if (fault_in_multipages_readable(ptr, length))
 927                                return -EFAULT;
 928                }
 929        }
 930
 931        return 0;
 932}
 933
 934static struct intel_context *
 935i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
 936                          struct intel_engine_cs *ring, const u32 ctx_id)
 937{
 938        struct intel_context *ctx = NULL;
 939        struct i915_ctx_hang_stats *hs;
 940
 941        if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_ID)
 942                return ERR_PTR(-EINVAL);
 943
 944        ctx = i915_gem_context_get(file->driver_priv, ctx_id);
 945        if (IS_ERR(ctx))
 946                return ctx;
 947
 948        hs = &ctx->hang_stats;
 949        if (hs->banned) {
 950                DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
 951                return ERR_PTR(-EIO);
 952        }
 953
 954        return ctx;
 955}
 956
 957static void
 958i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 959                                   struct intel_engine_cs *ring)
 960{
 961        struct i915_vma *vma;
 962
 963        list_for_each_entry(vma, vmas, exec_list) {
 964                struct drm_i915_gem_object *obj = vma->obj;
 965                u32 old_read = obj->base.read_domains;
 966                u32 old_write = obj->base.write_domain;
 967
 968                obj->base.write_domain = obj->base.pending_write_domain;
 969                if (obj->base.write_domain == 0)
 970                        obj->base.pending_read_domains |= obj->base.read_domains;
 971                obj->base.read_domains = obj->base.pending_read_domains;
 972                obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 973
 974                i915_vma_move_to_active(vma, ring);
 975                if (obj->base.write_domain) {
 976                        obj->dirty = 1;
 977                        obj->last_write_seqno = intel_ring_get_seqno(ring);
 978                        /* check for potential scanout */
 979                        if (i915_gem_obj_ggtt_bound(obj) &&
 980                            i915_gem_obj_to_ggtt(obj)->pin_count)
 981                                intel_mark_fb_busy(obj, ring);
 982
 983                        /* update for the implicit flush after a batch */
 984                        obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 985                }
 986
 987                trace_i915_gem_object_change_domain(obj, old_read, old_write);
 988        }
 989}
 990
 991static void
 992i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 993                                    struct drm_file *file,
 994                                    struct intel_engine_cs *ring,
 995                                    struct drm_i915_gem_object *obj)
 996{
 997        /* Unconditionally force add_request to emit a full flush. */
 998        ring->gpu_caches_dirty = true;
 999
1000        /* Add a breadcrumb for the completion of the batch buffer */
1001        (void)__i915_add_request(ring, file, obj, NULL);
1002}
1003
1004static int
1005i915_reset_gen7_sol_offsets(struct drm_device *dev,
1006                            struct intel_engine_cs *ring)
1007{
1008        struct drm_i915_private *dev_priv = dev->dev_private;
1009        int ret, i;
1010
1011        if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
1012                DRM_DEBUG("sol reset is gen7/rcs only\n");
1013                return -EINVAL;
1014        }
1015
1016        ret = intel_ring_begin(ring, 4 * 3);
1017        if (ret)
1018                return ret;
1019
1020        for (i = 0; i < 4; i++) {
1021                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1022                intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1023                intel_ring_emit(ring, 0);
1024        }
1025
1026        intel_ring_advance(ring);
1027
1028        return 0;
1029}
1030
1031/**
 1032 * Find one BSD ring on which to dispatch the corresponding BSD command.
 1033 * The ring id is returned.
1034 */
1035static int gen8_dispatch_bsd_ring(struct drm_device *dev,
1036                                  struct drm_file *file)
1037{
1038        struct drm_i915_private *dev_priv = dev->dev_private;
1039        struct drm_i915_file_private *file_priv = file->driver_priv;
1040
 1041        /* Check whether this file_priv already has a BSD ring assigned */
1042        if (file_priv->bsd_ring)
1043                return file_priv->bsd_ring->id;
1044        else {
 1045                /* If not, use the ping-pong mechanism to select one ring */
1046                int ring_id;
1047
1048                mutex_lock(&dev->struct_mutex);
1049                if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
1050                        ring_id = VCS;
1051                        dev_priv->mm.bsd_ring_dispatch_index = 1;
1052                } else {
1053                        ring_id = VCS2;
1054                        dev_priv->mm.bsd_ring_dispatch_index = 0;
1055                }
1056                file_priv->bsd_ring = &dev_priv->ring[ring_id];
1057                mutex_unlock(&dev->struct_mutex);
1058                return ring_id;
1059        }
1060}
1061
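/* The batch buffer is, by convention, the last object in the execbuffer list. */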
1062static struct drm_i915_gem_object *
1063eb_get_batch(struct eb_vmas *eb)
1064{
1065        struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1066
1067        /*
1068         * SNA is doing fancy tricks with compressing batch buffers, which leads
1069         * to negative relocation deltas. Usually that works out ok since the
 1070         * relocated address is still positive, except when the batch is placed
1071         * very low in the GTT. Ensure this doesn't happen.
1072         *
1073         * Note that actual hangs have only been observed on gen7, but for
1074         * paranoia do it everywhere.
1075         */
1076        vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1077
1078        return vma->obj;
1079}
1080
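/*
 * Common execbuffer implementation behind both ioctls: validate the
 * arguments, pick a ring and context, look up and reserve the objects, apply
 * relocations, flush caches, dispatch the batch and move the objects onto
 * the active lists.
 */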
1081static int
1082i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1083                       struct drm_file *file,
1084                       struct drm_i915_gem_execbuffer2 *args,
1085                       struct drm_i915_gem_exec_object2 *exec)
1086{
1087        struct drm_i915_private *dev_priv = dev->dev_private;
1088        struct eb_vmas *eb;
1089        struct drm_i915_gem_object *batch_obj;
1090        struct drm_clip_rect *cliprects = NULL;
1091        struct intel_engine_cs *ring;
1092        struct intel_context *ctx;
1093        struct i915_address_space *vm;
1094        const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1095        u64 exec_start = args->batch_start_offset, exec_len;
1096        u32 mask, flags;
1097        int ret, mode, i;
1098        bool need_relocs;
1099
1100        if (!i915_gem_check_execbuffer(args))
1101                return -EINVAL;
1102
1103        ret = validate_exec_list(exec, args->buffer_count);
1104        if (ret)
1105                return ret;
1106
1107        flags = 0;
1108        if (args->flags & I915_EXEC_SECURE) {
1109                if (!file->is_master || !capable(CAP_SYS_ADMIN))
1110                    return -EPERM;
1111
1112                flags |= I915_DISPATCH_SECURE;
1113        }
1114        if (args->flags & I915_EXEC_IS_PINNED)
1115                flags |= I915_DISPATCH_PINNED;
1116
1117        if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
1118                DRM_DEBUG("execbuf with unknown ring: %d\n",
1119                          (int)(args->flags & I915_EXEC_RING_MASK));
1120                return -EINVAL;
1121        }
1122
1123        if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
1124                ring = &dev_priv->ring[RCS];
1125        else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
1126                if (HAS_BSD2(dev)) {
1127                        int ring_id;
1128                        ring_id = gen8_dispatch_bsd_ring(dev, file);
1129                        ring = &dev_priv->ring[ring_id];
1130                } else
1131                        ring = &dev_priv->ring[VCS];
1132        } else
1133                ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
1134
1135        if (!intel_ring_initialized(ring)) {
1136                DRM_DEBUG("execbuf with invalid ring: %d\n",
1137                          (int)(args->flags & I915_EXEC_RING_MASK));
1138                return -EINVAL;
1139        }
1140
1141        mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1142        mask = I915_EXEC_CONSTANTS_MASK;
1143        switch (mode) {
1144        case I915_EXEC_CONSTANTS_REL_GENERAL:
1145        case I915_EXEC_CONSTANTS_ABSOLUTE:
1146        case I915_EXEC_CONSTANTS_REL_SURFACE:
1147                if (mode != 0 && ring != &dev_priv->ring[RCS]) {
1148                        DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1149                        return -EINVAL;
1150                }
1151
1152                if (mode != dev_priv->relative_constants_mode) {
1153                        if (INTEL_INFO(dev)->gen < 4) {
1154                                DRM_DEBUG("no rel constants on pre-gen4\n");
1155                                return -EINVAL;
1156                        }
1157
1158                        if (INTEL_INFO(dev)->gen > 5 &&
1159                            mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1160                                DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1161                                return -EINVAL;
1162                        }
1163
1164                        /* The HW changed the meaning on this bit on gen6 */
1165                        if (INTEL_INFO(dev)->gen >= 6)
1166                                mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1167                }
1168                break;
1169        default:
1170                DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
1171                return -EINVAL;
1172        }
1173
1174        if (args->buffer_count < 1) {
1175                DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1176                return -EINVAL;
1177        }
1178
1179        if (args->num_cliprects != 0) {
1180                if (ring != &dev_priv->ring[RCS]) {
1181                        DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1182                        return -EINVAL;
1183                }
1184
1185                if (INTEL_INFO(dev)->gen >= 5) {
1186                        DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1187                        return -EINVAL;
1188                }
1189
1190                if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1191                        DRM_DEBUG("execbuf with %u cliprects\n",
1192                                  args->num_cliprects);
1193                        return -EINVAL;
1194                }
1195
1196                cliprects = kcalloc(args->num_cliprects,
1197                                    sizeof(*cliprects),
1198                                    GFP_KERNEL);
1199                if (cliprects == NULL) {
1200                        ret = -ENOMEM;
1201                        goto pre_mutex_err;
1202                }
1203
1204                if (copy_from_user(cliprects,
1205                                   to_user_ptr(args->cliprects_ptr),
1206                                   sizeof(*cliprects)*args->num_cliprects)) {
1207                        ret = -EFAULT;
1208                        goto pre_mutex_err;
1209                }
1210        } else {
1211                if (args->DR4 == 0xffffffff) {
1212                        DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1213                        args->DR4 = 0;
1214                }
1215
1216                if (args->DR1 || args->DR4 || args->cliprects_ptr) {
1217                        DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
1218                        return -EINVAL;
1219                }
1220        }
1221
1222        intel_runtime_pm_get(dev_priv);
1223
1224        ret = i915_mutex_lock_interruptible(dev);
1225        if (ret)
1226                goto pre_mutex_err;
1227
1228        if (dev_priv->ums.mm_suspended) {
1229                mutex_unlock(&dev->struct_mutex);
1230                ret = -EBUSY;
1231                goto pre_mutex_err;
1232        }
1233
1234        ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1235        if (IS_ERR(ctx)) {
1236                mutex_unlock(&dev->struct_mutex);
1237                ret = PTR_ERR(ctx);
1238                goto pre_mutex_err;
1239        }
1240
1241        i915_gem_context_reference(ctx);
1242
1243        vm = ctx->vm;
1244        if (!USES_FULL_PPGTT(dev))
1245                vm = &dev_priv->gtt.base;
1246
1247        eb = eb_create(args);
1248        if (eb == NULL) {
1249                i915_gem_context_unreference(ctx);
1250                mutex_unlock(&dev->struct_mutex);
1251                ret = -ENOMEM;
1252                goto pre_mutex_err;
1253        }
1254
1255        /* Look up object handles */
1256        ret = eb_lookup_vmas(eb, exec, args, vm, file);
1257        if (ret)
1258                goto err;
1259
1260        /* take note of the batch buffer before we might reorder the lists */
1261        batch_obj = eb_get_batch(eb);
1262
1263        /* Move the objects en-masse into the GTT, evicting if necessary. */
1264        need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1265        ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
1266        if (ret)
1267                goto err;
1268
1269        /* The objects are in their final locations, apply the relocations. */
1270        if (need_relocs)
1271                ret = i915_gem_execbuffer_relocate(eb);
1272        if (ret) {
1273                if (ret == -EFAULT) {
1274                        ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1275                                                                eb, exec);
1276                        BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1277                }
1278                if (ret)
1279                        goto err;
1280        }
1281
1282        /* Set the pending read domains for the batch buffer to COMMAND */
1283        if (batch_obj->base.pending_write_domain) {
1284                DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1285                ret = -EINVAL;
1286                goto err;
1287        }
1288        batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1289
1290        if (i915_needs_cmd_parser(ring)) {
1291                ret = i915_parse_cmds(ring,
1292                                      batch_obj,
1293                                      args->batch_start_offset,
1294                                      file->is_master);
1295                if (ret)
1296                        goto err;
1297
1298                /*
1299                 * XXX: Actually do this when enabling batch copy...
1300                 *
1301                 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
1302                 * from MI_BATCH_BUFFER_START commands issued in the
1303                 * dispatch_execbuffer implementations. We specifically don't
1304                 * want that set when the command parser is enabled.
1305                 */
1306        }
1307
1308        /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1309         * batch" bit. Hence we need to pin secure batches into the global gtt.
1310         * hsw should have this fixed, but bdw mucks it up again. */
1311        if (flags & I915_DISPATCH_SECURE &&
1312            !batch_obj->has_global_gtt_mapping) {
1313                /* When we have multiple VMs, we'll need to make sure that we
1314                 * allocate space first */
1315                struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
1316                BUG_ON(!vma);
1317                vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
1318        }
1319
1320        if (flags & I915_DISPATCH_SECURE)
1321                exec_start += i915_gem_obj_ggtt_offset(batch_obj);
1322        else
1323                exec_start += i915_gem_obj_offset(batch_obj, vm);
1324
1325        ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
1326        if (ret)
1327                goto err;
1328
1329        ret = i915_switch_context(ring, ctx);
1330        if (ret)
1331                goto err;
1332
1333        if (ring == &dev_priv->ring[RCS] &&
1334            mode != dev_priv->relative_constants_mode) {
1335                ret = intel_ring_begin(ring, 4);
1336                if (ret)
 1337                        goto err;
1338
1339                intel_ring_emit(ring, MI_NOOP);
1340                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1341                intel_ring_emit(ring, INSTPM);
1342                intel_ring_emit(ring, mask << 16 | mode);
1343                intel_ring_advance(ring);
1344
1345                dev_priv->relative_constants_mode = mode;
1346        }
1347
1348        if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1349                ret = i915_reset_gen7_sol_offsets(dev, ring);
1350                if (ret)
1351                        goto err;
1352        }
1353
1354
1355        exec_len = args->batch_len;
1356        if (cliprects) {
1357                for (i = 0; i < args->num_cliprects; i++) {
1358                        ret = i915_emit_box(dev, &cliprects[i],
1359                                            args->DR1, args->DR4);
1360                        if (ret)
1361                                goto err;
1362
1363                        ret = ring->dispatch_execbuffer(ring,
1364                                                        exec_start, exec_len,
1365                                                        flags);
1366                        if (ret)
1367                                goto err;
1368                }
1369        } else {
1370                ret = ring->dispatch_execbuffer(ring,
1371                                                exec_start, exec_len,
1372                                                flags);
1373                if (ret)
1374                        goto err;
1375        }
1376
1377        trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
1378
1379        i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
1380        i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1381
1382err:
1383        /* the request owns the ref now */
1384        i915_gem_context_unreference(ctx);
1385        eb_destroy(eb);
1386
1387        mutex_unlock(&dev->struct_mutex);
1388
1389pre_mutex_err:
1390        kfree(cliprects);
1391
1392        /* intel_gpu_busy should also get a ref, so it will free when the device
1393         * is really idle. */
1394        intel_runtime_pm_put(dev_priv);
1395        return ret;
1396}
1397
1398/*
1399 * Legacy execbuffer just creates an exec2 list from the original exec object
1400 * list array and passes it to the real function.
1401 */
1402int
1403i915_gem_execbuffer(struct drm_device *dev, void *data,
1404                    struct drm_file *file)
1405{
1406        struct drm_i915_gem_execbuffer *args = data;
1407        struct drm_i915_gem_execbuffer2 exec2;
1408        struct drm_i915_gem_exec_object *exec_list = NULL;
1409        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1410        int ret, i;
1411
1412        if (args->buffer_count < 1) {
1413                DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1414                return -EINVAL;
1415        }
1416
1417        /* Copy in the exec list from userland */
1418        exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1419        exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1420        if (exec_list == NULL || exec2_list == NULL) {
1421                DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1422                          args->buffer_count);
1423                drm_free_large(exec_list);
1424                drm_free_large(exec2_list);
1425                return -ENOMEM;
1426        }
1427        ret = copy_from_user(exec_list,
1428                             to_user_ptr(args->buffers_ptr),
1429                             sizeof(*exec_list) * args->buffer_count);
1430        if (ret != 0) {
1431                DRM_DEBUG("copy %d exec entries failed %d\n",
1432                          args->buffer_count, ret);
1433                drm_free_large(exec_list);
1434                drm_free_large(exec2_list);
1435                return -EFAULT;
1436        }
1437
1438        for (i = 0; i < args->buffer_count; i++) {
1439                exec2_list[i].handle = exec_list[i].handle;
1440                exec2_list[i].relocation_count = exec_list[i].relocation_count;
1441                exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1442                exec2_list[i].alignment = exec_list[i].alignment;
1443                exec2_list[i].offset = exec_list[i].offset;
1444                if (INTEL_INFO(dev)->gen < 4)
1445                        exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1446                else
1447                        exec2_list[i].flags = 0;
1448        }
1449
1450        exec2.buffers_ptr = args->buffers_ptr;
1451        exec2.buffer_count = args->buffer_count;
1452        exec2.batch_start_offset = args->batch_start_offset;
1453        exec2.batch_len = args->batch_len;
1454        exec2.DR1 = args->DR1;
1455        exec2.DR4 = args->DR4;
1456        exec2.num_cliprects = args->num_cliprects;
1457        exec2.cliprects_ptr = args->cliprects_ptr;
1458        exec2.flags = I915_EXEC_RENDER;
1459        i915_execbuffer2_set_context_id(exec2, 0);
1460
1461        ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1462        if (!ret) {
1463                struct drm_i915_gem_exec_object __user *user_exec_list =
1464                        to_user_ptr(args->buffers_ptr);
1465
1466                /* Copy the new buffer offsets back to the user's exec list. */
1467                for (i = 0; i < args->buffer_count; i++) {
1468                        ret = __copy_to_user(&user_exec_list[i].offset,
1469                                             &exec2_list[i].offset,
1470                                             sizeof(user_exec_list[i].offset));
1471                        if (ret) {
1472                                ret = -EFAULT;
1473                                DRM_DEBUG("failed to copy %d exec entries "
1474                                          "back to user (%d)\n",
1475                                          args->buffer_count, ret);
1476                                break;
1477                        }
1478                }
1479        }
1480
1481        drm_free_large(exec_list);
1482        drm_free_large(exec2_list);
1483        return ret;
1484}
1485
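/*
 * execbuffer2 entry point: copy in the exec object list, run the common
 * implementation and copy the updated offsets back to userspace.
 */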
1486int
1487i915_gem_execbuffer2(struct drm_device *dev, void *data,
1488                     struct drm_file *file)
1489{
1490        struct drm_i915_gem_execbuffer2 *args = data;
1491        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1492        int ret;
1493
1494        if (args->buffer_count < 1 ||
1495            args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1496                DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1497                return -EINVAL;
1498        }
1499
1500        if (args->rsvd2 != 0) {
1501                DRM_DEBUG("dirty rvsd2 field\n");
1502                return -EINVAL;
1503        }
1504
1505        exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1506                             GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1507        if (exec2_list == NULL)
1508                exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1509                                           args->buffer_count);
1510        if (exec2_list == NULL) {
1511                DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1512                          args->buffer_count);
1513                return -ENOMEM;
1514        }
1515        ret = copy_from_user(exec2_list,
1516                             to_user_ptr(args->buffers_ptr),
1517                             sizeof(*exec2_list) * args->buffer_count);
1518        if (ret != 0) {
1519                DRM_DEBUG("copy %d exec entries failed %d\n",
1520                          args->buffer_count, ret);
1521                drm_free_large(exec2_list);
1522                return -EFAULT;
1523        }
1524
1525        ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1526        if (!ret) {
1527                /* Copy the new buffer offsets back to the user's exec list. */
1528                struct drm_i915_gem_exec_object2 *user_exec_list =
1529                                   to_user_ptr(args->buffers_ptr);
1530                int i;
1531
1532                for (i = 0; i < args->buffer_count; i++) {
1533                        ret = __copy_to_user(&user_exec_list[i].offset,
1534                                             &exec2_list[i].offset,
1535                                             sizeof(user_exec_list[i].offset));
1536                        if (ret) {
1537                                ret = -EFAULT;
1538                                DRM_DEBUG("failed to copy %d exec entries "
1539                                          "back to user\n",
1540                                          args->buffer_count);
1541                                break;
1542                        }
1543                }
1544        }
1545
1546        drm_free_large(exec2_list);
1547        return ret;
1548}
1549