linux/drivers/gpu/drm/i915/i915_gem_execbuffer.c
   1/*
   2 * Copyright © 2008,2010 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *    Chris Wilson <chris@chris-wilson.co.uk>
  26 *
  27 */
  28
  29#include <drm/drmP.h>
  30#include <drm/i915_drm.h>
  31#include "i915_drv.h"
  32#include "i915_trace.h"
  33#include "intel_drv.h"
  34#include <linux/dma_remapping.h>
  35
  36#define  __EXEC_OBJECT_HAS_PIN (1<<31)
  37#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
  38#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
  39
  40#define BATCH_OFFSET_BIAS (256*1024)
  41
  42struct eb_vmas {
  43        struct list_head vmas;
  44        int and;
  45        union {
  46                struct i915_vma *lut[0];
  47                struct hlist_head buckets[0];
  48        };
  49};
  50
  51static struct eb_vmas *
  52eb_create(struct drm_i915_gem_execbuffer2 *args)
  53{
  54        struct eb_vmas *eb = NULL;
  55
  56        if (args->flags & I915_EXEC_HANDLE_LUT) {
  57                unsigned size = args->buffer_count;
  58                size *= sizeof(struct i915_vma *);
  59                size += sizeof(struct eb_vmas);
  60                eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
  61        }
  62
  63        if (eb == NULL) {
  64                unsigned size = args->buffer_count;
  65                unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
  66                BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
  67                while (count > 2*size)
  68                        count >>= 1;
  69                eb = kzalloc(count*sizeof(struct hlist_head) +
  70                             sizeof(struct eb_vmas),
  71                             GFP_TEMPORARY);
  72                if (eb == NULL)
  73                        return eb;
  74
  75                eb->and = count - 1;
  76        } else
  77                eb->and = -args->buffer_count;
  78
  79        INIT_LIST_HEAD(&eb->vmas);
  80        return eb;
  81}
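
/*
 * A rough note on the sizing above: with I915_EXEC_HANDLE_LUT userspace
 * promises dense handle indices, so a flat lut[] of buffer_count pointers is
 * tried first (opportunistically, hence __GFP_NOWARN | __GFP_NORETRY).
 * Otherwise, or if that allocation fails, a power-of-two hash-bucket array is
 * used, starting at half a page worth of hlist_heads and halved while it is
 * still more than twice buffer_count. For example, assuming 4K pages and
 * 8-byte hlist_heads, buffer_count == 100 starts at 256 buckets and shrinks
 * once to 128, giving eb->and == 127; a negative eb->and (-buffer_count)
 * marks direct-LUT mode instead.
 */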
  82
  83static void
  84eb_reset(struct eb_vmas *eb)
  85{
  86        if (eb->and >= 0)
  87                memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
  88}
  89
  90static int
  91eb_lookup_vmas(struct eb_vmas *eb,
  92               struct drm_i915_gem_exec_object2 *exec,
  93               const struct drm_i915_gem_execbuffer2 *args,
  94               struct i915_address_space *vm,
  95               struct drm_file *file)
  96{
  97        struct drm_i915_private *dev_priv = vm->dev->dev_private;
  98        struct drm_i915_gem_object *obj;
  99        struct list_head objects;
 100        int i, ret;
 101
 102        INIT_LIST_HEAD(&objects);
 103        spin_lock(&file->table_lock);
 104        /* Grab a reference to the object and release the lock so we can
 105         * look up or create the VMA without using GFP_ATOMIC. */
 106        for (i = 0; i < args->buffer_count; i++) {
 107                obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
 108                if (obj == NULL) {
 109                        spin_unlock(&file->table_lock);
 110                        DRM_DEBUG("Invalid object handle %d at index %d\n",
 111                                   exec[i].handle, i);
 112                        ret = -ENOENT;
 113                        goto err;
 114                }
 115
 116                if (!list_empty(&obj->obj_exec_link)) {
 117                        spin_unlock(&file->table_lock);
 118                        DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
 119                                   obj, exec[i].handle, i);
 120                        ret = -EINVAL;
 121                        goto err;
 122                }
 123
 124                drm_gem_object_reference(&obj->base);
 125                list_add_tail(&obj->obj_exec_link, &objects);
 126        }
 127        spin_unlock(&file->table_lock);
 128
 129        i = 0;
 130        while (!list_empty(&objects)) {
 131                struct i915_vma *vma;
 132                struct i915_address_space *bind_vm = vm;
 133
 134                if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
 135                    USES_FULL_PPGTT(vm->dev)) {
 136                        ret = -EINVAL;
 137                        goto err;
 138                }
 139
 140                /* If we have secure dispatch, or userspace assures us that
 141                 * it knows what it is doing, use the GGTT VM.
 142                 */
 143                if (((args->flags & I915_EXEC_SECURE) &&
 144                    (i == (args->buffer_count - 1))))
 145                        bind_vm = &dev_priv->gtt.base;
 146
 147                obj = list_first_entry(&objects,
 148                                       struct drm_i915_gem_object,
 149                                       obj_exec_link);
 150
 151                /*
 152                 * NOTE: We can leak any vmas created here when something fails
 153                 * later on. But that's no issue since vma_unbind can deal with
 154                 * vmas which are not actually bound. And since only
 155                 * lookup_or_create exists as an interface to get at the vma
 156                 * from the (obj, vm) we don't run the risk of creating
 157                 * duplicated vmas for the same vm.
 158                 */
 159                vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
 160                if (IS_ERR(vma)) {
 161                        DRM_DEBUG("Failed to lookup VMA\n");
 162                        ret = PTR_ERR(vma);
 163                        goto err;
 164                }
 165
 166                /* Transfer ownership from the objects list to the vmas list. */
 167                list_add_tail(&vma->exec_list, &eb->vmas);
 168                list_del_init(&obj->obj_exec_link);
 169
 170                vma->exec_entry = &exec[i];
 171                if (eb->and < 0) {
 172                        eb->lut[i] = vma;
 173                } else {
 174                        uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
 175                        vma->exec_handle = handle;
 176                        hlist_add_head(&vma->exec_node,
 177                                       &eb->buckets[handle & eb->and]);
 178                }
 179                ++i;
 180        }
 181
 182        return 0;
 183
 184
 185err:
 186        while (!list_empty(&objects)) {
 187                obj = list_first_entry(&objects,
 188                                       struct drm_i915_gem_object,
 189                                       obj_exec_link);
 190                list_del_init(&obj->obj_exec_link);
 191                drm_gem_object_unreference(&obj->base);
 192        }
 193        /*
 194         * Objects already transferred to the vmas list will be unreferenced by
 195         * eb_destroy.
 196         */
 197
 198        return ret;
 199}
 200
 201static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
 202{
 203        if (eb->and < 0) {
 204                if (handle >= -eb->and)
 205                        return NULL;
 206                return eb->lut[handle];
 207        } else {
 208                struct hlist_head *head;
 209                struct hlist_node *node;
 210
 211                head = &eb->buckets[handle & eb->and];
 212                hlist_for_each(node, head) {
 213                        struct i915_vma *vma;
 214
 215                        vma = hlist_entry(node, struct i915_vma, exec_node);
 216                        if (vma->exec_handle == handle)
 217                                return vma;
 218                }
 219                return NULL;
 220        }
 221}
 222
 223static void
 224i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 225{
 226        struct drm_i915_gem_exec_object2 *entry;
 227        struct drm_i915_gem_object *obj = vma->obj;
 228
 229        if (!drm_mm_node_allocated(&vma->node))
 230                return;
 231
 232        entry = vma->exec_entry;
 233
 234        if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
 235                i915_gem_object_unpin_fence(obj);
 236
 237        if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 238                vma->pin_count--;
 239
 240        entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 241}
 242
 243static void eb_destroy(struct eb_vmas *eb)
 244{
 245        while (!list_empty(&eb->vmas)) {
 246                struct i915_vma *vma;
 247
 248                vma = list_first_entry(&eb->vmas,
 249                                       struct i915_vma,
 250                                       exec_list);
 251                list_del_init(&vma->exec_list);
 252                i915_gem_execbuffer_unreserve_vma(vma);
 253                drm_gem_object_unreference(&vma->obj->base);
 254        }
 255        kfree(eb);
 256}
 257
 258static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 259{
 260        return (HAS_LLC(obj->base.dev) ||
 261                obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
 262                !obj->map_and_fenceable ||
 263                obj->cache_level != I915_CACHE_NONE);
 264}
 265
 266static int
 267relocate_entry_cpu(struct drm_i915_gem_object *obj,
 268                   struct drm_i915_gem_relocation_entry *reloc,
 269                   uint64_t target_offset)
 270{
 271        struct drm_device *dev = obj->base.dev;
 272        uint32_t page_offset = offset_in_page(reloc->offset);
 273        uint64_t delta = reloc->delta + target_offset;
 274        char *vaddr;
 275        int ret;
 276
 277        ret = i915_gem_object_set_to_cpu_domain(obj, true);
 278        if (ret)
 279                return ret;
 280
 281        vaddr = kmap_atomic(i915_gem_object_get_page(obj,
 282                                reloc->offset >> PAGE_SHIFT));
 283        *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
 284
 285        if (INTEL_INFO(dev)->gen >= 8) {
 286                page_offset = offset_in_page(page_offset + sizeof(uint32_t));
 287
 288                if (page_offset == 0) {
 289                        kunmap_atomic(vaddr);
 290                        vaddr = kmap_atomic(i915_gem_object_get_page(obj,
 291                            (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
 292                }
 293
 294                *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
 295        }
 296
 297        kunmap_atomic(vaddr);
 298
 299        return 0;
 300}
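
/*
 * Presumably the gen8+ branch above exists because relocations become 64-bit
 * there: after the low dword is written, the high dword may start on the
 * following page, in which case the code drops the kmap_atomic() mapping and
 * maps the next page before writing upper_32_bits(delta).
 */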
 301
 302static int
 303relocate_entry_gtt(struct drm_i915_gem_object *obj,
 304                   struct drm_i915_gem_relocation_entry *reloc,
 305                   uint64_t target_offset)
 306{
 307        struct drm_device *dev = obj->base.dev;
 308        struct drm_i915_private *dev_priv = dev->dev_private;
 309        uint64_t delta = reloc->delta + target_offset;
 310        uint32_t __iomem *reloc_entry;
 311        void __iomem *reloc_page;
 312        int ret;
 313
 314        ret = i915_gem_object_set_to_gtt_domain(obj, true);
 315        if (ret)
 316                return ret;
 317
 318        ret = i915_gem_object_put_fence(obj);
 319        if (ret)
 320                return ret;
 321
 322        /* Map the page containing the relocation we're going to perform.  */
 323        reloc->offset += i915_gem_obj_ggtt_offset(obj);
 324        reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
 325                        reloc->offset & PAGE_MASK);
 326        reloc_entry = (uint32_t __iomem *)
 327                (reloc_page + offset_in_page(reloc->offset));
 328        iowrite32(lower_32_bits(delta), reloc_entry);
 329
 330        if (INTEL_INFO(dev)->gen >= 8) {
 331                reloc_entry += 1;
 332
 333                if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) {
 334                        io_mapping_unmap_atomic(reloc_page);
 335                        reloc_page = io_mapping_map_atomic_wc(
 336                                        dev_priv->gtt.mappable,
 337                                        reloc->offset + sizeof(uint32_t));
 338                        reloc_entry = reloc_page;
 339                }
 340
 341                iowrite32(upper_32_bits(delta), reloc_entry);
 342        }
 343
 344        io_mapping_unmap_atomic(reloc_page);
 345
 346        return 0;
 347}
 348
 349static int
 350i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 351                                   struct eb_vmas *eb,
 352                                   struct drm_i915_gem_relocation_entry *reloc)
 353{
 354        struct drm_device *dev = obj->base.dev;
 355        struct drm_gem_object *target_obj;
 356        struct drm_i915_gem_object *target_i915_obj;
 357        struct i915_vma *target_vma;
 358        uint64_t target_offset;
 359        int ret;
 360
 361        /* we already hold a reference to all valid objects */
 362        target_vma = eb_get_vma(eb, reloc->target_handle);
 363        if (unlikely(target_vma == NULL))
 364                return -ENOENT;
 365        target_i915_obj = target_vma->obj;
 366        target_obj = &target_vma->obj->base;
 367
 368        target_offset = target_vma->node.start;
 369
 370        /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
 371         * pipe_control writes because the gpu doesn't properly redirect them
 372         * through the ppgtt for non-secure batchbuffers. */
 373        if (unlikely(IS_GEN6(dev) &&
 374            reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 375            !target_i915_obj->has_global_gtt_mapping)) {
 376                struct i915_vma *vma =
 377                        list_first_entry(&target_i915_obj->vma_list,
 378                                         typeof(*vma), vma_link);
 379                vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
 380        }
 381
 382        /* Validate that the target is in a valid r/w GPU domain */
 383        if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
 384                DRM_DEBUG("reloc with multiple write domains: "
 385                          "obj %p target %d offset %d "
 386                          "read %08x write %08x",
 387                          obj, reloc->target_handle,
 388                          (int) reloc->offset,
 389                          reloc->read_domains,
 390                          reloc->write_domain);
 391                return -EINVAL;
 392        }
 393        if (unlikely((reloc->write_domain | reloc->read_domains)
 394                     & ~I915_GEM_GPU_DOMAINS)) {
 395                DRM_DEBUG("reloc with read/write non-GPU domains: "
 396                          "obj %p target %d offset %d "
 397                          "read %08x write %08x",
 398                          obj, reloc->target_handle,
 399                          (int) reloc->offset,
 400                          reloc->read_domains,
 401                          reloc->write_domain);
 402                return -EINVAL;
 403        }
 404
 405        target_obj->pending_read_domains |= reloc->read_domains;
 406        target_obj->pending_write_domain |= reloc->write_domain;
 407
 408        /* If the relocation already has the right value in it, no
 409         * more work needs to be done.
 410         */
 411        if (target_offset == reloc->presumed_offset)
 412                return 0;
 413
 414        /* Check that the relocation address is valid... */
 415        if (unlikely(reloc->offset >
 416                obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
 417                DRM_DEBUG("Relocation beyond object bounds: "
 418                          "obj %p target %d offset %d size %d.\n",
 419                          obj, reloc->target_handle,
 420                          (int) reloc->offset,
 421                          (int) obj->base.size);
 422                return -EINVAL;
 423        }
 424        if (unlikely(reloc->offset & 3)) {
 425                DRM_DEBUG("Relocation not 4-byte aligned: "
 426                          "obj %p target %d offset %d.\n",
 427                          obj, reloc->target_handle,
 428                          (int) reloc->offset);
 429                return -EINVAL;
 430        }
 431
 432        /* We can't wait for rendering with pagefaults disabled */
 433        if (obj->active && in_atomic())
 434                return -EFAULT;
 435
 436        if (use_cpu_reloc(obj))
 437                ret = relocate_entry_cpu(obj, reloc, target_offset);
 438        else
 439                ret = relocate_entry_gtt(obj, reloc, target_offset);
 440
 441        if (ret)
 442                return ret;
 443
 444        /* and update the user's relocation entry */
 445        reloc->presumed_offset = target_offset;
 446
 447        return 0;
 448}
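
/*
 * In short, a single relocation entry is handled as: look up the target vma
 * by handle, apply the gen6 PPGTT workaround if needed, validate the
 * read/write domains and the (4-byte aligned, in-bounds) relocation offset,
 * skip the write entirely if presumed_offset already matches the real
 * placement, and otherwise patch the buffer via the CPU or GTT path and
 * record the new presumed_offset for userspace.
 */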
 449
 450static int
 451i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 452                                 struct eb_vmas *eb)
 453{
 454#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 455        struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
 456        struct drm_i915_gem_relocation_entry __user *user_relocs;
 457        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 458        int remain, ret;
 459
 460        user_relocs = to_user_ptr(entry->relocs_ptr);
 461
 462        remain = entry->relocation_count;
 463        while (remain) {
 464                struct drm_i915_gem_relocation_entry *r = stack_reloc;
 465                int count = remain;
 466                if (count > ARRAY_SIZE(stack_reloc))
 467                        count = ARRAY_SIZE(stack_reloc);
 468                remain -= count;
 469
 470                if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
 471                        return -EFAULT;
 472
 473                do {
 474                        u64 offset = r->presumed_offset;
 475
 476                        ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
 477                        if (ret)
 478                                return ret;
 479
 480                        if (r->presumed_offset != offset &&
 481                            __copy_to_user_inatomic(&user_relocs->presumed_offset,
 482                                                    &r->presumed_offset,
 483                                                    sizeof(r->presumed_offset))) {
 484                                return -EFAULT;
 485                        }
 486
 487                        user_relocs++;
 488                        r++;
 489                } while (--count);
 490        }
 491
 492        return 0;
 493#undef N_RELOC
 494}
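
/*
 * The fast path above copies relocations in small chunks onto the stack:
 * N_RELOC(512) with the 32-byte struct drm_i915_gem_relocation_entry works
 * out to 16 entries per iteration. The __copy_*_inatomic() variants are used
 * because the caller runs with pagefaults disabled; any fault surfaces as
 * -EFAULT and the ioctl falls back to the slow path.
 */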
 495
 496static int
 497i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 498                                      struct eb_vmas *eb,
 499                                      struct drm_i915_gem_relocation_entry *relocs)
 500{
 501        const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 502        int i, ret;
 503
 504        for (i = 0; i < entry->relocation_count; i++) {
 505                ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
 506                if (ret)
 507                        return ret;
 508        }
 509
 510        return 0;
 511}
 512
 513static int
 514i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 515{
 516        struct i915_vma *vma;
 517        int ret = 0;
 518
 519        /* This is the fast path and we cannot handle a pagefault whilst
 520         * holding the struct mutex lest the user pass in the relocations
 521         * contained within an mmaped bo, for in such a case the page
 522         * fault handler would call i915_gem_fault() and we would try to
 523         * acquire the struct mutex again. Obviously this is bad and so
 524         * lockdep complains vehemently.
 525         */
 526        pagefault_disable();
 527        list_for_each_entry(vma, &eb->vmas, exec_list) {
 528                ret = i915_gem_execbuffer_relocate_vma(vma, eb);
 529                if (ret)
 530                        break;
 531        }
 532        pagefault_enable();
 533
 534        return ret;
 535}
 536
 537static int
 538need_reloc_mappable(struct i915_vma *vma)
 539{
 540        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 541        return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
 542                i915_is_ggtt(vma->vm);
 543}
 544
 545static int
 546i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 547                                struct intel_engine_cs *ring,
 548                                bool *need_reloc)
 549{
 550        struct drm_i915_gem_object *obj = vma->obj;
 551        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 552        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 553        bool need_fence;
 554        uint64_t flags;
 555        int ret;
 556
 557        flags = 0;
 558
 559        need_fence =
 560                has_fenced_gpu_access &&
 561                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 562                obj->tiling_mode != I915_TILING_NONE;
 563        if (need_fence || need_reloc_mappable(vma))
 564                flags |= PIN_MAPPABLE;
 565
 566        if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
 567                flags |= PIN_GLOBAL;
 568        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 569                flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 570
 571        ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
 572        if (ret)
 573                return ret;
 574
 575        entry->flags |= __EXEC_OBJECT_HAS_PIN;
 576
 577        if (has_fenced_gpu_access) {
 578                if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
 579                        ret = i915_gem_object_get_fence(obj);
 580                        if (ret)
 581                                return ret;
 582
 583                        if (i915_gem_object_pin_fence(obj))
 584                                entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 585
 586                        obj->pending_fenced_gpu_access = true;
 587                }
 588        }
 589
 590        if (entry->offset != vma->node.start) {
 591                entry->offset = vma->node.start;
 592                *need_reloc = true;
 593        }
 594
 595        if (entry->flags & EXEC_OBJECT_WRITE) {
 596                obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
 597                obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
 598        }
 599
 600        return 0;
 601}
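
/*
 * Roughly, the pin flags above map as follows: a fence requirement or
 * relocations that must go through the GTT aperture force PIN_MAPPABLE,
 * EXEC_OBJECT_NEEDS_GTT adds PIN_GLOBAL, and __EXEC_OBJECT_NEEDS_BIAS (set on
 * the batch) adds PIN_OFFSET_BIAS with BATCH_OFFSET_BIAS (256KiB) so the
 * batch does not end up at the very bottom of the address space.
 */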
 602
 603static bool
 604eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
 605{
 606        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 607        struct drm_i915_gem_object *obj = vma->obj;
 608        bool need_fence, need_mappable;
 609
 610        need_fence =
 611                has_fenced_gpu_access &&
 612                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 613                obj->tiling_mode != I915_TILING_NONE;
 614        need_mappable = need_fence || need_reloc_mappable(vma);
 615
 616        WARN_ON((need_mappable || need_fence) &&
 617               !i915_is_ggtt(vma->vm));
 618
 619        if (entry->alignment &&
 620            vma->node.start & (entry->alignment - 1))
 621                return true;
 622
 623        if (need_mappable && !obj->map_and_fenceable)
 624                return true;
 625
 626        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 627            vma->node.start < BATCH_OFFSET_BIAS)
 628                return true;
 629
 630        return false;
 631}
 632
 633static int
 634i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 635                            struct list_head *vmas,
 636                            bool *need_relocs)
 637{
 638        struct drm_i915_gem_object *obj;
 639        struct i915_vma *vma;
 640        struct i915_address_space *vm;
 641        struct list_head ordered_vmas;
 642        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 643        int retry;
 644
 645        if (list_empty(vmas))
 646                return 0;
 647
 648        i915_gem_retire_requests_ring(ring);
 649
 650        vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 651
 652        INIT_LIST_HEAD(&ordered_vmas);
 653        while (!list_empty(vmas)) {
 654                struct drm_i915_gem_exec_object2 *entry;
 655                bool need_fence, need_mappable;
 656
 657                vma = list_first_entry(vmas, struct i915_vma, exec_list);
 658                obj = vma->obj;
 659                entry = vma->exec_entry;
 660
 661                need_fence =
 662                        has_fenced_gpu_access &&
 663                        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 664                        obj->tiling_mode != I915_TILING_NONE;
 665                need_mappable = need_fence || need_reloc_mappable(vma);
 666
 667                if (need_mappable)
 668                        list_move(&vma->exec_list, &ordered_vmas);
 669                else
 670                        list_move_tail(&vma->exec_list, &ordered_vmas);
 671
 672                obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
 673                obj->base.pending_write_domain = 0;
 674                obj->pending_fenced_gpu_access = false;
 675        }
 676        list_splice(&ordered_vmas, vmas);
 677
 678        /* Attempt to pin all of the buffers into the GTT.
 679         * This is done in 3 phases:
 680         *
 681         * 1a. Unbind all objects that do not match the GTT constraints for
 682         *     the execbuffer (fenceable, mappable, alignment etc).
 683         * 1b. Increment pin count for already bound objects.
 684         * 2.  Bind new objects.
 685         * 3.  Decrement pin count.
 686         *
 687         * This avoids unnecessary unbinding of later objects in order to make
 688         * room for the earlier objects *unless* we need to defragment.
 689         */
 690        retry = 0;
 691        do {
 692                int ret = 0;
 693
 694                /* Unbind any ill-fitting objects or pin. */
 695                list_for_each_entry(vma, vmas, exec_list) {
 696                        if (!drm_mm_node_allocated(&vma->node))
 697                                continue;
 698
 699                        if (eb_vma_misplaced(vma, has_fenced_gpu_access))
 700                                ret = i915_vma_unbind(vma);
 701                        else
 702                                ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
 703                        if (ret)
 704                                goto err;
 705                }
 706
 707                /* Bind fresh objects */
 708                list_for_each_entry(vma, vmas, exec_list) {
 709                        if (drm_mm_node_allocated(&vma->node))
 710                                continue;
 711
 712                        ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
 713                        if (ret)
 714                                goto err;
 715                }
 716
 717err:
 718                if (ret != -ENOSPC || retry++)
 719                        return ret;
 720
 721                /* Decrement pin count for bound objects */
 722                list_for_each_entry(vma, vmas, exec_list)
 723                        i915_gem_execbuffer_unreserve_vma(vma);
 724
 725                ret = i915_gem_evict_vm(vm, true);
 726                if (ret)
 727                        return ret;
 728        } while (1);
 729}
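
/*
 * Note that the retry above is bounded: only on -ENOSPC does the loop
 * unreserve everything, evict the whole VM with i915_gem_evict_vm() and try
 * again, and the retry++ guard means that full eviction is attempted exactly
 * once before the error is returned.
 */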
 730
 731static int
 732i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 733                                  struct drm_i915_gem_execbuffer2 *args,
 734                                  struct drm_file *file,
 735                                  struct intel_engine_cs *ring,
 736                                  struct eb_vmas *eb,
 737                                  struct drm_i915_gem_exec_object2 *exec)
 738{
 739        struct drm_i915_gem_relocation_entry *reloc;
 740        struct i915_address_space *vm;
 741        struct i915_vma *vma;
 742        bool need_relocs;
 743        int *reloc_offset;
 744        int i, total, ret;
 745        unsigned count = args->buffer_count;
 746
 747        if (WARN_ON(list_empty(&eb->vmas)))
 748                return 0;
 749
 750        vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
 751
 752        /* We may process another execbuffer during the unlock... */
 753        while (!list_empty(&eb->vmas)) {
 754                vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
 755                list_del_init(&vma->exec_list);
 756                i915_gem_execbuffer_unreserve_vma(vma);
 757                drm_gem_object_unreference(&vma->obj->base);
 758        }
 759
 760        mutex_unlock(&dev->struct_mutex);
 761
 762        total = 0;
 763        for (i = 0; i < count; i++)
 764                total += exec[i].relocation_count;
 765
 766        reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
 767        reloc = drm_malloc_ab(total, sizeof(*reloc));
 768        if (reloc == NULL || reloc_offset == NULL) {
 769                drm_free_large(reloc);
 770                drm_free_large(reloc_offset);
 771                mutex_lock(&dev->struct_mutex);
 772                return -ENOMEM;
 773        }
 774
 775        total = 0;
 776        for (i = 0; i < count; i++) {
 777                struct drm_i915_gem_relocation_entry __user *user_relocs;
 778                u64 invalid_offset = (u64)-1;
 779                int j;
 780
 781                user_relocs = to_user_ptr(exec[i].relocs_ptr);
 782
 783                if (copy_from_user(reloc+total, user_relocs,
 784                                   exec[i].relocation_count * sizeof(*reloc))) {
 785                        ret = -EFAULT;
 786                        mutex_lock(&dev->struct_mutex);
 787                        goto err;
 788                }
 789
 790                /* As we do not update the known relocation offsets after
 791                 * relocating (due to the complexities in lock handling),
 792                 * we need to mark them as invalid now so that we force the
 793                 * relocation processing next time. Just in case the target
 794                 * object is evicted and then rebound into its old
 795                 * presumed_offset before the next execbuffer - if that
 796                 * happened we would make the mistake of assuming that the
 797                 * relocations were valid.
 798                 */
 799                for (j = 0; j < exec[i].relocation_count; j++) {
 800                        if (__copy_to_user(&user_relocs[j].presumed_offset,
 801                                           &invalid_offset,
 802                                           sizeof(invalid_offset))) {
 803                                ret = -EFAULT;
 804                                mutex_lock(&dev->struct_mutex);
 805                                goto err;
 806                        }
 807                }
 808
 809                reloc_offset[i] = total;
 810                total += exec[i].relocation_count;
 811        }
 812
 813        ret = i915_mutex_lock_interruptible(dev);
 814        if (ret) {
 815                mutex_lock(&dev->struct_mutex);
 816                goto err;
 817        }
 818
 819        /* reacquire the objects */
 820        eb_reset(eb);
 821        ret = eb_lookup_vmas(eb, exec, args, vm, file);
 822        if (ret)
 823                goto err;
 824
 825        need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
 826        ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
 827        if (ret)
 828                goto err;
 829
 830        list_for_each_entry(vma, &eb->vmas, exec_list) {
 831                int offset = vma->exec_entry - exec;
 832                ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
 833                                                            reloc + reloc_offset[offset]);
 834                if (ret)
 835                        goto err;
 836        }
 837
 838        /* Leave the user relocations as they are; this is the painfully slow path,
 839         * and we want to avoid the complication of dropping the lock whilst
 840         * having buffers reserved in the aperture and so causing spurious
 841         * ENOSPC for random operations.
 842         */
 843
 844err:
 845        drm_free_large(reloc);
 846        drm_free_large(reloc_offset);
 847        return ret;
 848}
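
/*
 * The slow path above works entirely from a kernel copy of the relocations:
 * struct_mutex is dropped, every relocation array is copied in with
 * copy_from_user() (reloc_offset[] remembering where each object's entries
 * start), the user-visible presumed_offset fields are poisoned with -1 so a
 * later fast pass cannot trust stale values, and then the lock is retaken,
 * the vmas are looked up and reserved again, and the relocations are applied
 * from the kernel buffer.
 */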
 849
 850static int
 851i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 852                                struct list_head *vmas)
 853{
 854        struct i915_vma *vma;
 855        uint32_t flush_domains = 0;
 856        bool flush_chipset = false;
 857        int ret;
 858
 859        list_for_each_entry(vma, vmas, exec_list) {
 860                struct drm_i915_gem_object *obj = vma->obj;
 861                ret = i915_gem_object_sync(obj, ring);
 862                if (ret)
 863                        return ret;
 864
 865                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 866                        flush_chipset |= i915_gem_clflush_object(obj, false);
 867
 868                flush_domains |= obj->base.write_domain;
 869        }
 870
 871        if (flush_chipset)
 872                i915_gem_chipset_flush(ring->dev);
 873
 874        if (flush_domains & I915_GEM_DOMAIN_GTT)
 875                wmb();
 876
 877        /* Unconditionally invalidate gpu caches and ensure that we do flush
 878         * any residual writes from the previous batch.
 879         */
 880        return intel_ring_invalidate_all_caches(ring);
 881}
 882
 883static bool
 884i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 885{
 886        if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
 887                return false;
 888
 889        return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
 890}
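
/*
 * The check above rejects unknown flags and requires both batch_start_offset
 * and batch_len to be 8-byte aligned ((x & 0x7) == 0).
 */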
 891
 892static int
 893validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 894                   int count)
 895{
 896        int i;
 897        unsigned relocs_total = 0;
 898        unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
 899
 900        for (i = 0; i < count; i++) {
 901                char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
 902                int length; /* limited by fault_in_pages_readable() */
 903
 904                if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
 905                        return -EINVAL;
 906
 907                /* First check for malicious input causing overflow in
 908                 * the worst case where we need to allocate the entire
 909                 * relocation tree as a single array.
 910                 */
 911                if (exec[i].relocation_count > relocs_max - relocs_total)
 912                        return -EINVAL;
 913                relocs_total += exec[i].relocation_count;
 914
 915                length = exec[i].relocation_count *
 916                        sizeof(struct drm_i915_gem_relocation_entry);
 917                /*
 918                 * We must check that the entire relocation array is safe
 919                 * to read, but since we may need to update the presumed
 920                 * offsets during execution, check for full write access.
 921                 */
 922                if (!access_ok(VERIFY_WRITE, ptr, length))
 923                        return -EFAULT;
 924
 925                if (likely(!i915.prefault_disable)) {
 926                        if (fault_in_multipages_readable(ptr, length))
 927                                return -EFAULT;
 928                }
 929        }
 930
 931        return 0;
 932}
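
/*
 * The relocs_max bound above caps the running total at
 * UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry), roughly 134
 * million entries with the 32-byte entry, so the single-array relocation
 * copy in the slow path cannot overflow. Write access is demanded because
 * presumed_offset is copied back to userspace after relocation.
 */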
 933
 934static struct intel_context *
 935i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
 936                          struct intel_engine_cs *ring, const u32 ctx_id)
 937{
 938        struct intel_context *ctx = NULL;
 939        struct i915_ctx_hang_stats *hs;
 940
 941        if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
 942                return ERR_PTR(-EINVAL);
 943
 944        ctx = i915_gem_context_get(file->driver_priv, ctx_id);
 945        if (IS_ERR(ctx))
 946                return ctx;
 947
 948        hs = &ctx->hang_stats;
 949        if (hs->banned) {
 950                DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
 951                return ERR_PTR(-EIO);
 952        }
 953
 954        return ctx;
 955}
 956
 957static void
 958i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 959                                   struct intel_engine_cs *ring)
 960{
 961        struct i915_vma *vma;
 962
 963        list_for_each_entry(vma, vmas, exec_list) {
 964                struct drm_i915_gem_object *obj = vma->obj;
 965                u32 old_read = obj->base.read_domains;
 966                u32 old_write = obj->base.write_domain;
 967
 968                obj->base.write_domain = obj->base.pending_write_domain;
 969                if (obj->base.write_domain == 0)
 970                        obj->base.pending_read_domains |= obj->base.read_domains;
 971                obj->base.read_domains = obj->base.pending_read_domains;
 972                obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 973
 974                i915_vma_move_to_active(vma, ring);
 975                if (obj->base.write_domain) {
 976                        obj->dirty = 1;
 977                        obj->last_write_seqno = intel_ring_get_seqno(ring);
 978
 979                        intel_fb_obj_invalidate(obj, ring);
 980
 981                        /* update for the implicit flush after a batch */
 982                        obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 983                }
 984
 985                trace_i915_gem_object_change_domain(obj, old_read, old_write);
 986        }
 987}
 988
 989static void
 990i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 991                                    struct drm_file *file,
 992                                    struct intel_engine_cs *ring,
 993                                    struct drm_i915_gem_object *obj)
 994{
 995        /* Unconditionally force add_request to emit a full flush. */
 996        ring->gpu_caches_dirty = true;
 997
 998        /* Add a breadcrumb for the completion of the batch buffer */
 999        (void)__i915_add_request(ring, file, obj, NULL);
1000}
1001
1002static int
1003i915_reset_gen7_sol_offsets(struct drm_device *dev,
1004                            struct intel_engine_cs *ring)
1005{
1006        struct drm_i915_private *dev_priv = dev->dev_private;
1007        int ret, i;
1008
1009        if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
1010                DRM_DEBUG("sol reset is gen7/rcs only\n");
1011                return -EINVAL;
1012        }
1013
1014        ret = intel_ring_begin(ring, 4 * 3);
1015        if (ret)
1016                return ret;
1017
1018        for (i = 0; i < 4; i++) {
1019                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1020                intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1021                intel_ring_emit(ring, 0);
1022        }
1023
1024        intel_ring_advance(ring);
1025
1026        return 0;
1027}
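
/*
 * The ring space request above (4 * 3 dwords) matches the emission: four
 * MI_LOAD_REGISTER_IMM(1) commands of three dwords each, zeroing
 * GEN7_SO_WRITE_OFFSET(0) through GEN7_SO_WRITE_OFFSET(3).
 */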
1028
1029static int
1030legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
1031                             struct intel_engine_cs *ring,
1032                             struct intel_context *ctx,
1033                             struct drm_i915_gem_execbuffer2 *args,
1034                             struct list_head *vmas,
1035                             struct drm_i915_gem_object *batch_obj,
1036                             u64 exec_start, u32 flags)
1037{
1038        struct drm_clip_rect *cliprects = NULL;
1039        struct drm_i915_private *dev_priv = dev->dev_private;
1040        u64 exec_len;
1041        int instp_mode;
1042        u32 instp_mask;
1043        int i, ret = 0;
1044
1045        if (args->num_cliprects != 0) {
1046                if (ring != &dev_priv->ring[RCS]) {
1047                        DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1048                        return -EINVAL;
1049                }
1050
1051                if (INTEL_INFO(dev)->gen >= 5) {
1052                        DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1053                        return -EINVAL;
1054                }
1055
1056                if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1057                        DRM_DEBUG("execbuf with %u cliprects\n",
1058                                  args->num_cliprects);
1059                        return -EINVAL;
1060                }
1061
1062                cliprects = kcalloc(args->num_cliprects,
1063                                    sizeof(*cliprects),
1064                                    GFP_KERNEL);
1065                if (cliprects == NULL) {
1066                        ret = -ENOMEM;
1067                        goto error;
1068                }
1069
1070                if (copy_from_user(cliprects,
1071                                   to_user_ptr(args->cliprects_ptr),
1072                                   sizeof(*cliprects)*args->num_cliprects)) {
1073                        ret = -EFAULT;
1074                        goto error;
1075                }
1076        } else {
1077                if (args->DR4 == 0xffffffff) {
1078                        DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1079                        args->DR4 = 0;
1080                }
1081
1082                if (args->DR1 || args->DR4 || args->cliprects_ptr) {
1083                        DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
1084                        return -EINVAL;
1085                }
1086        }
1087
1088        ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
1089        if (ret)
1090                goto error;
1091
1092        ret = i915_switch_context(ring, ctx);
1093        if (ret)
1094                goto error;
1095
1096        instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1097        instp_mask = I915_EXEC_CONSTANTS_MASK;
1098        switch (instp_mode) {
1099        case I915_EXEC_CONSTANTS_REL_GENERAL:
1100        case I915_EXEC_CONSTANTS_ABSOLUTE:
1101        case I915_EXEC_CONSTANTS_REL_SURFACE:
1102                if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
1103                        DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1104                        ret = -EINVAL;
1105                        goto error;
1106                }
1107
1108                if (instp_mode != dev_priv->relative_constants_mode) {
1109                        if (INTEL_INFO(dev)->gen < 4) {
1110                                DRM_DEBUG("no rel constants on pre-gen4\n");
1111                                ret = -EINVAL;
1112                                goto error;
1113                        }
1114
1115                        if (INTEL_INFO(dev)->gen > 5 &&
1116                            instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1117                                DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1118                                ret = -EINVAL;
1119                                goto error;
1120                        }
1121
1122                        /* The HW changed the meaning on this bit on gen6 */
1123                        if (INTEL_INFO(dev)->gen >= 6)
1124                                instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1125                }
1126                break;
1127        default:
1128                DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1129                ret = -EINVAL;
1130                goto error;
1131        }
1132
1133        if (ring == &dev_priv->ring[RCS] &&
1134                        instp_mode != dev_priv->relative_constants_mode) {
1135                ret = intel_ring_begin(ring, 4);
1136                if (ret)
1137                        goto error;
1138
1139                intel_ring_emit(ring, MI_NOOP);
1140                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1141                intel_ring_emit(ring, INSTPM);
1142                intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1143                intel_ring_advance(ring);
1144
1145                dev_priv->relative_constants_mode = instp_mode;
1146        }
1147
1148        if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1149                ret = i915_reset_gen7_sol_offsets(dev, ring);
1150                if (ret)
1151                        goto error;
1152        }
1153
1154        exec_len = args->batch_len;
1155        if (cliprects) {
1156                for (i = 0; i < args->num_cliprects; i++) {
1157                        ret = i915_emit_box(dev, &cliprects[i],
1158                                            args->DR1, args->DR4);
1159                        if (ret)
1160                                goto error;
1161
1162                        ret = ring->dispatch_execbuffer(ring,
1163                                                        exec_start, exec_len,
1164                                                        flags);
1165                        if (ret)
1166                                goto error;
1167                }
1168        } else {
1169                ret = ring->dispatch_execbuffer(ring,
1170                                                exec_start, exec_len,
1171                                                flags);
1172                if (ret)
1173                        return ret;
1174        }
1175
1176        trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
1177
1178        i915_gem_execbuffer_move_to_active(vmas, ring);
1179        i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1180
1181error:
1182        kfree(cliprects);
1183        return ret;
1184}
1185
1186/**
1187 * Find one BSD ring to dispatch the corresponding BSD command.
1188 * The Ring ID is returned.
1189 */
1190static int gen8_dispatch_bsd_ring(struct drm_device *dev,
1191                                  struct drm_file *file)
1192{
1193        struct drm_i915_private *dev_priv = dev->dev_private;
1194        struct drm_i915_file_private *file_priv = file->driver_priv;
1195
1196        /* Check whether this file_priv already has a BSD ring assigned */
1197        if (file_priv->bsd_ring)
1198                return file_priv->bsd_ring->id;
1199        else {
1200                /* If not, use the ping-pong mechanism to select one ring */
1201                int ring_id;
1202
1203                mutex_lock(&dev->struct_mutex);
1204                if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
1205                        ring_id = VCS;
1206                        dev_priv->mm.bsd_ring_dispatch_index = 1;
1207                } else {
1208                        ring_id = VCS2;
1209                        dev_priv->mm.bsd_ring_dispatch_index = 0;
1210                }
1211                file_priv->bsd_ring = &dev_priv->ring[ring_id];
1212                mutex_unlock(&dev->struct_mutex);
1213                return ring_id;
1214        }
1215}
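
/*
 * In other words, the first BSD submission from a file picks VCS or VCS2 via
 * a simple ping-pong counter under struct_mutex, and the choice is then
 * cached in file_priv->bsd_ring so all later BSD work from that file stays
 * on the same ring.
 */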
1216
1217static struct drm_i915_gem_object *
1218eb_get_batch(struct eb_vmas *eb)
1219{
1220        struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1221
1222        /*
1223         * SNA is doing fancy tricks with compressing batch buffers, which leads
1224         * to negative relocation deltas. Usually that works out ok since the
1225         * relocate address is still positive, except when the batch is placed
1226         * very low in the GTT. Ensure this doesn't happen.
1227         *
1228         * Note that actual hangs have only been observed on gen7, but for
1229         * paranoia do it everywhere.
1230         */
1231        vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1232
1233        return vma->obj;
1234}
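
/*
 * The batch object is taken from the tail of eb->vmas (vmas.prev) because
 * the batch is expected to be the last buffer in the exec list (see the
 * i == buffer_count - 1 check in eb_lookup_vmas()); the
 * __EXEC_OBJECT_NEEDS_BIAS flag set here is what later keeps it from being
 * bound below BATCH_OFFSET_BIAS (see i915_gem_execbuffer_reserve_vma() and
 * eb_vma_misplaced()).
 */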
1235
1236static int
1237i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1238                       struct drm_file *file,
1239                       struct drm_i915_gem_execbuffer2 *args,
1240                       struct drm_i915_gem_exec_object2 *exec)
1241{
1242        struct drm_i915_private *dev_priv = dev->dev_private;
1243        struct eb_vmas *eb;
1244        struct drm_i915_gem_object *batch_obj;
1245        struct intel_engine_cs *ring;
1246        struct intel_context *ctx;
1247        struct i915_address_space *vm;
1248        const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1249        u64 exec_start = args->batch_start_offset;
1250        u32 flags;
1251        int ret;
1252        bool need_relocs;
1253
1254        if (!i915_gem_check_execbuffer(args))
1255                return -EINVAL;
1256
1257        ret = validate_exec_list(exec, args->buffer_count);
1258        if (ret)
1259                return ret;
1260
1261        flags = 0;
1262        if (args->flags & I915_EXEC_SECURE) {
1263                if (!file->is_master || !capable(CAP_SYS_ADMIN))
1264                        return -EPERM;
1265
1266                flags |= I915_DISPATCH_SECURE;
1267        }
1268        if (args->flags & I915_EXEC_IS_PINNED)
1269                flags |= I915_DISPATCH_PINNED;
1270
1271        if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
1272                DRM_DEBUG("execbuf with unknown ring: %d\n",
1273                          (int)(args->flags & I915_EXEC_RING_MASK));
1274                return -EINVAL;
1275        }
1276
1277        if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
1278                ring = &dev_priv->ring[RCS];
1279        else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
1280                if (HAS_BSD2(dev)) {
1281                        int ring_id;
1282                        ring_id = gen8_dispatch_bsd_ring(dev, file);
1283                        ring = &dev_priv->ring[ring_id];
1284                } else
1285                        ring = &dev_priv->ring[VCS];
1286        } else
1287                ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
1288
1289        if (!intel_ring_initialized(ring)) {
1290                DRM_DEBUG("execbuf with invalid ring: %d\n",
1291                          (int)(args->flags & I915_EXEC_RING_MASK));
1292                return -EINVAL;
1293        }
1294
1295        if (args->buffer_count < 1) {
1296                DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1297                return -EINVAL;
1298        }
1299
1300        intel_runtime_pm_get(dev_priv);
1301
1302        ret = i915_mutex_lock_interruptible(dev);
1303        if (ret)
1304                goto pre_mutex_err;
1305
1306        if (dev_priv->ums.mm_suspended) {
1307                mutex_unlock(&dev->struct_mutex);
1308                ret = -EBUSY;
1309                goto pre_mutex_err;
1310        }
1311
1312        ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1313        if (IS_ERR(ctx)) {
1314                mutex_unlock(&dev->struct_mutex);
1315                ret = PTR_ERR(ctx);
1316                goto pre_mutex_err;
1317        }
1318
1319        i915_gem_context_reference(ctx);
1320
1321        vm = ctx->vm;
1322        if (!USES_FULL_PPGTT(dev))
1323                vm = &dev_priv->gtt.base;
1324
1325        eb = eb_create(args);
1326        if (eb == NULL) {
1327                i915_gem_context_unreference(ctx);
1328                mutex_unlock(&dev->struct_mutex);
1329                ret = -ENOMEM;
1330                goto pre_mutex_err;
1331        }
1332
1333        /* Look up object handles */
1334        ret = eb_lookup_vmas(eb, exec, args, vm, file);
1335        if (ret)
1336                goto err;
1337
1338        /* take note of the batch buffer before we might reorder the lists */
1339        batch_obj = eb_get_batch(eb);
1340
1341        /* Move the objects en-masse into the GTT, evicting if necessary. */
1342        need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1343        ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
1344        if (ret)
1345                goto err;
1346
1347        /* The objects are in their final locations, apply the relocations. */
1348        if (need_relocs)
1349                ret = i915_gem_execbuffer_relocate(eb);
1350        if (ret) {
1351                if (ret == -EFAULT) {
1352                        ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1353                                                                eb, exec);
1354                        BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1355                }
1356                if (ret)
1357                        goto err;
1358        }
1359
1360        /* Set the pending read domains for the batch buffer to COMMAND */
1361        if (batch_obj->base.pending_write_domain) {
1362                DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1363                ret = -EINVAL;
1364                goto err;
1365        }
1366        batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1367
1368        if (i915_needs_cmd_parser(ring)) {
1369                ret = i915_parse_cmds(ring,
1370                                      batch_obj,
1371                                      args->batch_start_offset,
1372                                      file->is_master);
1373                if (ret)
1374                        goto err;
1375
1376                /*
1377                 * XXX: Actually do this when enabling batch copy...
1378                 *
1379                 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
1380                 * from MI_BATCH_BUFFER_START commands issued in the
1381                 * dispatch_execbuffer implementations. We specifically don't
1382                 * want that set when the command parser is enabled.
1383                 */
1384        }
1385
1386        /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1387         * batch" bit. Hence we need to pin secure batches into the global gtt.
1388         * hsw should have this fixed, but bdw mucks it up again. */
1389        if (flags & I915_DISPATCH_SECURE &&
1390            !batch_obj->has_global_gtt_mapping) {
1391                /* When we have multiple VMs, we'll need to make sure that we
1392                 * allocate space first */
1393                struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
1394                BUG_ON(!vma);
1395                vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
1396        }
1397
1398        if (flags & I915_DISPATCH_SECURE)
1399                exec_start += i915_gem_obj_ggtt_offset(batch_obj);
1400        else
1401                exec_start += i915_gem_obj_offset(batch_obj, vm);
1402
1403        ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
1404                        args, &eb->vmas, batch_obj, exec_start, flags);
1405        if (ret)
1406                goto err;
1407
1408err:
1409        /* the request owns the ref now */
1410        i915_gem_context_unreference(ctx);
1411        eb_destroy(eb);
1412
1413        mutex_unlock(&dev->struct_mutex);
1414
1415pre_mutex_err:
1416        /* intel_gpu_busy should also get a ref, so it will free when the device
1417         * is really idle. */
1418        intel_runtime_pm_put(dev_priv);
1419        return ret;
1420}
1421
1422/*
1423 * Legacy execbuffer just creates an exec2 list from the original exec object
1424 * list array and passes it to the real function.
1425 */
1426int
1427i915_gem_execbuffer(struct drm_device *dev, void *data,
1428                    struct drm_file *file)
1429{
1430        struct drm_i915_gem_execbuffer *args = data;
1431        struct drm_i915_gem_execbuffer2 exec2;
1432        struct drm_i915_gem_exec_object *exec_list = NULL;
1433        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1434        int ret, i;
1435
1436        if (args->buffer_count < 1) {
1437                DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1438                return -EINVAL;
1439        }
1440
1441        /* Copy in the exec list from userland */
1442        exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1443        exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1444        if (exec_list == NULL || exec2_list == NULL) {
1445                DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1446                          args->buffer_count);
1447                drm_free_large(exec_list);
1448                drm_free_large(exec2_list);
1449                return -ENOMEM;
1450        }
1451        ret = copy_from_user(exec_list,
1452                             to_user_ptr(args->buffers_ptr),
1453                             sizeof(*exec_list) * args->buffer_count);
1454        if (ret != 0) {
1455                DRM_DEBUG("copy %d exec entries failed %d\n",
1456                          args->buffer_count, ret);
1457                drm_free_large(exec_list);
1458                drm_free_large(exec2_list);
1459                return -EFAULT;
1460        }
1461
1462        for (i = 0; i < args->buffer_count; i++) {
1463                exec2_list[i].handle = exec_list[i].handle;
1464                exec2_list[i].relocation_count = exec_list[i].relocation_count;
1465                exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1466                exec2_list[i].alignment = exec_list[i].alignment;
1467                exec2_list[i].offset = exec_list[i].offset;
1468                if (INTEL_INFO(dev)->gen < 4)
1469                        exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1470                else
1471                        exec2_list[i].flags = 0;
1472        }
1473
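            /* Build the equivalent execbuffer2 request: render ring only,
             * default context. */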
1474        exec2.buffers_ptr = args->buffers_ptr;
1475        exec2.buffer_count = args->buffer_count;
1476        exec2.batch_start_offset = args->batch_start_offset;
1477        exec2.batch_len = args->batch_len;
1478        exec2.DR1 = args->DR1;
1479        exec2.DR4 = args->DR4;
1480        exec2.num_cliprects = args->num_cliprects;
1481        exec2.cliprects_ptr = args->cliprects_ptr;
1482        exec2.flags = I915_EXEC_RENDER;
1483        i915_execbuffer2_set_context_id(exec2, 0);
1484
1485        ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1486        if (!ret) {
1487                struct drm_i915_gem_exec_object __user *user_exec_list =
1488                        to_user_ptr(args->buffers_ptr);
1489
1490                /* Copy the new buffer offsets back to the user's exec list. */
1491                for (i = 0; i < args->buffer_count; i++) {
1492                        ret = __copy_to_user(&user_exec_list[i].offset,
1493                                             &exec2_list[i].offset,
1494                                             sizeof(user_exec_list[i].offset));
1495                        if (ret) {
1496                                ret = -EFAULT;
1497                                DRM_DEBUG("failed to copy %d exec entries "
1498                                          "back to user (%d)\n",
1499                                          args->buffer_count, ret);
1500                                break;
1501                        }
1502                }
1503        }
1504
1505        drm_free_large(exec_list);
1506        drm_free_large(exec2_list);
1507        return ret;
1508}
1509
1510int
1511i915_gem_execbuffer2(struct drm_device *dev, void *data,
1512                     struct drm_file *file)
1513{
1514        struct drm_i915_gem_execbuffer2 *args = data;
1515        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1516        int ret;
1517
1518        if (args->buffer_count < 1 ||
1519            args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1520                DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1521                return -EINVAL;
1522        }
1523
1524        if (args->rsvd2 != 0) {
1525                DRM_DEBUG("dirty rsvd2 field\n");
1526                return -EINVAL;
1527        }
1528
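            /* Try a cheap kmalloc first; fall back to drm_malloc_ab() (which
             * may vmalloc) for large buffer lists. */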
1529        exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1530                             GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1531        if (exec2_list == NULL)
1532                exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1533                                           args->buffer_count);
1534        if (exec2_list == NULL) {
1535                DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1536                          args->buffer_count);
1537                return -ENOMEM;
1538        }
1539        ret = copy_from_user(exec2_list,
1540                             to_user_ptr(args->buffers_ptr),
1541                             sizeof(*exec2_list) * args->buffer_count);
1542        if (ret != 0) {
1543                DRM_DEBUG("copy %d exec entries failed %d\n",
1544                          args->buffer_count, ret);
1545                drm_free_large(exec2_list);
1546                return -EFAULT;
1547        }
1548
1549        ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1550        if (!ret) {
1551                /* Copy the new buffer offsets back to the user's exec list. */
1552                struct drm_i915_gem_exec_object2 __user *user_exec_list =
1553                                   to_user_ptr(args->buffers_ptr);
1554                int i;
1555
1556                for (i = 0; i < args->buffer_count; i++) {
1557                        ret = __copy_to_user(&user_exec_list[i].offset,
1558                                             &exec2_list[i].offset,
1559                                             sizeof(user_exec_list[i].offset));
1560                        if (ret) {
1561                                ret = -EFAULT;
1562                                DRM_DEBUG("failed to copy %d exec entries "
1563                                          "back to user\n",
1564                                          args->buffer_count);
1565                                break;
1566                        }
1567                }
1568        }
1569
1570        drm_free_large(exec2_list);
1571        return ret;
1572}
1573
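    /*
     * Illustrative sketch (not part of the driver): how userspace might submit
     * a batch through the execbuffer2 ioctl handled above.  This assumes the
     * libdrm headers and an already-created GEM object `batch_handle` of
     * `batch_len` bytes containing the commands; the helper name
     * submit_batch() is made up for the example.
     *
     *    #include <stdint.h>
     *    #include <string.h>
     *    #include <xf86drm.h>
     *    #include <i915_drm.h>
     *
     *    static int submit_batch(int fd, uint32_t batch_handle, uint32_t batch_len)
     *    {
     *            struct drm_i915_gem_exec_object2 obj;
     *            struct drm_i915_gem_execbuffer2 execbuf;
     *
     *            memset(&obj, 0, sizeof(obj));
     *            obj.handle = batch_handle;             // last entry is the batch buffer
     *
     *            memset(&execbuf, 0, sizeof(execbuf));  // rsvd2 must stay zero
     *            execbuf.buffers_ptr = (uintptr_t)&obj;
     *            execbuf.buffer_count = 1;
     *            execbuf.batch_len = batch_len;
     *            execbuf.flags = I915_EXEC_RENDER;      // render ring
     *            i915_execbuffer2_set_context_id(execbuf, 0);   // default context
     *
     *            return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
     *    }
     */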