linux/drivers/gpu/drm/i915/i915_gem_execbuffer.c
   1/*
   2 * Copyright © 2008,2010 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *    Chris Wilson <chris@chris-wilson.co.uk>
  26 *
  27 */
  28
  29#include <drm/drmP.h>
  30#include <drm/i915_drm.h>
  31#include "i915_drv.h"
  32#include "i915_trace.h"
  33#include "intel_drv.h"
  34#include <linux/dma_remapping.h>
  35
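/*
 * Tracking structure for the objects referenced by one execbuffer call.
 * Objects are kept on the embedded list in submission order and can also
 * be looked up via the union: either a flat table indexed by buffer index
 * (used when userspace passes I915_EXEC_HANDLE_LUT and so guarantees
 * dense, index-like handles) or a small power-of-two hash table.  A
 * negative 'and' selects the flat table (its magnitude is the buffer
 * count, used for bounds checking); a non-negative 'and' is the mask used
 * to hash handles into 'buckets'.
 */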
  36struct eb_objects {
  37        struct list_head objects;
  38        int and;
  39        union {
  40                struct drm_i915_gem_object *lut[0];
  41                struct hlist_head buckets[0];
  42        };
  43};
  44
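/*
 * Allocate the lookup structure for this execbuffer.  With
 * I915_EXEC_HANDLE_LUT we first try an opportunistic allocation of a flat
 * table with one slot per buffer; if that fails, or the flag is absent,
 * we fall back to a zeroed hash table whose bucket count is a power of
 * two, capped at half a page of hlist_heads and scaled down to roughly
 * twice the number of buffers.
 */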
  45static struct eb_objects *
  46eb_create(struct drm_i915_gem_execbuffer2 *args)
  47{
  48        struct eb_objects *eb = NULL;
  49
  50        if (args->flags & I915_EXEC_HANDLE_LUT) {
  51                int size = args->buffer_count;
  52                size *= sizeof(struct drm_i915_gem_object *);
  53                size += sizeof(struct eb_objects);
  54                eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
  55        }
  56
  57        if (eb == NULL) {
  58                int size = args->buffer_count;
  59                int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
  60                BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
  61                while (count > 2*size)
  62                        count >>= 1;
  63                eb = kzalloc(count*sizeof(struct hlist_head) +
  64                             sizeof(struct eb_objects),
  65                             GFP_TEMPORARY);
  66                if (eb == NULL)
  67                        return eb;
  68
  69                eb->and = count - 1;
  70        } else
  71                eb->and = -args->buffer_count;
  72
  73        INIT_LIST_HEAD(&eb->objects);
  74        return eb;
  75}
  76
  77static void
  78eb_reset(struct eb_objects *eb)
  79{
  80        if (eb->and >= 0)
  81                memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
  82}
  83
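/*
 * Resolve every handle in the exec list to its GEM object while holding
 * the per-file table spinlock, taking a reference on each.  A handle that
 * cannot be found yields -ENOENT, and a handle listed twice (detected via
 * a non-empty exec_list) yields -EINVAL.  Each object is then filed into
 * the flat table by index, or hashed into the buckets keyed by its handle
 * (or by its index when I915_EXEC_HANDLE_LUT is set).
 */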
  84static int
  85eb_lookup_objects(struct eb_objects *eb,
  86                  struct drm_i915_gem_exec_object2 *exec,
  87                  const struct drm_i915_gem_execbuffer2 *args,
  88                  struct drm_file *file)
  89{
  90        int i;
  91
  92        spin_lock(&file->table_lock);
  93        for (i = 0; i < args->buffer_count; i++) {
  94                struct drm_i915_gem_object *obj;
  95
  96                obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
  97                if (obj == NULL) {
  98                        spin_unlock(&file->table_lock);
  99                        DRM_DEBUG("Invalid object handle %d at index %d\n",
 100                                   exec[i].handle, i);
 101                        return -ENOENT;
 102                }
 103
 104                if (!list_empty(&obj->exec_list)) {
 105                        spin_unlock(&file->table_lock);
 106                        DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
 107                                   obj, exec[i].handle, i);
 108                        return -EINVAL;
 109                }
 110
 111                drm_gem_object_reference(&obj->base);
 112                list_add_tail(&obj->exec_list, &eb->objects);
 113
 114                obj->exec_entry = &exec[i];
 115                if (eb->and < 0) {
 116                        eb->lut[i] = obj;
 117                } else {
 118                        uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
 119                        obj->exec_handle = handle;
 120                        hlist_add_head(&obj->exec_node,
 121                                       &eb->buckets[handle & eb->and]);
 122                }
 123        }
 124        spin_unlock(&file->table_lock);
 125
 126        return 0;
 127}
 128
 129static struct drm_i915_gem_object *
 130eb_get_object(struct eb_objects *eb, unsigned long handle)
 131{
 132        if (eb->and < 0) {
 133                if (handle >= -eb->and)
 134                        return NULL;
 135                return eb->lut[handle];
 136        } else {
 137                struct hlist_head *head;
 138                struct hlist_node *node;
 139
 140                head = &eb->buckets[handle & eb->and];
 141                hlist_for_each(node, head) {
 142                        struct drm_i915_gem_object *obj;
 143
 144                        obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
 145                        if (obj->exec_handle == handle)
 146                                return obj;
 147                }
 148                return NULL;
 149        }
 150}
 151
 152static void
 153eb_destroy(struct eb_objects *eb)
 154{
 155        while (!list_empty(&eb->objects)) {
 156                struct drm_i915_gem_object *obj;
 157
 158                obj = list_first_entry(&eb->objects,
 159                                       struct drm_i915_gem_object,
 160                                       exec_list);
 161                list_del_init(&obj->exec_list);
 162                drm_gem_object_unreference(&obj->base);
 163        }
 164        kfree(eb);
 165}
 166
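/*
 * Decide how a relocation value is written into the object: through the
 * CPU (kmap) when the object is already in the CPU write domain, cannot
 * be mapped through the mappable GTT aperture, or is cached; otherwise
 * the GTT path in i915_gem_execbuffer_relocate_entry() uses an atomic WC
 * iomapping.
 */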
 167static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 168{
 169        return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
 170                !obj->map_and_fenceable ||
 171                obj->cache_level != I915_CACHE_NONE);
 172}
 173
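/*
 * Apply a single relocation: look up the target object, validate the
 * read/write domains requested by userspace and, unless the presumed
 * offset already matches the target's current GTT offset, check bounds
 * and alignment and poke the 32-bit value (target offset + delta) into
 * the object at reloc->offset, either via a kmap_atomic of the backing
 * page or via an atomic GTT iomapping, finally recording the new
 * presumed offset for the caller to copy back.
 */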
 174static int
 175i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 176                                   struct eb_objects *eb,
 177                                   struct drm_i915_gem_relocation_entry *reloc)
 178{
 179        struct drm_device *dev = obj->base.dev;
 180        struct drm_gem_object *target_obj;
 181        struct drm_i915_gem_object *target_i915_obj;
 182        uint32_t target_offset;
 183        int ret = -EINVAL;
 184
 185        /* we already hold a reference to all valid objects */
 186        target_obj = &eb_get_object(eb, reloc->target_handle)->base;
 187        if (unlikely(target_obj == NULL))
 188                return -ENOENT;
 189
 190        target_i915_obj = to_intel_bo(target_obj);
 191        target_offset = target_i915_obj->gtt_offset;
 192
 193        /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
 194         * pipe_control writes because the gpu doesn't properly redirect them
 195         * through the ppgtt for non-secure batchbuffers. */
 196        if (unlikely(IS_GEN6(dev) &&
 197            reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 198            !target_i915_obj->has_global_gtt_mapping)) {
 199                i915_gem_gtt_bind_object(target_i915_obj,
 200                                         target_i915_obj->cache_level);
 201        }
 202
 203        /* Validate that the target is in a valid r/w GPU domain */
 204        if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
 205                DRM_DEBUG("reloc with multiple write domains: "
 206                          "obj %p target %d offset %d "
 207                          "read %08x write %08x",
 208                          obj, reloc->target_handle,
 209                          (int) reloc->offset,
 210                          reloc->read_domains,
 211                          reloc->write_domain);
 212                return ret;
 213        }
 214        if (unlikely((reloc->write_domain | reloc->read_domains)
 215                     & ~I915_GEM_GPU_DOMAINS)) {
 216                DRM_DEBUG("reloc with read/write non-GPU domains: "
 217                          "obj %p target %d offset %d "
 218                          "read %08x write %08x",
 219                          obj, reloc->target_handle,
 220                          (int) reloc->offset,
 221                          reloc->read_domains,
 222                          reloc->write_domain);
 223                return ret;
 224        }
 225
 226        target_obj->pending_read_domains |= reloc->read_domains;
 227        target_obj->pending_write_domain |= reloc->write_domain;
 228
 229        /* If the relocation already has the right value in it, no
 230         * more work needs to be done.
 231         */
 232        if (target_offset == reloc->presumed_offset)
 233                return 0;
 234
 235        /* Check that the relocation address is valid... */
 236        if (unlikely(reloc->offset > obj->base.size - 4)) {
 237                DRM_DEBUG("Relocation beyond object bounds: "
 238                          "obj %p target %d offset %d size %d.\n",
 239                          obj, reloc->target_handle,
 240                          (int) reloc->offset,
 241                          (int) obj->base.size);
 242                return ret;
 243        }
 244        if (unlikely(reloc->offset & 3)) {
 245                DRM_DEBUG("Relocation not 4-byte aligned: "
 246                          "obj %p target %d offset %d.\n",
 247                          obj, reloc->target_handle,
 248                          (int) reloc->offset);
 249                return ret;
 250        }
 251
 252        /* We can't wait for rendering with pagefaults disabled */
 253        if (obj->active && in_atomic())
 254                return -EFAULT;
 255
 256        reloc->delta += target_offset;
 257        if (use_cpu_reloc(obj)) {
 258                uint32_t page_offset = reloc->offset & ~PAGE_MASK;
 259                char *vaddr;
 260
 261                ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 262                if (ret)
 263                        return ret;
 264
 265                vaddr = kmap_atomic(i915_gem_object_get_page(obj,
 266                                                             reloc->offset >> PAGE_SHIFT));
 267                *(uint32_t *)(vaddr + page_offset) = reloc->delta;
 268                kunmap_atomic(vaddr);
 269        } else {
 270                struct drm_i915_private *dev_priv = dev->dev_private;
 271                uint32_t __iomem *reloc_entry;
 272                void __iomem *reloc_page;
 273
 274                ret = i915_gem_object_set_to_gtt_domain(obj, true);
 275                if (ret)
 276                        return ret;
 277
 278                ret = i915_gem_object_put_fence(obj);
 279                if (ret)
 280                        return ret;
 281
 282                /* Map the page containing the relocation we're going to perform.  */
 283                reloc->offset += obj->gtt_offset;
 284                reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
 285                                                      reloc->offset & PAGE_MASK);
 286                reloc_entry = (uint32_t __iomem *)
 287                        (reloc_page + (reloc->offset & ~PAGE_MASK));
 288                iowrite32(reloc->delta, reloc_entry);
 289                io_mapping_unmap_atomic(reloc_page);
 290        }
 291
 292        /* and update the user's relocation entry */
 293        reloc->presumed_offset = target_offset;
 294
 295        return 0;
 296}
 297
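/*
 * Fast-path relocation for one object: the user's relocation entries are
 * copied in chunks into a small on-stack buffer with the inatomic helpers
 * (the caller has pagefaults disabled), applied one by one, and any
 * presumed_offset that changed is written straight back to userspace.
 * A fault at any point bubbles up as -EFAULT so the slow path can take
 * over.
 */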
 298static int
 299i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
 300                                    struct eb_objects *eb)
 301{
 302#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 303        struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
 304        struct drm_i915_gem_relocation_entry __user *user_relocs;
 305        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 306        int remain, ret;
 307
 308        user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
 309
 310        remain = entry->relocation_count;
 311        while (remain) {
 312                struct drm_i915_gem_relocation_entry *r = stack_reloc;
 313                int count = remain;
 314                if (count > ARRAY_SIZE(stack_reloc))
 315                        count = ARRAY_SIZE(stack_reloc);
 316                remain -= count;
 317
 318                if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
 319                        return -EFAULT;
 320
 321                do {
 322                        u64 offset = r->presumed_offset;
 323
 324                        ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
 325                        if (ret)
 326                                return ret;
 327
 328                        if (r->presumed_offset != offset &&
 329                            __copy_to_user_inatomic(&user_relocs->presumed_offset,
 330                                                    &r->presumed_offset,
 331                                                    sizeof(r->presumed_offset))) {
 332                                return -EFAULT;
 333                        }
 334
 335                        user_relocs++;
 336                        r++;
 337                } while (--count);
 338        }
 339
 340        return 0;
 341#undef N_RELOC
 342}
 343
 344static int
 345i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
 346                                         struct eb_objects *eb,
 347                                         struct drm_i915_gem_relocation_entry *relocs)
 348{
 349        const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 350        int i, ret;
 351
 352        for (i = 0; i < entry->relocation_count; i++) {
 353                ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
 354                if (ret)
 355                        return ret;
 356        }
 357
 358        return 0;
 359}
 360
 361static int
 362i915_gem_execbuffer_relocate(struct drm_device *dev,
 363                             struct eb_objects *eb)
 364{
 365        struct drm_i915_gem_object *obj;
 366        int ret = 0;
 367
 368        /* This is the fast path and we cannot handle a pagefault whilst
 369         * holding the struct mutex lest the user pass in the relocations
 370         * contained within an mmapped bo. In such a case, the page
 371         * fault handler would call i915_gem_fault() and we would try to
 372         * acquire the struct mutex again. Obviously this is bad and so
 373         * lockdep complains vehemently.
 374         */
 375        pagefault_disable();
 376        list_for_each_entry(obj, &eb->objects, exec_list) {
 377                ret = i915_gem_execbuffer_relocate_object(obj, eb);
 378                if (ret)
 379                        break;
 380        }
 381        pagefault_enable();
 382
 383        return ret;
 384}
 385
 386#define  __EXEC_OBJECT_HAS_PIN (1<<31)
 387#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 388
 389static int
 390need_reloc_mappable(struct drm_i915_gem_object *obj)
 391{
 392        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 393        return entry->relocation_count && !use_cpu_reloc(obj);
 394}
 395
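/*
 * Pin a single object for the upcoming batch, honouring the alignment,
 * fence and mappability constraints of its exec entry, acquiring a fence
 * register when requested on pre-gen4 hardware, and making sure the
 * aliasing ppgtt (and, for EXEC_OBJECT_NEEDS_GTT, the global GTT) mapping
 * exists.  If the object ended up at a different offset than userspace
 * presumed, *need_reloc is raised so the relocation pass runs.
 */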
 396static int
 397i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 398                                   struct intel_ring_buffer *ring,
 399                                   bool *need_reloc)
 400{
 401        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 402        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 403        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 404        bool need_fence, need_mappable;
 405        int ret;
 406
 407        need_fence =
 408                has_fenced_gpu_access &&
 409                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 410                obj->tiling_mode != I915_TILING_NONE;
 411        need_mappable = need_fence || need_reloc_mappable(obj);
 412
 413        ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
 414        if (ret)
 415                return ret;
 416
 417        entry->flags |= __EXEC_OBJECT_HAS_PIN;
 418
 419        if (has_fenced_gpu_access) {
 420                if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
 421                        ret = i915_gem_object_get_fence(obj);
 422                        if (ret)
 423                                return ret;
 424
 425                        if (i915_gem_object_pin_fence(obj))
 426                                entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 427
 428                        obj->pending_fenced_gpu_access = true;
 429                }
 430        }
 431
 432        /* Ensure ppgtt mapping exists if needed */
 433        if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
 434                i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
 435                                       obj, obj->cache_level);
 436
 437                obj->has_aliasing_ppgtt_mapping = 1;
 438        }
 439
 440        if (entry->offset != obj->gtt_offset) {
 441                entry->offset = obj->gtt_offset;
 442                *need_reloc = true;
 443        }
 444
 445        if (entry->flags & EXEC_OBJECT_WRITE) {
 446                obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
 447                obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
 448        }
 449
 450        if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
 451            !obj->has_global_gtt_mapping)
 452                i915_gem_gtt_bind_object(obj, obj->cache_level);
 453
 454        return 0;
 455}
 456
 457static void
 458i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
 459{
 460        struct drm_i915_gem_exec_object2 *entry;
 461
 462        if (!obj->gtt_space)
 463                return;
 464
 465        entry = obj->exec_entry;
 466
 467        if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
 468                i915_gem_object_unpin_fence(obj);
 469
 470        if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 471                i915_gem_object_unpin(obj);
 472
 473        entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 474}
 475
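/*
 * Pin every object into the GTT.  The exec list is first partitioned so
 * that objects needing a mappable (and possibly fenced) binding sit at
 * the front and get first pick of the limited mappable aperture.  If the
 * binding pass runs out of space, everything is unpinned, the GTT is
 * evicted wholesale and the pass is retried once before giving up with
 * -ENOSPC.
 */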
 476static int
 477i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 478                            struct drm_file *file,
 479                            struct list_head *objects,
 480                            bool *need_relocs)
 481{
 482        struct drm_i915_gem_object *obj;
 483        struct list_head ordered_objects;
 484        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 485        int retry;
 486
 487        INIT_LIST_HEAD(&ordered_objects);
 488        while (!list_empty(objects)) {
 489                struct drm_i915_gem_exec_object2 *entry;
 490                bool need_fence, need_mappable;
 491
 492                obj = list_first_entry(objects,
 493                                       struct drm_i915_gem_object,
 494                                       exec_list);
 495                entry = obj->exec_entry;
 496
 497                need_fence =
 498                        has_fenced_gpu_access &&
 499                        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 500                        obj->tiling_mode != I915_TILING_NONE;
 501                need_mappable = need_fence || need_reloc_mappable(obj);
 502
 503                if (need_mappable)
 504                        list_move(&obj->exec_list, &ordered_objects);
 505                else
 506                        list_move_tail(&obj->exec_list, &ordered_objects);
 507
 508                obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
 509                obj->base.pending_write_domain = 0;
 510                obj->pending_fenced_gpu_access = false;
 511        }
 512        list_splice(&ordered_objects, objects);
 513
 514        /* Attempt to pin all of the buffers into the GTT.
 515         * This is done in 3 phases:
 516         *
 517         * 1a. Unbind all objects that do not match the GTT constraints for
 518         *     the execbuffer (fenceable, mappable, alignment etc).
 519         * 1b. Increment pin count for already bound objects.
 520         * 2.  Bind new objects.
 521         * 3.  Decrement pin count.
 522         *
 523         * This avoids unnecessary unbinding of later objects in order to make
 524         * room for the earlier objects *unless* we need to defragment.
 525         */
 526        retry = 0;
 527        do {
 528                int ret = 0;
 529
 530                /* Unbind any ill-fitting objects, or pin those that already fit. */
 531                list_for_each_entry(obj, objects, exec_list) {
 532                        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 533                        bool need_fence, need_mappable;
 534
 535                        if (!obj->gtt_space)
 536                                continue;
 537
 538                        need_fence =
 539                                has_fenced_gpu_access &&
 540                                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 541                                obj->tiling_mode != I915_TILING_NONE;
 542                        need_mappable = need_fence || need_reloc_mappable(obj);
 543
 544                        if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
 545                            (need_mappable && !obj->map_and_fenceable))
 546                                ret = i915_gem_object_unbind(obj);
 547                        else
 548                                ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
 549                        if (ret)
 550                                goto err;
 551                }
 552
 553                /* Bind fresh objects */
 554                list_for_each_entry(obj, objects, exec_list) {
 555                        if (obj->gtt_space)
 556                                continue;
 557
 558                        ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
 559                        if (ret)
 560                                goto err;
 561                }
 562
 563err:            /* Decrement pin count for bound objects */
 564                list_for_each_entry(obj, objects, exec_list)
 565                        i915_gem_execbuffer_unreserve_object(obj);
 566
 567                if (ret != -ENOSPC || retry++)
 568                        return ret;
 569
 570                ret = i915_gem_evict_everything(ring->dev);
 571                if (ret)
 572                        return ret;
 573        } while (1);
 574}
 575
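/*
 * Slow-path relocation, entered when the atomic fast path faulted.  The
 * references from the previous attempt are dropped and struct_mutex is
 * released so that the complete relocation list can be copied from
 * userspace with pagefaults enabled; the presumed offsets in the user's
 * entries are invalidated (set to -1) so that a future execbuffer cannot
 * mistake stale values for valid ones.  The lock is then retaken, the
 * objects looked up and reserved again, and the relocations applied from
 * the kernel copy.
 */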
 576static int
 577i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 578                                  struct drm_i915_gem_execbuffer2 *args,
 579                                  struct drm_file *file,
 580                                  struct intel_ring_buffer *ring,
 581                                  struct eb_objects *eb,
 582                                  struct drm_i915_gem_exec_object2 *exec)
 583{
 584        struct drm_i915_gem_relocation_entry *reloc;
 585        struct drm_i915_gem_object *obj;
 586        bool need_relocs;
 587        int *reloc_offset;
 588        int i, total, ret;
 589        int count = args->buffer_count;
 590
 591        /* We may process another execbuffer during the unlock... */
 592        while (!list_empty(&eb->objects)) {
 593                obj = list_first_entry(&eb->objects,
 594                                       struct drm_i915_gem_object,
 595                                       exec_list);
 596                list_del_init(&obj->exec_list);
 597                drm_gem_object_unreference(&obj->base);
 598        }
 599
 600        mutex_unlock(&dev->struct_mutex);
 601
 602        total = 0;
 603        for (i = 0; i < count; i++)
 604                total += exec[i].relocation_count;
 605
 606        reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
 607        reloc = drm_malloc_ab(total, sizeof(*reloc));
 608        if (reloc == NULL || reloc_offset == NULL) {
 609                drm_free_large(reloc);
 610                drm_free_large(reloc_offset);
 611                mutex_lock(&dev->struct_mutex);
 612                return -ENOMEM;
 613        }
 614
 615        total = 0;
 616        for (i = 0; i < count; i++) {
 617                struct drm_i915_gem_relocation_entry __user *user_relocs;
 618                u64 invalid_offset = (u64)-1;
 619                int j;
 620
 621                user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;
 622
 623                if (copy_from_user(reloc+total, user_relocs,
 624                                   exec[i].relocation_count * sizeof(*reloc))) {
 625                        ret = -EFAULT;
 626                        mutex_lock(&dev->struct_mutex);
 627                        goto err;
 628                }
 629
 630                /* As we do not update the known relocation offsets after
 631                 * relocating (due to the complexities in lock handling),
 632                 * we need to mark them as invalid now so that we force the
 633                 * relocation processing next time. Just in case the target
 634                 * object is evicted and then rebound into its old
 635                 * presumed_offset before the next execbuffer - if that
 636                 * happened we would make the mistake of assuming that the
 637                 * relocations were valid.
 638                 */
 639                for (j = 0; j < exec[i].relocation_count; j++) {
 640                        if (copy_to_user(&user_relocs[j].presumed_offset,
 641                                         &invalid_offset,
 642                                         sizeof(invalid_offset))) {
 643                                ret = -EFAULT;
 644                                mutex_lock(&dev->struct_mutex);
 645                                goto err;
 646                        }
 647                }
 648
 649                reloc_offset[i] = total;
 650                total += exec[i].relocation_count;
 651        }
 652
 653        ret = i915_mutex_lock_interruptible(dev);
 654        if (ret) {
 655                mutex_lock(&dev->struct_mutex);
 656                goto err;
 657        }
 658
 659        /* reacquire the objects */
 660        eb_reset(eb);
 661        ret = eb_lookup_objects(eb, exec, args, file);
 662        if (ret)
 663                goto err;
 664
 665        need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
 666        ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
 667        if (ret)
 668                goto err;
 669
 670        list_for_each_entry(obj, &eb->objects, exec_list) {
 671                int offset = obj->exec_entry - exec;
 672                ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
 673                                                               reloc + reloc_offset[offset]);
 674                if (ret)
 675                        goto err;
 676        }
 677
 678        /* Leave the user relocations as they are; this is the painfully slow path,
 679         * and we want to avoid the complication of dropping the lock whilst
 680         * having buffers reserved in the aperture and so causing spurious
 681         * ENOSPC for random operations.
 682         */
 683
 684err:
 685        drm_free_large(reloc);
 686        drm_free_large(reloc_offset);
 687        return ret;
 688}
 689
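/*
 * Flush outstanding CPU writes and serialise against other rings before
 * the batch runs: each object is synced to the target ring, objects dirty
 * in the CPU domain are clflushed, a chipset flush/wmb is issued for any
 * flushed CPU/GTT write domains, and finally the GPU caches are
 * invalidated so the ring sees coherent data.
 */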
 690static int
 691i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 692                                struct list_head *objects)
 693{
 694        struct drm_i915_gem_object *obj;
 695        uint32_t flush_domains = 0;
 696        int ret;
 697
 698        list_for_each_entry(obj, objects, exec_list) {
 699                ret = i915_gem_object_sync(obj, ring);
 700                if (ret)
 701                        return ret;
 702
 703                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 704                        i915_gem_clflush_object(obj);
 705
 706                flush_domains |= obj->base.write_domain;
 707        }
 708
 709        if (flush_domains & I915_GEM_DOMAIN_CPU)
 710                i915_gem_chipset_flush(ring->dev);
 711
 712        if (flush_domains & I915_GEM_DOMAIN_GTT)
 713                wmb();
 714
 715        /* Unconditionally invalidate gpu caches and ensure that we do flush
 716         * any residual writes from the previous batch.
 717         */
 718        return intel_ring_invalidate_all_caches(ring);
 719}
 720
 721static bool
 722i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 723{
 724        if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
 725                return false;
 726
 727        return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
 728}
 729
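/*
 * Sanity-check the user-supplied exec list before taking any locks:
 * reject unknown per-object flags, guard the running total of relocation
 * entries against integer overflow (they may later be copied into a
 * single array), verify that each relocation array is writable (for the
 * presumed_offset write-back), and prefault it for reading so the atomic
 * fast path is likely to succeed.
 */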
 730static int
 731validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 732                   int count)
 733{
 734        int i;
 735        int relocs_total = 0;
 736        int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
 737
 738        for (i = 0; i < count; i++) {
 739                char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
 740                int length; /* limited by fault_in_pages_readable() */
 741
 742                if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
 743                        return -EINVAL;
 744
 745                /* First check for malicious input causing overflow in
 746                 * the worst case where we need to allocate the entire
 747                 * relocation tree as a single array.
 748                 */
 749                if (exec[i].relocation_count > relocs_max - relocs_total)
 750                        return -EINVAL;
 751                relocs_total += exec[i].relocation_count;
 752
 753                length = exec[i].relocation_count *
 754                        sizeof(struct drm_i915_gem_relocation_entry);
 755                /* we may also need to update the presumed offsets */
 756                if (!access_ok(VERIFY_WRITE, ptr, length))
 757                        return -EFAULT;
 758
 759                if (fault_in_multipages_readable(ptr, length))
 760                        return -EFAULT;
 761        }
 762
 763        return 0;
 764}
 765
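/*
 * Commit the pending domain changes now that the batch has been emitted:
 * each object picks up its pending read/write domains and fence state,
 * is moved onto the ring's active list and, if it was written, is marked
 * dirty with the request's seqno recorded (also nudging the framebuffer
 * tracking for pinned scanout objects).
 */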
 766static void
 767i915_gem_execbuffer_move_to_active(struct list_head *objects,
 768                                   struct intel_ring_buffer *ring)
 769{
 770        struct drm_i915_gem_object *obj;
 771
 772        list_for_each_entry(obj, objects, exec_list) {
 773                u32 old_read = obj->base.read_domains;
 774                u32 old_write = obj->base.write_domain;
 775
 776                obj->base.write_domain = obj->base.pending_write_domain;
 777                if (obj->base.write_domain == 0)
 778                        obj->base.pending_read_domains |= obj->base.read_domains;
 779                obj->base.read_domains = obj->base.pending_read_domains;
 780                obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 781
 782                i915_gem_object_move_to_active(obj, ring);
 783                if (obj->base.write_domain) {
 784                        obj->dirty = 1;
 785                        obj->last_write_seqno = intel_ring_get_seqno(ring);
 786                        if (obj->pin_count) /* check for potential scanout */
 787                                intel_mark_fb_busy(obj);
 788                }
 789
 790                trace_i915_gem_object_change_domain(obj, old_read, old_write);
 791        }
 792}
 793
 794static void
 795i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 796                                    struct drm_file *file,
 797                                    struct intel_ring_buffer *ring)
 798{
 799        /* Unconditionally force add_request to emit a full flush. */
 800        ring->gpu_caches_dirty = true;
 801
 802        /* Add a breadcrumb for the completion of the batch buffer */
 803        (void)i915_add_request(ring, file, NULL);
 804}
 805
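/*
 * When userspace asks for I915_EXEC_GEN7_SOL_RESET, emit LRI commands
 * that zero the four GEN7_SO_WRITE_OFFSET registers so streamout
 * (transform feedback) offsets start from scratch for this batch.  A
 * no-op on anything other than the gen7 render ring.
 */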
 806static int
 807i915_reset_gen7_sol_offsets(struct drm_device *dev,
 808                            struct intel_ring_buffer *ring)
 809{
 810        drm_i915_private_t *dev_priv = dev->dev_private;
 811        int ret, i;
 812
 813        if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
 814                return 0;
 815
 816        ret = intel_ring_begin(ring, 4 * 3);
 817        if (ret)
 818                return ret;
 819
 820        for (i = 0; i < 4; i++) {
 821                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 822                intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
 823                intel_ring_emit(ring, 0);
 824        }
 825
 826        intel_ring_advance(ring);
 827
 828        return 0;
 829}
 830
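/*
 * Main execbuffer pathway shared by both ioctls.  In outline: validate
 * the flags and exec list, pick the target ring and constants mode, copy
 * in any cliprects, then under struct_mutex look up the objects, reserve
 * them in the GTT, apply relocations (falling back to the slow path on a
 * fault), flush/sync the objects to the GPU, switch context, and finally
 * dispatch the batch (once per cliprect on pre-gen5 render) before moving
 * the objects to the active list and queueing a request.
 */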
 831static int
 832i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 833                       struct drm_file *file,
 834                       struct drm_i915_gem_execbuffer2 *args,
 835                       struct drm_i915_gem_exec_object2 *exec)
 836{
 837        drm_i915_private_t *dev_priv = dev->dev_private;
 838        struct eb_objects *eb;
 839        struct drm_i915_gem_object *batch_obj;
 840        struct drm_clip_rect *cliprects = NULL;
 841        struct intel_ring_buffer *ring;
 842        u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 843        u32 exec_start, exec_len;
 844        u32 mask, flags;
 845        int ret, mode, i;
 846        bool need_relocs;
 847
 848        if (!i915_gem_check_execbuffer(args))
 849                return -EINVAL;
 850
 851        ret = validate_exec_list(exec, args->buffer_count);
 852        if (ret)
 853                return ret;
 854
 855        flags = 0;
 856        if (args->flags & I915_EXEC_SECURE) {
 857                if (!file->is_master || !capable(CAP_SYS_ADMIN))
 858                        return -EPERM;
 859
 860                flags |= I915_DISPATCH_SECURE;
 861        }
 862        if (args->flags & I915_EXEC_IS_PINNED)
 863                flags |= I915_DISPATCH_PINNED;
 864
 865        switch (args->flags & I915_EXEC_RING_MASK) {
 866        case I915_EXEC_DEFAULT:
 867        case I915_EXEC_RENDER:
 868                ring = &dev_priv->ring[RCS];
 869                break;
 870        case I915_EXEC_BSD:
 871                ring = &dev_priv->ring[VCS];
 872                if (ctx_id != 0) {
 873                        DRM_DEBUG("Ring %s doesn't support contexts\n",
 874                                  ring->name);
 875                        return -EPERM;
 876                }
 877                break;
 878        case I915_EXEC_BLT:
 879                ring = &dev_priv->ring[BCS];
 880                if (ctx_id != 0) {
 881                        DRM_DEBUG("Ring %s doesn't support contexts\n",
 882                                  ring->name);
 883                        return -EPERM;
 884                }
 885                break;
 886        default:
 887                DRM_DEBUG("execbuf with unknown ring: %d\n",
 888                          (int)(args->flags & I915_EXEC_RING_MASK));
 889                return -EINVAL;
 890        }
 891        if (!intel_ring_initialized(ring)) {
 892                DRM_DEBUG("execbuf with invalid ring: %d\n",
 893                          (int)(args->flags & I915_EXEC_RING_MASK));
 894                return -EINVAL;
 895        }
 896
 897        mode = args->flags & I915_EXEC_CONSTANTS_MASK;
 898        mask = I915_EXEC_CONSTANTS_MASK;
 899        switch (mode) {
 900        case I915_EXEC_CONSTANTS_REL_GENERAL:
 901        case I915_EXEC_CONSTANTS_ABSOLUTE:
 902        case I915_EXEC_CONSTANTS_REL_SURFACE:
 903                if (ring == &dev_priv->ring[RCS] &&
 904                    mode != dev_priv->relative_constants_mode) {
 905                        if (INTEL_INFO(dev)->gen < 4)
 906                                return -EINVAL;
 907
 908                        if (INTEL_INFO(dev)->gen > 5 &&
 909                            mode == I915_EXEC_CONSTANTS_REL_SURFACE)
 910                                return -EINVAL;
 911
 912                        /* The HW changed the meaning of this bit on gen6 */
 913                        if (INTEL_INFO(dev)->gen >= 6)
 914                                mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
 915                }
 916                break;
 917        default:
 918                DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
 919                return -EINVAL;
 920        }
 921
 922        if (args->buffer_count < 1) {
 923                DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
 924                return -EINVAL;
 925        }
 926
 927        if (args->num_cliprects != 0) {
 928                if (ring != &dev_priv->ring[RCS]) {
 929                        DRM_DEBUG("clip rectangles are only valid with the render ring\n");
 930                        return -EINVAL;
 931                }
 932
 933                if (INTEL_INFO(dev)->gen >= 5) {
 934                        DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
 935                        return -EINVAL;
 936                }
 937
 938                if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
 939                        DRM_DEBUG("execbuf with %u cliprects\n",
 940                                  args->num_cliprects);
 941                        return -EINVAL;
 942                }
 943
 944                cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
 945                                    GFP_KERNEL);
 946                if (cliprects == NULL) {
 947                        ret = -ENOMEM;
 948                        goto pre_mutex_err;
 949                }
 950
 951                if (copy_from_user(cliprects,
 952                                     (struct drm_clip_rect __user *)(uintptr_t)
 953                                     args->cliprects_ptr,
 954                                     sizeof(*cliprects)*args->num_cliprects)) {
 955                        ret = -EFAULT;
 956                        goto pre_mutex_err;
 957                }
 958        }
 959
 960        ret = i915_mutex_lock_interruptible(dev);
 961        if (ret)
 962                goto pre_mutex_err;
 963
 964        if (dev_priv->mm.suspended) {
 965                mutex_unlock(&dev->struct_mutex);
 966                ret = -EBUSY;
 967                goto pre_mutex_err;
 968        }
 969
 970        eb = eb_create(args);
 971        if (eb == NULL) {
 972                mutex_unlock(&dev->struct_mutex);
 973                ret = -ENOMEM;
 974                goto pre_mutex_err;
 975        }
 976
 977        /* Look up object handles */
 978        ret = eb_lookup_objects(eb, exec, args, file);
 979        if (ret)
 980                goto err;
 981
 982        /* take note of the batch buffer before we might reorder the lists */
 983        batch_obj = list_entry(eb->objects.prev,
 984                               struct drm_i915_gem_object,
 985                               exec_list);
 986
 987        /* Move the objects en-masse into the GTT, evicting if necessary. */
 988        need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
 989        ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
 990        if (ret)
 991                goto err;
 992
 993        /* The objects are in their final locations, apply the relocations. */
 994        if (need_relocs)
 995                ret = i915_gem_execbuffer_relocate(dev, eb);
 996        if (ret) {
 997                if (ret == -EFAULT) {
 998                        ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
 999                                                                eb, exec);
1000                        BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1001                }
1002                if (ret)
1003                        goto err;
1004        }
1005
1006        /* Set the pending read domains for the batch buffer to COMMAND */
1007        if (batch_obj->base.pending_write_domain) {
1008                DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1009                ret = -EINVAL;
1010                goto err;
1011        }
1012        batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1013
1014        /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1015         * batch" bit. Hence we need to pin secure batches into the global gtt.
1016         * hsw should have this fixed, but let's be paranoid and do it
1017         * unconditionally for now. */
1018        if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
1019                i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
1020
1021        ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
1022        if (ret)
1023                goto err;
1024
1025        ret = i915_switch_context(ring, file, ctx_id);
1026        if (ret)
1027                goto err;
1028
1029        if (ring == &dev_priv->ring[RCS] &&
1030            mode != dev_priv->relative_constants_mode) {
1031                ret = intel_ring_begin(ring, 4);
1032                if (ret)
1033                        goto err;
1034
1035                intel_ring_emit(ring, MI_NOOP);
1036                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1037                intel_ring_emit(ring, INSTPM);
1038                intel_ring_emit(ring, mask << 16 | mode);
1039                intel_ring_advance(ring);
1040
1041                dev_priv->relative_constants_mode = mode;
1042        }
1043
1044        if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1045                ret = i915_reset_gen7_sol_offsets(dev, ring);
1046                if (ret)
1047                        goto err;
1048        }
1049
1050        exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1051        exec_len = args->batch_len;
1052        if (cliprects) {
1053                for (i = 0; i < args->num_cliprects; i++) {
1054                        ret = i915_emit_box(dev, &cliprects[i],
1055                                            args->DR1, args->DR4);
1056                        if (ret)
1057                                goto err;
1058
1059                        ret = ring->dispatch_execbuffer(ring,
1060                                                        exec_start, exec_len,
1061                                                        flags);
1062                        if (ret)
1063                                goto err;
1064                }
1065        } else {
1066                ret = ring->dispatch_execbuffer(ring,
1067                                                exec_start, exec_len,
1068                                                flags);
1069                if (ret)
1070                        goto err;
1071        }
1072
1073        trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
1074
1075        i915_gem_execbuffer_move_to_active(&eb->objects, ring);
1076        i915_gem_execbuffer_retire_commands(dev, file, ring);
1077
1078err:
1079        eb_destroy(eb);
1080
1081        mutex_unlock(&dev->struct_mutex);
1082
1083pre_mutex_err:
1084        kfree(cliprects);
1085        return ret;
1086}
1087
1088/*
1089 * Legacy execbuffer just creates an exec2 list from the original exec object
1090 * list array and passes it to the real function.
1091 */
1092int
1093i915_gem_execbuffer(struct drm_device *dev, void *data,
1094                    struct drm_file *file)
1095{
1096        struct drm_i915_gem_execbuffer *args = data;
1097        struct drm_i915_gem_execbuffer2 exec2;
1098        struct drm_i915_gem_exec_object *exec_list = NULL;
1099        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1100        int ret, i;
1101
1102        if (args->buffer_count < 1) {
1103                DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1104                return -EINVAL;
1105        }
1106
1107        /* Copy in the exec list from userland */
1108        exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1109        exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1110        if (exec_list == NULL || exec2_list == NULL) {
1111                DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1112                          args->buffer_count);
1113                drm_free_large(exec_list);
1114                drm_free_large(exec2_list);
1115                return -ENOMEM;
1116        }
1117        ret = copy_from_user(exec_list,
1118                             (void __user *)(uintptr_t)args->buffers_ptr,
1119                             sizeof(*exec_list) * args->buffer_count);
1120        if (ret != 0) {
1121                DRM_DEBUG("copy %d exec entries failed %d\n",
1122                          args->buffer_count, ret);
1123                drm_free_large(exec_list);
1124                drm_free_large(exec2_list);
1125                return -EFAULT;
1126        }
1127
1128        for (i = 0; i < args->buffer_count; i++) {
1129                exec2_list[i].handle = exec_list[i].handle;
1130                exec2_list[i].relocation_count = exec_list[i].relocation_count;
1131                exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1132                exec2_list[i].alignment = exec_list[i].alignment;
1133                exec2_list[i].offset = exec_list[i].offset;
1134                if (INTEL_INFO(dev)->gen < 4)
1135                        exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1136                else
1137                        exec2_list[i].flags = 0;
1138        }
1139
1140        exec2.buffers_ptr = args->buffers_ptr;
1141        exec2.buffer_count = args->buffer_count;
1142        exec2.batch_start_offset = args->batch_start_offset;
1143        exec2.batch_len = args->batch_len;
1144        exec2.DR1 = args->DR1;
1145        exec2.DR4 = args->DR4;
1146        exec2.num_cliprects = args->num_cliprects;
1147        exec2.cliprects_ptr = args->cliprects_ptr;
1148        exec2.flags = I915_EXEC_RENDER;
1149        i915_execbuffer2_set_context_id(exec2, 0);
1150
1151        ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1152        if (!ret) {
1153                /* Copy the new buffer offsets back to the user's exec list. */
1154                for (i = 0; i < args->buffer_count; i++)
1155                        exec_list[i].offset = exec2_list[i].offset;
1156                /* ... and back out to userspace */
1157                ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
1158                                   exec_list,
1159                                   sizeof(*exec_list) * args->buffer_count);
1160                if (ret) {
1161                        ret = -EFAULT;
1162                        DRM_DEBUG("failed to copy %d exec entries "
1163                                  "back to user (%d)\n",
1164                                  args->buffer_count, ret);
1165                }
1166        }
1167
1168        drm_free_large(exec_list);
1169        drm_free_large(exec2_list);
1170        return ret;
1171}
1172
1173int
1174i915_gem_execbuffer2(struct drm_device *dev, void *data,
1175                     struct drm_file *file)
1176{
1177        struct drm_i915_gem_execbuffer2 *args = data;
1178        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1179        int ret;
1180
1181        if (args->buffer_count < 1 ||
1182            args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1183                DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1184                return -EINVAL;
1185        }
1186
1187        exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1188                             GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1189        if (exec2_list == NULL)
1190                exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1191                                           args->buffer_count);
1192        if (exec2_list == NULL) {
1193                DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1194                          args->buffer_count);
1195                return -ENOMEM;
1196        }
1197        ret = copy_from_user(exec2_list,
1198                             (struct drm_i915_relocation_entry __user *)
1199                             (uintptr_t) args->buffers_ptr,
1200                             sizeof(*exec2_list) * args->buffer_count);
1201        if (ret != 0) {
1202                DRM_DEBUG("copy %d exec entries failed %d\n",
1203                          args->buffer_count, ret);
1204                drm_free_large(exec2_list);
1205                return -EFAULT;
1206        }
1207
1208        ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1209        if (!ret) {
1210                /* Copy the new buffer offsets back to the user's exec list. */
1211                ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
1212                                   exec2_list,
1213                                   sizeof(*exec2_list) * args->buffer_count);
1214                if (ret) {
1215                        ret = -EFAULT;
1216                        DRM_DEBUG("failed to copy %d exec entries "
1217                                  "back to user (%d)\n",
1218                                  args->buffer_count, ret);
1219                }
1220        }
1221
1222        drm_free_large(exec2_list);
1223        return ret;
1224}
1225