linux/drivers/gpu/drm/i915/i915_gem_execbuffer.c
   1/*
   2 * Copyright © 2008,2010 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *    Chris Wilson <chris@chris-wilson.co.uk>
  26 *
  27 */
  28
  29#include <linux/dma_remapping.h>
  30#include <linux/reservation.h>
  31#include <linux/sync_file.h>
  32#include <linux/uaccess.h>
  33
  34#include <drm/drmP.h>
  35#include <drm/drm_syncobj.h>
  36#include <drm/i915_drm.h>
  37
  38#include "i915_drv.h"
  39#include "i915_gem_clflush.h"
  40#include "i915_trace.h"
  41#include "intel_drv.h"
  42#include "intel_frontbuffer.h"
  43
  44enum {
  45        FORCE_CPU_RELOC = 1,
  46        FORCE_GTT_RELOC,
  47        FORCE_GPU_RELOC,
  48#define DBG_FORCE_RELOC 0 /* choose one of the above! */
  49};
  50
  51#define __EXEC_OBJECT_HAS_REF           BIT(31)
  52#define __EXEC_OBJECT_HAS_PIN           BIT(30)
  53#define __EXEC_OBJECT_HAS_FENCE         BIT(29)
  54#define __EXEC_OBJECT_NEEDS_MAP         BIT(28)
  55#define __EXEC_OBJECT_NEEDS_BIAS        BIT(27)
  56#define __EXEC_OBJECT_INTERNAL_FLAGS    (~0u << 27) /* all of the above */
  57#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
  58
  59#define __EXEC_HAS_RELOC        BIT(31)
  60#define __EXEC_VALIDATED        BIT(30)
  61#define __EXEC_INTERNAL_FLAGS   (~0u << 30)
  62#define UPDATE                  PIN_OFFSET_FIXED
  63
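/*
 * Objects flagged with __EXEC_OBJECT_NEEDS_BIAS (e.g. the batch itself) are
 * kept out of the first 256KiB of the GTT so that negative relocation
 * deltas never reach below zero; see the note on SNA batch compression in
 * eb_lookup_vmas().
 */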
  64#define BATCH_OFFSET_BIAS (256*1024)
  65
  66#define __I915_EXEC_ILLEGAL_FLAGS \
  67        (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK)
  68
  69/**
  70 * DOC: User command execution
  71 *
  72 * Userspace submits commands to be executed on the GPU as an instruction
   73 * stream within a GEM object we call a batchbuffer. These instructions may
  74 * refer to other GEM objects containing auxiliary state such as kernels,
  75 * samplers, render targets and even secondary batchbuffers. Userspace does
  76 * not know where in the GPU memory these objects reside and so before the
  77 * batchbuffer is passed to the GPU for execution, those addresses in the
  78 * batchbuffer and auxiliary objects are updated. This is known as relocation,
  79 * or patching. To try and avoid having to relocate each object on the next
  80 * execution, userspace is told the location of those objects in this pass,
  81 * but this remains just a hint as the kernel may choose a new location for
  82 * any object in the future.
  83 *
  84 * Processing an execbuf ioctl is conceptually split up into a few phases.
  85 *
  86 * 1. Validation - Ensure all the pointers, handles and flags are valid.
  87 * 2. Reservation - Assign GPU address space for every object
  88 * 3. Relocation - Update any addresses to point to the final locations
  89 * 4. Serialisation - Order the request with respect to its dependencies
  90 * 5. Construction - Construct a request to execute the batchbuffer
  91 * 6. Submission (at some point in the future execution)
  92 *
  93 * Reserving resources for the execbuf is the most complicated phase. We
  94 * neither want to have to migrate the object in the address space, nor do
  95 * we want to have to update any relocations pointing to this object. Ideally,
  96 * we want to leave the object where it is and for all the existing relocations
  97 * to match. If the object is given a new address, or if userspace thinks the
  98 * object is elsewhere, we have to parse all the relocation entries and update
   99 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
 100 * all the target addresses in all of its objects match the value in the
 101 * relocation entries and that they all match the presumed offsets given by the
 102 * list of execbuffer objects. Using this knowledge, we know that if we haven't
 103 * moved any buffers, all the relocation entries are valid and we can skip
 104 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
  105 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
 106 *
 107 *      The addresses written in the objects must match the corresponding
 108 *      reloc.presumed_offset which in turn must match the corresponding
 109 *      execobject.offset.
 110 *
 111 *      Any render targets written to in the batch must be flagged with
 112 *      EXEC_OBJECT_WRITE.
 113 *
 114 *      To avoid stalling, execobject.offset should match the current
 115 *      address of that object within the active context.
 116 *
  117 * The reservation is done in multiple phases. First we try to keep any
  118 * object already bound in its current location - so long as it meets the
 119 * constraints imposed by the new execbuffer. Any object left unbound after the
 120 * first pass is then fitted into any available idle space. If an object does
 121 * not fit, all objects are removed from the reservation and the process rerun
 122 * after sorting the objects into a priority order (more difficult to fit
 123 * objects are tried first). Failing that, the entire VM is cleared and we try
  124 * to fit the execbuf one last time before concluding that it simply will not
 125 * fit.
 126 *
 127 * A small complication to all of this is that we allow userspace not only to
 128 * specify an alignment and a size for the object in the address space, but
  129 * we also allow userspace to specify the exact offset. These objects are
  130 * simpler to place (the location is known a priori); all we have to do is make
 131 * sure the space is available.
 132 *
 133 * Once all the objects are in place, patching up the buried pointers to point
 134 * to the final locations is a fairly simple job of walking over the relocation
 135 * entry arrays, looking up the right address and rewriting the value into
 136 * the object. Simple! ... The relocation entries are stored in user memory
 137 * and so to access them we have to copy them into a local buffer. That copy
 138 * has to avoid taking any pagefaults as they may lead back to a GEM object
 139 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
 140 * the relocation into multiple passes. First we try to do everything within an
 141 * atomic context (avoid the pagefaults) which requires that we never wait. If
  142 * we detect that we may wait, or if we need to fault, then we have to fall back
 143 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
 144 * bells yet?) Dropping the mutex means that we lose all the state we have
 145 * built up so far for the execbuf and we must reset any global data. However,
 146 * we do leave the objects pinned in their final locations - which is a
 147 * potential issue for concurrent execbufs. Once we have left the mutex, we can
 148 * allocate and copy all the relocation entries into a large array at our
 149 * leisure, reacquire the mutex, reclaim all the objects and other state and
 150 * then proceed to update any incorrect addresses with the objects.
 151 *
 152 * As we process the relocation entries, we maintain a record of whether the
  153 * object is being written to. Using NO_RELOC, we expect userspace to provide
 154 * this information instead. We also check whether we can skip the relocation
 155 * by comparing the expected value inside the relocation entry with the target's
 156 * final address. If they differ, we have to map the current object and rewrite
 157 * the 4 or 8 byte pointer within.
 158 *
 159 * Serialising an execbuf is quite simple according to the rules of the GEM
 160 * ABI. Execution within each context is ordered by the order of submission.
 161 * Writes to any GEM object are in order of submission and are exclusive. Reads
 162 * from a GEM object are unordered with respect to other reads, but ordered by
 163 * writes. A write submitted after a read cannot occur before the read, and
 164 * similarly any read submitted after a write cannot occur before the write.
 165 * Writes are ordered between engines such that only one write occurs at any
 166 * time (completing any reads beforehand) - using semaphores where available
  167 * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
 168 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
 169 * reads before starting, and any read (either using set-domain or pread) must
 170 * flush all GPU writes before starting. (Note we only employ a barrier before,
 171 * we currently rely on userspace not concurrently starting a new execution
 172 * whilst reading or writing to an object. This may be an advantage or not
 173 * depending on how much you trust userspace not to shoot themselves in the
 174 * foot.) Serialisation may just result in the request being inserted into
  175 * a DAG awaiting its turn, but the simplest option is to wait on the CPU until
 176 * all dependencies are resolved.
 177 *
  178 * After all of that, it is just a matter of closing the request and handing it to
 179 * the hardware (well, leaving it in a queue to be executed). However, we also
 180 * offer the ability for batchbuffers to be run with elevated privileges so
  181 * that they can access otherwise hidden registers. (Used to adjust L3 cache etc.)
 182 * Before any batch is given extra privileges we first must check that it
  183 * contains no nefarious instructions: we check that each instruction is from
  184 * our whitelist and that all registers are also from an allowed list. We first
 185 * copy the user's batchbuffer to a shadow (so that the user doesn't have
 186 * access to it, either by the CPU or GPU as we scan it) and then parse each
 187 * instruction. If everything is ok, we set a flag telling the hardware to run
 188 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
 189 */
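/*
 * Illustrative sketch only, not driver code: a minimal single-buffer
 * submission consistent with the contract described above. Here "fd",
 * "batch_handle", "batch_len" and "presumed" are placeholders for a DRM
 * fd, a GEM handle whose contents end in MI_BATCH_BUFFER_END, its length,
 * and the offset reported by a previous execbuf.
 *
 *	struct drm_i915_gem_exec_object2 obj = {
 *		.handle = batch_handle,
 *		.offset = presumed,
 *	};
 *	struct drm_i915_gem_execbuffer2 execbuf = {
 *		.buffers_ptr = (uintptr_t)&obj,
 *		.buffer_count = 1,
 *		.batch_len = batch_len,
 *		.flags = I915_EXEC_RENDER |
 *			 I915_EXEC_NO_RELOC |
 *			 I915_EXEC_HANDLE_LUT,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 *
 * With no relocation entries and I915_EXEC_NO_RELOC set, execobj.offset is
 * trusted as the current address of the object, so a hit during the
 * reservation phase lets the kernel skip relocation processing entirely.
 */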
 190
 191struct i915_execbuffer {
 192        struct drm_i915_private *i915; /** i915 backpointer */
 193        struct drm_file *file; /** per-file lookup tables and limits */
 194        struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 195        struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
 196        struct i915_vma **vma;
 197        unsigned int *flags;
 198
 199        struct intel_engine_cs *engine; /** engine to queue the request to */
 200        struct i915_gem_context *ctx; /** context for building the request */
 201        struct i915_address_space *vm; /** GTT and vma for the request */
 202
 203        struct drm_i915_gem_request *request; /** our request to build */
 204        struct i915_vma *batch; /** identity of the batch obj/vma */
 205
 206        /** actual size of execobj[] as we may extend it for the cmdparser */
 207        unsigned int buffer_count;
 208
 209        /** list of vma not yet bound during reservation phase */
 210        struct list_head unbound;
 211
 212        /** list of vma that have execobj.relocation_count */
 213        struct list_head relocs;
 214
 215        /**
 216         * Track the most recently used object for relocations, as we
 217         * frequently have to perform multiple relocations within the same
 218         * obj/page
 219         */
 220        struct reloc_cache {
 221                struct drm_mm_node node; /** temporary GTT binding */
 222                unsigned long vaddr; /** Current kmap address */
 223                unsigned long page; /** Currently mapped page index */
 224                unsigned int gen; /** Cached value of INTEL_GEN */
 225                bool use_64bit_reloc : 1;
 226                bool has_llc : 1;
 227                bool has_fence : 1;
 228                bool needs_unfenced : 1;
 229
 230                struct drm_i915_gem_request *rq;
 231                u32 *rq_cmd;
 232                unsigned int rq_size;
 233        } reloc_cache;
 234
 235        u64 invalid_flags; /** Set of execobj.flags that are invalid */
 236        u32 context_flags; /** Set of execobj.flags to insert from the ctx */
 237
 238        u32 batch_start_offset; /** Location within object of batch */
 239        u32 batch_len; /** Length of batch within object */
 240        u32 batch_flags; /** Flags composed for emit_bb_start() */
 241
 242        /**
  243         * Indicate either the size of the hashtable used to resolve
 244         * relocation handles, or if negative that we are using a direct
 245         * index into the execobj[].
 246         */
 247        int lut_size;
 248        struct hlist_head *buckets; /** ht for relocation handles */
 249};
 250
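/*
 * Each vma stores a pointer into eb->flags[] (set up in eb_add_vma()), so
 * subtracting the array base recovers the execobj[] index and lets us map
 * a vma straight back to its drm_i915_gem_exec_object2 entry.
 */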
 251#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
 252
 253/*
 254 * Used to convert any address to canonical form.
 255 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
 256 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
 257 * addresses to be in a canonical form:
 258 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
 259 * canonical form [63:48] == [47]."
 260 */
 261#define GEN8_HIGH_ADDRESS_BIT 47
 262static inline u64 gen8_canonical_addr(u64 address)
 263{
 264        return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
 265}
 266
 267static inline u64 gen8_noncanonical_addr(u64 address)
 268{
 269        return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
 270}
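/*
 * For example, with bit 47 set, 0x0000800000001000 sign-extends to the
 * canonical 0xffff800000001000, and gen8_noncanonical_addr() masks it
 * back down to bits [47:0].
 */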
 271
 272static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 273{
 274        return eb->engine->needs_cmd_parser && eb->batch_len;
 275}
 276
 277static int eb_create(struct i915_execbuffer *eb)
 278{
 279        if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
 280                unsigned int size = 1 + ilog2(eb->buffer_count);
 281
 282                /*
 283                 * Without a 1:1 association between relocation handles and
 284                 * the execobject[] index, we instead create a hashtable.
 285                 * We size it dynamically based on available memory, starting
  286                  * first with a 1:1 associative hash and scaling back until
 287                 * the allocation succeeds.
 288                 *
 289                 * Later on we use a positive lut_size to indicate we are
 290                 * using this hashtable, and a negative value to indicate a
 291                 * direct lookup.
 292                 */
 293                do {
 294                        gfp_t flags;
 295
 296                        /* While we can still reduce the allocation size, don't
 297                         * raise a warning and allow the allocation to fail.
 298                         * On the last pass though, we want to try as hard
 299                         * as possible to perform the allocation and warn
 300                         * if it fails.
 301                         */
 302                        flags = GFP_KERNEL;
 303                        if (size > 1)
 304                                flags |= __GFP_NORETRY | __GFP_NOWARN;
 305
 306                        eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
 307                                              flags);
 308                        if (eb->buckets)
 309                                break;
 310                } while (--size);
 311
 312                if (unlikely(!size))
 313                        return -ENOMEM;
 314
 315                eb->lut_size = size;
 316        } else {
 317                eb->lut_size = -eb->buffer_count;
 318        }
 319
 320        return 0;
 321}
 322
 323static bool
 324eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 325                 const struct i915_vma *vma,
 326                 unsigned int flags)
 327{
 328        if (vma->node.size < entry->pad_to_size)
 329                return true;
 330
 331        if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
 332                return true;
 333
 334        if (flags & EXEC_OBJECT_PINNED &&
 335            vma->node.start != entry->offset)
 336                return true;
 337
 338        if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
 339            vma->node.start < BATCH_OFFSET_BIAS)
 340                return true;
 341
 342        if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
 343            (vma->node.start + vma->node.size - 1) >> 32)
 344                return true;
 345
 346        if (flags & __EXEC_OBJECT_NEEDS_MAP &&
 347            !i915_vma_is_map_and_fenceable(vma))
 348                return true;
 349
 350        return false;
 351}
 352
 353static inline bool
 354eb_pin_vma(struct i915_execbuffer *eb,
 355           const struct drm_i915_gem_exec_object2 *entry,
 356           struct i915_vma *vma)
 357{
 358        unsigned int exec_flags = *vma->exec_flags;
 359        u64 pin_flags;
 360
 361        if (vma->node.size)
 362                pin_flags = vma->node.start;
 363        else
 364                pin_flags = entry->offset & PIN_OFFSET_MASK;
 365
 366        pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
 367        if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
 368                pin_flags |= PIN_GLOBAL;
 369
 370        if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
 371                return false;
 372
 373        if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 374                if (unlikely(i915_vma_pin_fence(vma))) {
 375                        i915_vma_unpin(vma);
 376                        return false;
 377                }
 378
 379                if (vma->fence)
 380                        exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 381        }
 382
 383        *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
 384        return !eb_vma_misplaced(entry, vma, exec_flags);
 385}
 386
 387static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 388{
 389        GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 390
 391        if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 392                __i915_vma_unpin_fence(vma);
 393
 394        __i915_vma_unpin(vma);
 395}
 396
 397static inline void
 398eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 399{
 400        if (!(*flags & __EXEC_OBJECT_HAS_PIN))
 401                return;
 402
 403        __eb_unreserve_vma(vma, *flags);
 404        *flags &= ~__EXEC_OBJECT_RESERVED;
 405}
 406
 407static int
 408eb_validate_vma(struct i915_execbuffer *eb,
 409                struct drm_i915_gem_exec_object2 *entry,
 410                struct i915_vma *vma)
 411{
 412        if (unlikely(entry->flags & eb->invalid_flags))
 413                return -EINVAL;
 414
 415        if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
 416                return -EINVAL;
 417
 418        /*
 419         * Offset can be used as input (EXEC_OBJECT_PINNED), reject
 420         * any non-page-aligned or non-canonical addresses.
 421         */
 422        if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
 423                     entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK)))
 424                return -EINVAL;
 425
 426        /* pad_to_size was once a reserved field, so sanitize it */
 427        if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
 428                if (unlikely(offset_in_page(entry->pad_to_size)))
 429                        return -EINVAL;
 430        } else {
 431                entry->pad_to_size = 0;
 432        }
 433
 434        if (unlikely(vma->exec_flags)) {
 435                DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 436                          entry->handle, (int)(entry - eb->exec));
 437                return -EINVAL;
 438        }
 439
 440        /*
  441         * From the drm_mm perspective, the address space is continuous,
 442         * so from this point we're always using non-canonical
 443         * form internally.
 444         */
 445        entry->offset = gen8_noncanonical_addr(entry->offset);
 446
 447        if (!eb->reloc_cache.has_fence) {
 448                entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 449        } else {
 450                if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
 451                     eb->reloc_cache.needs_unfenced) &&
 452                    i915_gem_object_is_tiled(vma->obj))
 453                        entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
 454        }
 455
 456        if (!(entry->flags & EXEC_OBJECT_PINNED))
 457                entry->flags |= eb->context_flags;
 458
 459        return 0;
 460}
 461
 462static int
 463eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 464{
 465        struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 466        int err;
 467
 468        GEM_BUG_ON(i915_vma_is_closed(vma));
 469
 470        if (!(eb->args->flags & __EXEC_VALIDATED)) {
 471                err = eb_validate_vma(eb, entry, vma);
 472                if (unlikely(err))
 473                        return err;
 474        }
 475
 476        if (eb->lut_size > 0) {
 477                vma->exec_handle = entry->handle;
 478                hlist_add_head(&vma->exec_node,
 479                               &eb->buckets[hash_32(entry->handle,
 480                                                    eb->lut_size)]);
 481        }
 482
 483        if (entry->relocation_count)
 484                list_add_tail(&vma->reloc_link, &eb->relocs);
 485
 486        /*
 487         * Stash a pointer from the vma to execobj, so we can query its flags,
 488         * size, alignment etc as provided by the user. Also we stash a pointer
 489         * to the vma inside the execobj so that we can use a direct lookup
 490         * to find the right target VMA when doing relocations.
 491         */
 492        eb->vma[i] = vma;
 493        eb->flags[i] = entry->flags;
 494        vma->exec_flags = &eb->flags[i];
 495
 496        err = 0;
 497        if (eb_pin_vma(eb, entry, vma)) {
 498                if (entry->offset != vma->node.start) {
 499                        entry->offset = vma->node.start | UPDATE;
 500                        eb->args->flags |= __EXEC_HAS_RELOC;
 501                }
 502        } else {
 503                eb_unreserve_vma(vma, vma->exec_flags);
 504
 505                list_add_tail(&vma->exec_link, &eb->unbound);
 506                if (drm_mm_node_allocated(&vma->node))
 507                        err = i915_vma_unbind(vma);
 508        }
 509        return err;
 510}
 511
 512static inline int use_cpu_reloc(const struct reloc_cache *cache,
 513                                const struct drm_i915_gem_object *obj)
 514{
 515        if (!i915_gem_object_has_struct_page(obj))
 516                return false;
 517
 518        if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
 519                return true;
 520
 521        if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
 522                return false;
 523
 524        return (cache->has_llc ||
 525                obj->cache_dirty ||
 526                obj->cache_level != I915_CACHE_NONE);
 527}
 528
 529static int eb_reserve_vma(const struct i915_execbuffer *eb,
 530                          struct i915_vma *vma)
 531{
 532        struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 533        unsigned int exec_flags = *vma->exec_flags;
 534        u64 pin_flags;
 535        int err;
 536
 537        pin_flags = PIN_USER | PIN_NONBLOCK;
 538        if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
 539                pin_flags |= PIN_GLOBAL;
 540
 541        /*
 542         * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
 543         * limit address to the first 4GBs for unflagged objects.
 544         */
 545        if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
 546                pin_flags |= PIN_ZONE_4G;
 547
 548        if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
 549                pin_flags |= PIN_MAPPABLE;
 550
 551        if (exec_flags & EXEC_OBJECT_PINNED) {
 552                pin_flags |= entry->offset | PIN_OFFSET_FIXED;
 553                pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
 554        } else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
 555                pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 556        }
 557
 558        err = i915_vma_pin(vma,
 559                           entry->pad_to_size, entry->alignment,
 560                           pin_flags);
 561        if (err)
 562                return err;
 563
 564        if (entry->offset != vma->node.start) {
 565                entry->offset = vma->node.start | UPDATE;
 566                eb->args->flags |= __EXEC_HAS_RELOC;
 567        }
 568
 569        if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 570                err = i915_vma_pin_fence(vma);
 571                if (unlikely(err)) {
 572                        i915_vma_unpin(vma);
 573                        return err;
 574                }
 575
 576                if (vma->fence)
 577                        exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 578        }
 579
 580        *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
 581        GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
 582
 583        return 0;
 584}
 585
 586static int eb_reserve(struct i915_execbuffer *eb)
 587{
 588        const unsigned int count = eb->buffer_count;
 589        struct list_head last;
 590        struct i915_vma *vma;
 591        unsigned int i, pass;
 592        int err;
 593
 594        /*
 595         * Attempt to pin all of the buffers into the GTT.
 596         * This is done in 3 phases:
 597         *
 598         * 1a. Unbind all objects that do not match the GTT constraints for
 599         *     the execbuffer (fenceable, mappable, alignment etc).
 600         * 1b. Increment pin count for already bound objects.
 601         * 2.  Bind new objects.
 602         * 3.  Decrement pin count.
 603         *
  604         * This avoids unnecessary unbinding of later objects in order to make
 605         * room for the earlier objects *unless* we need to defragment.
 606         */
 607
 608        pass = 0;
 609        err = 0;
 610        do {
 611                list_for_each_entry(vma, &eb->unbound, exec_link) {
 612                        err = eb_reserve_vma(eb, vma);
 613                        if (err)
 614                                break;
 615                }
 616                if (err != -ENOSPC)
 617                        return err;
 618
 619                /* Resort *all* the objects into priority order */
 620                INIT_LIST_HEAD(&eb->unbound);
 621                INIT_LIST_HEAD(&last);
 622                for (i = 0; i < count; i++) {
 623                        unsigned int flags = eb->flags[i];
 624                        struct i915_vma *vma = eb->vma[i];
 625
 626                        if (flags & EXEC_OBJECT_PINNED &&
 627                            flags & __EXEC_OBJECT_HAS_PIN)
 628                                continue;
 629
 630                        eb_unreserve_vma(vma, &eb->flags[i]);
 631
 632                        if (flags & EXEC_OBJECT_PINNED)
 633                                list_add(&vma->exec_link, &eb->unbound);
 634                        else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 635                                list_add_tail(&vma->exec_link, &eb->unbound);
 636                        else
 637                                list_add_tail(&vma->exec_link, &last);
 638                }
 639                list_splice_tail(&last, &eb->unbound);
 640
 641                switch (pass++) {
 642                case 0:
 643                        break;
 644
 645                case 1:
 646                        /* Too fragmented, unbind everything and retry */
 647                        err = i915_gem_evict_vm(eb->vm);
 648                        if (err)
 649                                return err;
 650                        break;
 651
 652                default:
 653                        return -ENOSPC;
 654                }
 655        } while (1);
 656}
 657
 658static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 659{
 660        if (eb->args->flags & I915_EXEC_BATCH_FIRST)
 661                return 0;
 662        else
 663                return eb->buffer_count - 1;
 664}
 665
 666static int eb_select_context(struct i915_execbuffer *eb)
 667{
 668        struct i915_gem_context *ctx;
 669
 670        ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
 671        if (unlikely(!ctx))
 672                return -ENOENT;
 673
 674        eb->ctx = ctx;
 675        eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
 676
 677        eb->context_flags = 0;
 678        if (ctx->flags & CONTEXT_NO_ZEROMAP)
 679                eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
 680
 681        return 0;
 682}
 683
 684static int eb_lookup_vmas(struct i915_execbuffer *eb)
 685{
 686        struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 687        struct drm_i915_gem_object *obj;
 688        unsigned int i;
 689        int err;
 690
 691        if (unlikely(i915_gem_context_is_closed(eb->ctx)))
 692                return -ENOENT;
 693
 694        if (unlikely(i915_gem_context_is_banned(eb->ctx)))
 695                return -EIO;
 696
 697        INIT_LIST_HEAD(&eb->relocs);
 698        INIT_LIST_HEAD(&eb->unbound);
 699
 700        for (i = 0; i < eb->buffer_count; i++) {
 701                u32 handle = eb->exec[i].handle;
 702                struct i915_lut_handle *lut;
 703                struct i915_vma *vma;
 704
 705                vma = radix_tree_lookup(handles_vma, handle);
 706                if (likely(vma))
 707                        goto add_vma;
 708
 709                obj = i915_gem_object_lookup(eb->file, handle);
 710                if (unlikely(!obj)) {
 711                        err = -ENOENT;
 712                        goto err_vma;
 713                }
 714
 715                vma = i915_vma_instance(obj, eb->vm, NULL);
 716                if (unlikely(IS_ERR(vma))) {
 717                        err = PTR_ERR(vma);
 718                        goto err_obj;
 719                }
 720
 721                lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
 722                if (unlikely(!lut)) {
 723                        err = -ENOMEM;
 724                        goto err_obj;
 725                }
 726
 727                err = radix_tree_insert(handles_vma, handle, vma);
 728                if (unlikely(err)) {
 729                        kfree(lut);
 730                        goto err_obj;
 731                }
 732
 733                /* transfer ref to ctx */
 734                vma->open_count++;
 735                list_add(&lut->obj_link, &obj->lut_list);
 736                list_add(&lut->ctx_link, &eb->ctx->handles_list);
 737                lut->ctx = eb->ctx;
 738                lut->handle = handle;
 739
 740add_vma:
 741                err = eb_add_vma(eb, i, vma);
 742                if (unlikely(err))
 743                        goto err_vma;
 744
 745                GEM_BUG_ON(vma != eb->vma[i]);
 746                GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 747        }
 748
 749        /* take note of the batch buffer before we might reorder the lists */
 750        i = eb_batch_index(eb);
 751        eb->batch = eb->vma[i];
 752        GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
 753
 754        /*
 755         * SNA is doing fancy tricks with compressing batch buffers, which leads
 756         * to negative relocation deltas. Usually that works out ok since the
 757         * relocate address is still positive, except when the batch is placed
 758         * very low in the GTT. Ensure this doesn't happen.
 759         *
 760         * Note that actual hangs have only been observed on gen7, but for
 761         * paranoia do it everywhere.
 762         */
 763        if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
 764                eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
 765        if (eb->reloc_cache.has_fence)
 766                eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
 767
 768        eb->args->flags |= __EXEC_VALIDATED;
 769        return eb_reserve(eb);
 770
 771err_obj:
 772        i915_gem_object_put(obj);
 773err_vma:
 774        eb->vma[i] = NULL;
 775        return err;
 776}
 777
 778static struct i915_vma *
 779eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 780{
 781        if (eb->lut_size < 0) {
 782                if (handle >= -eb->lut_size)
 783                        return NULL;
 784                return eb->vma[handle];
 785        } else {
 786                struct hlist_head *head;
 787                struct i915_vma *vma;
 788
 789                head = &eb->buckets[hash_32(handle, eb->lut_size)];
 790                hlist_for_each_entry(vma, head, exec_node) {
 791                        if (vma->exec_handle == handle)
 792                                return vma;
 793                }
 794                return NULL;
 795        }
 796}
 797
 798static void eb_release_vmas(const struct i915_execbuffer *eb)
 799{
 800        const unsigned int count = eb->buffer_count;
 801        unsigned int i;
 802
 803        for (i = 0; i < count; i++) {
 804                struct i915_vma *vma = eb->vma[i];
 805                unsigned int flags = eb->flags[i];
 806
 807                if (!vma)
 808                        break;
 809
 810                GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 811                vma->exec_flags = NULL;
 812                eb->vma[i] = NULL;
 813
 814                if (flags & __EXEC_OBJECT_HAS_PIN)
 815                        __eb_unreserve_vma(vma, flags);
 816
 817                if (flags & __EXEC_OBJECT_HAS_REF)
 818                        i915_vma_put(vma);
 819        }
 820}
 821
 822static void eb_reset_vmas(const struct i915_execbuffer *eb)
 823{
 824        eb_release_vmas(eb);
 825        if (eb->lut_size > 0)
 826                memset(eb->buckets, 0,
 827                       sizeof(struct hlist_head) << eb->lut_size);
 828}
 829
 830static void eb_destroy(const struct i915_execbuffer *eb)
 831{
 832        GEM_BUG_ON(eb->reloc_cache.rq);
 833
 834        if (eb->lut_size > 0)
 835                kfree(eb->buckets);
 836}
 837
 838static inline u64
 839relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 840                  const struct i915_vma *target)
 841{
 842        return gen8_canonical_addr((int)reloc->delta + target->node.start);
 843}
 844
 845static void reloc_cache_init(struct reloc_cache *cache,
 846                             struct drm_i915_private *i915)
 847{
 848        cache->page = -1;
 849        cache->vaddr = 0;
 850        /* Must be a variable in the struct to allow GCC to unroll. */
 851        cache->gen = INTEL_GEN(i915);
 852        cache->has_llc = HAS_LLC(i915);
 853        cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
 854        cache->has_fence = cache->gen < 4;
 855        cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 856        cache->node.allocated = false;
 857        cache->rq = NULL;
 858        cache->rq_size = 0;
 859}
 860
 861static inline void *unmask_page(unsigned long p)
 862{
 863        return (void *)(uintptr_t)(p & PAGE_MASK);
 864}
 865
 866static inline unsigned int unmask_flags(unsigned long p)
 867{
 868        return p & ~PAGE_MASK;
 869}
 870
 871#define KMAP 0x4 /* after CLFLUSH_FLAGS */
 872
 873static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 874{
 875        struct drm_i915_private *i915 =
 876                container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
 877        return &i915->ggtt;
 878}
 879
 880static void reloc_gpu_flush(struct reloc_cache *cache)
 881{
 882        GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
 883        cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
 884        i915_gem_object_unpin_map(cache->rq->batch->obj);
 885        i915_gem_chipset_flush(cache->rq->i915);
 886
 887        __i915_add_request(cache->rq, true);
 888        cache->rq = NULL;
 889}
 890
 891static void reloc_cache_reset(struct reloc_cache *cache)
 892{
 893        void *vaddr;
 894
 895        if (cache->rq)
 896                reloc_gpu_flush(cache);
 897
 898        if (!cache->vaddr)
 899                return;
 900
 901        vaddr = unmask_page(cache->vaddr);
 902        if (cache->vaddr & KMAP) {
 903                if (cache->vaddr & CLFLUSH_AFTER)
 904                        mb();
 905
 906                kunmap_atomic(vaddr);
 907                i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
 908        } else {
 909                wmb();
 910                io_mapping_unmap_atomic((void __iomem *)vaddr);
 911                if (cache->node.allocated) {
 912                        struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 913
 914                        ggtt->base.clear_range(&ggtt->base,
 915                                               cache->node.start,
 916                                               cache->node.size);
 917                        drm_mm_remove_node(&cache->node);
 918                } else {
 919                        i915_vma_unpin((struct i915_vma *)cache->node.mm);
 920                }
 921        }
 922
 923        cache->vaddr = 0;
 924        cache->page = -1;
 925}
 926
 927static void *reloc_kmap(struct drm_i915_gem_object *obj,
 928                        struct reloc_cache *cache,
 929                        unsigned long page)
 930{
 931        void *vaddr;
 932
 933        if (cache->vaddr) {
 934                kunmap_atomic(unmask_page(cache->vaddr));
 935        } else {
 936                unsigned int flushes;
 937                int err;
 938
 939                err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
 940                if (err)
 941                        return ERR_PTR(err);
 942
 943                BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
 944                BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
 945
 946                cache->vaddr = flushes | KMAP;
 947                cache->node.mm = (void *)obj;
 948                if (flushes)
 949                        mb();
 950        }
 951
 952        vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
 953        cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
 954        cache->page = page;
 955
 956        return vaddr;
 957}
 958
 959static void *reloc_iomap(struct drm_i915_gem_object *obj,
 960                         struct reloc_cache *cache,
 961                         unsigned long page)
 962{
 963        struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 964        unsigned long offset;
 965        void *vaddr;
 966
 967        if (cache->vaddr) {
 968                io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
 969        } else {
 970                struct i915_vma *vma;
 971                int err;
 972
 973                if (use_cpu_reloc(cache, obj))
 974                        return NULL;
 975
 976                err = i915_gem_object_set_to_gtt_domain(obj, true);
 977                if (err)
 978                        return ERR_PTR(err);
 979
 980                vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 981                                               PIN_MAPPABLE |
 982                                               PIN_NONBLOCK |
 983                                               PIN_NONFAULT);
 984                if (IS_ERR(vma)) {
 985                        memset(&cache->node, 0, sizeof(cache->node));
 986                        err = drm_mm_insert_node_in_range
 987                                (&ggtt->base.mm, &cache->node,
 988                                 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
 989                                 0, ggtt->mappable_end,
 990                                 DRM_MM_INSERT_LOW);
 991                        if (err) /* no inactive aperture space, use cpu reloc */
 992                                return NULL;
 993                } else {
 994                        err = i915_vma_put_fence(vma);
 995                        if (err) {
 996                                i915_vma_unpin(vma);
 997                                return ERR_PTR(err);
 998                        }
 999
1000                        cache->node.start = vma->node.start;
1001                        cache->node.mm = (void *)vma;
1002                }
1003        }
1004
1005        offset = cache->node.start;
1006        if (cache->node.allocated) {
1007                wmb();
1008                ggtt->base.insert_page(&ggtt->base,
1009                                       i915_gem_object_get_dma_address(obj, page),
1010                                       offset, I915_CACHE_NONE, 0);
1011        } else {
1012                offset += page << PAGE_SHIFT;
1013        }
1014
1015        vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->mappable,
1016                                                         offset);
1017        cache->page = page;
1018        cache->vaddr = (unsigned long)vaddr;
1019
1020        return vaddr;
1021}
1022
1023static void *reloc_vaddr(struct drm_i915_gem_object *obj,
1024                         struct reloc_cache *cache,
1025                         unsigned long page)
1026{
1027        void *vaddr;
1028
1029        if (cache->page == page) {
1030                vaddr = unmask_page(cache->vaddr);
1031        } else {
1032                vaddr = NULL;
1033                if ((cache->vaddr & KMAP) == 0)
1034                        vaddr = reloc_iomap(obj, cache, page);
1035                if (!vaddr)
1036                        vaddr = reloc_kmap(obj, cache, page);
1037        }
1038
1039        return vaddr;
1040}
1041
1042static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1043{
1044        if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
1045                if (flushes & CLFLUSH_BEFORE) {
1046                        clflushopt(addr);
1047                        mb();
1048                }
1049
1050                *addr = value;
1051
1052                /*
1053                 * Writes to the same cacheline are serialised by the CPU
1054                 * (including clflush). On the write path, we only require
1055                 * that it hits memory in an orderly fashion and place
1056                 * mb barriers at the start and end of the relocation phase
1057                 * to ensure ordering of clflush wrt to the system.
 1058                 * to ensure ordering of clflush wrt the system.
1059                if (flushes & CLFLUSH_AFTER)
1060                        clflushopt(addr);
1061        } else
1062                *addr = value;
1063}
1064
1065static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
1066                             struct i915_vma *vma,
1067                             unsigned int len)
1068{
1069        struct reloc_cache *cache = &eb->reloc_cache;
1070        struct drm_i915_gem_object *obj;
1071        struct drm_i915_gem_request *rq;
1072        struct i915_vma *batch;
1073        u32 *cmd;
1074        int err;
1075
1076        GEM_BUG_ON(vma->obj->base.write_domain & I915_GEM_DOMAIN_CPU);
1077
1078        obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
1079        if (IS_ERR(obj))
1080                return PTR_ERR(obj);
1081
1082        cmd = i915_gem_object_pin_map(obj,
1083                                      cache->has_llc ?
1084                                      I915_MAP_FORCE_WB :
1085                                      I915_MAP_FORCE_WC);
1086        i915_gem_object_unpin_pages(obj);
1087        if (IS_ERR(cmd))
1088                return PTR_ERR(cmd);
1089
1090        err = i915_gem_object_set_to_wc_domain(obj, false);
1091        if (err)
1092                goto err_unmap;
1093
1094        batch = i915_vma_instance(obj, vma->vm, NULL);
1095        if (IS_ERR(batch)) {
1096                err = PTR_ERR(batch);
1097                goto err_unmap;
1098        }
1099
1100        err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
1101        if (err)
1102                goto err_unmap;
1103
1104        rq = i915_gem_request_alloc(eb->engine, eb->ctx);
1105        if (IS_ERR(rq)) {
1106                err = PTR_ERR(rq);
1107                goto err_unpin;
1108        }
1109
1110        err = i915_gem_request_await_object(rq, vma->obj, true);
1111        if (err)
1112                goto err_request;
1113
1114        err = eb->engine->emit_flush(rq, EMIT_INVALIDATE);
1115        if (err)
1116                goto err_request;
1117
1118        err = i915_switch_context(rq);
1119        if (err)
1120                goto err_request;
1121
1122        err = eb->engine->emit_bb_start(rq,
1123                                        batch->node.start, PAGE_SIZE,
1124                                        cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
1125        if (err)
1126                goto err_request;
1127
1128        GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
1129        i915_vma_move_to_active(batch, rq, 0);
1130        reservation_object_lock(batch->resv, NULL);
1131        reservation_object_add_excl_fence(batch->resv, &rq->fence);
1132        reservation_object_unlock(batch->resv);
1133        i915_vma_unpin(batch);
1134
1135        i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1136        reservation_object_lock(vma->resv, NULL);
1137        reservation_object_add_excl_fence(vma->resv, &rq->fence);
1138        reservation_object_unlock(vma->resv);
1139
1140        rq->batch = batch;
1141
1142        cache->rq = rq;
1143        cache->rq_cmd = cmd;
1144        cache->rq_size = 0;
1145
1146        /* Return with batch mapping (cmd) still pinned */
1147        return 0;
1148
1149err_request:
1150        i915_add_request(rq);
1151err_unpin:
1152        i915_vma_unpin(batch);
1153err_unmap:
1154        i915_gem_object_unpin_map(obj);
1155        return err;
1156}
1157
1158static u32 *reloc_gpu(struct i915_execbuffer *eb,
1159                      struct i915_vma *vma,
1160                      unsigned int len)
1161{
1162        struct reloc_cache *cache = &eb->reloc_cache;
1163        u32 *cmd;
1164
1165        if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
1166                reloc_gpu_flush(cache);
1167
1168        if (unlikely(!cache->rq)) {
1169                int err;
1170
1171                /* If we need to copy for the cmdparser, we will stall anyway */
1172                if (eb_use_cmdparser(eb))
1173                        return ERR_PTR(-EWOULDBLOCK);
1174
1175                if (!intel_engine_can_store_dword(eb->engine))
1176                        return ERR_PTR(-ENODEV);
1177
1178                err = __reloc_gpu_alloc(eb, vma, len);
1179                if (unlikely(err))
1180                        return ERR_PTR(err);
1181        }
1182
1183        cmd = cache->rq_cmd + cache->rq_size;
1184        cache->rq_size += len;
1185
1186        return cmd;
1187}
1188
1189static u64
1190relocate_entry(struct i915_vma *vma,
1191               const struct drm_i915_gem_relocation_entry *reloc,
1192               struct i915_execbuffer *eb,
1193               const struct i915_vma *target)
1194{
1195        u64 offset = reloc->offset;
1196        u64 target_offset = relocation_target(reloc, target);
1197        bool wide = eb->reloc_cache.use_64bit_reloc;
1198        void *vaddr;
1199
1200        if (!eb->reloc_cache.vaddr &&
1201            (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
1202             !reservation_object_test_signaled_rcu(vma->resv, true))) {
1203                const unsigned int gen = eb->reloc_cache.gen;
1204                unsigned int len;
1205                u32 *batch;
1206                u64 addr;
1207
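                /*
                 * Dword count of the MI_STORE_DWORD_IMM sequence emitted
                 * below: an unaligned 64-bit reloc is split into two dword
                 * writes (8), presumably as the qword form wants a naturally
                 * aligned address; an aligned one is a single qword write
                 * (5); gen4+ takes opcode/0/addr/data (4); gen2/3 takes
                 * opcode/addr/data (3).
                 */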
1208                if (wide)
1209                        len = offset & 7 ? 8 : 5;
1210                else if (gen >= 4)
1211                        len = 4;
1212                else
1213                        len = 3;
1214
1215                batch = reloc_gpu(eb, vma, len);
1216                if (IS_ERR(batch))
1217                        goto repeat;
1218
1219                addr = gen8_canonical_addr(vma->node.start + offset);
1220                if (wide) {
1221                        if (offset & 7) {
1222                                *batch++ = MI_STORE_DWORD_IMM_GEN4;
1223                                *batch++ = lower_32_bits(addr);
1224                                *batch++ = upper_32_bits(addr);
1225                                *batch++ = lower_32_bits(target_offset);
1226
1227                                addr = gen8_canonical_addr(addr + 4);
1228
1229                                *batch++ = MI_STORE_DWORD_IMM_GEN4;
1230                                *batch++ = lower_32_bits(addr);
1231                                *batch++ = upper_32_bits(addr);
1232                                *batch++ = upper_32_bits(target_offset);
1233                        } else {
1234                                *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
1235                                *batch++ = lower_32_bits(addr);
1236                                *batch++ = upper_32_bits(addr);
1237                                *batch++ = lower_32_bits(target_offset);
1238                                *batch++ = upper_32_bits(target_offset);
1239                        }
1240                } else if (gen >= 6) {
1241                        *batch++ = MI_STORE_DWORD_IMM_GEN4;
1242                        *batch++ = 0;
1243                        *batch++ = addr;
1244                        *batch++ = target_offset;
1245                } else if (gen >= 4) {
1246                        *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1247                        *batch++ = 0;
1248                        *batch++ = addr;
1249                        *batch++ = target_offset;
1250                } else {
1251                        *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
1252                        *batch++ = addr;
1253                        *batch++ = target_offset;
1254                }
1255
1256                goto out;
1257        }
1258
1259repeat:
1260        vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
1261        if (IS_ERR(vaddr))
1262                return PTR_ERR(vaddr);
1263
1264        clflush_write32(vaddr + offset_in_page(offset),
1265                        lower_32_bits(target_offset),
1266                        eb->reloc_cache.vaddr);
1267
1268        if (wide) {
1269                offset += sizeof(u32);
1270                target_offset >>= 32;
1271                wide = false;
1272                goto repeat;
1273        }
1274
1275out:
1276        return target->node.start | UPDATE;
1277}
1278
1279static u64
1280eb_relocate_entry(struct i915_execbuffer *eb,
1281                  struct i915_vma *vma,
1282                  const struct drm_i915_gem_relocation_entry *reloc)
1283{
1284        struct i915_vma *target;
1285        int err;
1286
 1287        /* we already hold a reference to all valid objects */
1288        target = eb_get_vma(eb, reloc->target_handle);
1289        if (unlikely(!target))
1290                return -ENOENT;
1291
1292        /* Validate that the target is in a valid r/w GPU domain */
1293        if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
1294                DRM_DEBUG("reloc with multiple write domains: "
1295                          "target %d offset %d "
1296                          "read %08x write %08x",
1297                          reloc->target_handle,
1298                          (int) reloc->offset,
1299                          reloc->read_domains,
1300                          reloc->write_domain);
1301                return -EINVAL;
1302        }
1303        if (unlikely((reloc->write_domain | reloc->read_domains)
1304                     & ~I915_GEM_GPU_DOMAINS)) {
1305                DRM_DEBUG("reloc with read/write non-GPU domains: "
1306                          "target %d offset %d "
1307                          "read %08x write %08x",
1308                          reloc->target_handle,
1309                          (int) reloc->offset,
1310                          reloc->read_domains,
1311                          reloc->write_domain);
1312                return -EINVAL;
1313        }
1314
1315        if (reloc->write_domain) {
1316                *target->exec_flags |= EXEC_OBJECT_WRITE;
1317
1318                /*
1319                 * Sandybridge PPGTT errata: We need a global gtt mapping
1320                 * for MI and pipe_control writes because the gpu doesn't
1321                 * properly redirect them through the ppgtt for non_secure
1322                 * batchbuffers.
1323                 */
1324                if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
1325                    IS_GEN6(eb->i915)) {
1326                        err = i915_vma_bind(target, target->obj->cache_level,
1327                                            PIN_GLOBAL);
1328                        if (WARN_ONCE(err,
1329                                      "Unexpected failure to bind target VMA!"))
1330                                return err;
1331                }
1332        }
1333
1334        /*
1335         * If the relocation already has the right value in it, no
1336         * more work needs to be done.
1337         */
1338        if (!DBG_FORCE_RELOC &&
1339            gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
1340                return 0;
1341
1342        /* Check that the relocation address is valid... */
1343        if (unlikely(reloc->offset >
1344                     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
1345                DRM_DEBUG("Relocation beyond object bounds: "
1346                          "target %d offset %d size %d.\n",
1347                          reloc->target_handle,
1348                          (int)reloc->offset,
1349                          (int)vma->size);
1350                return -EINVAL;
1351        }
1352        if (unlikely(reloc->offset & 3)) {
1353                DRM_DEBUG("Relocation not 4-byte aligned: "
1354                          "target %d offset %d.\n",
1355                          reloc->target_handle,
1356                          (int)reloc->offset);
1357                return -EINVAL;
1358        }
1359
1360        /*
1361         * If we write into the object, we need to force the synchronisation
1362         * barrier, either with an asynchronous clflush or if we executed the
1363         * patching using the GPU (though that should be serialised by the
1364         * timeline). To be completely sure, and since we are required to
1365         * do relocations we are already stalling, disable the user's opt
1366         * out of our synchronisation.
1367         */
1368        *vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
1369
1370        /* and update the user's relocation entry */
1371        return relocate_entry(vma, reloc, eb, target);
1372}
1373
1374static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
1375{
1376#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1377        struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
1378        struct drm_i915_gem_relocation_entry __user *urelocs;
1379        const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1380        unsigned int remain;
1381
1382        urelocs = u64_to_user_ptr(entry->relocs_ptr);
1383        remain = entry->relocation_count;
1384        if (unlikely(remain > N_RELOC(ULONG_MAX)))
1385                return -EINVAL;
1386
1387        /*
1388         * We must check that the entire relocation array is safe
1389         * to read. But if the array is not also writable, the user
1390         * simply loses the updated relocation values.
1391         */
1392        if (unlikely(!access_ok(VERIFY_READ, urelocs, remain*sizeof(*urelocs))))
1393                return -EFAULT;
1394
1395        do {
1396                struct drm_i915_gem_relocation_entry *r = stack;
1397                unsigned int count =
1398                        min_t(unsigned int, remain, ARRAY_SIZE(stack));
1399                unsigned int copied;
1400
1401                /*
1402                 * This is the fast path and we cannot handle a pagefault
1403                 * whilst holding the struct mutex lest the user pass in the
1404                 * relocations contained within a mmapped bo. In such a case
1405                 * the page fault handler would call i915_gem_fault() and
1406                 * we would try to acquire the struct mutex again. Obviously
1407                 * this is bad and so lockdep complains vehemently.
1408                 */
1409                pagefault_disable();
1410                copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1411                pagefault_enable();
1412                if (unlikely(copied)) {
1413                        remain = -EFAULT;
1414                        goto out;
1415                }
1416
1417                remain -= count;
1418                do {
1419                        u64 offset = eb_relocate_entry(eb, vma, r);
1420
1421                        if (likely(offset == 0)) {
1422                        } else if ((s64)offset < 0) {
1423                                remain = (int)offset;
1424                                goto out;
1425                        } else {
1426                                /*
1427                                 * Note that reporting an error now
1428                                 * leaves everything in an inconsistent
1429                                 * state as we have *already* changed
1430                                 * the relocation value inside the
1431                                 * object. As we have not changed
1432                                 * reloc.presumed_offset and will not
1433                                 * change the execobject.offset, on the
1434                                 * next call we may not rewrite the value
1435                                 * inside the object, leaving it
1436                                 * dangling and causing a GPU hang, unless
1437                                 * userspace dynamically rebuilds the
1438                                 * relocations on each execbuf rather than
1439                                 * presuming a static tree.
1440                                 *
1441                                 * We did previously check if the relocations
1442                                 * were writable (access_ok), an error now
1443                                 * would be a strange race with mprotect,
1444                                 * having already demonstrated that we
1445                                 * can read from this userspace address.
1446                                 */
1447                                offset = gen8_canonical_addr(offset & ~UPDATE);
1448                                __put_user(offset,
1449                                           &urelocs[r-stack].presumed_offset);
1450                        }
1451                } while (r++, --count);
1452                urelocs += ARRAY_SIZE(stack);
1453        } while (remain);
1454out:
1455        reloc_cache_reset(&eb->reloc_cache);
1456        return remain;
1457}
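
/*
 * Illustrative sketch, not part of this driver: the copy pattern used by the
 * fast path in eb_relocate_vma() above. Pagefaults are disabled so a fault
 * fails immediately instead of recursing into i915_gem_fault() under
 * struct_mutex; the caller falls back to a slow path on -EFAULT. The helper
 * name is hypothetical. Compiled out.
 */
#if 0
static int copy_user_chunk_atomic(void *dst, const void __user *src,
                                  unsigned long len)
{
        unsigned long unwritten;

        pagefault_disable();    /* faults now fail instead of sleeping */
        unwritten = __copy_from_user_inatomic(dst, src, len);
        pagefault_enable();

        return unwritten ? -EFAULT : 0; /* -EFAULT => take the slow path */
}
#endif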
1458
1459static int
1460eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
1461{
1462        const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1463        struct drm_i915_gem_relocation_entry *relocs =
1464                u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1465        unsigned int i;
1466        int err;
1467
1468        for (i = 0; i < entry->relocation_count; i++) {
1469                u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
1470
1471                if ((s64)offset < 0) {
1472                        err = (int)offset;
1473                        goto err;
1474                }
1475        }
1476        err = 0;
1477err:
1478        reloc_cache_reset(&eb->reloc_cache);
1479        return err;
1480}
1481
1482static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1483{
1484        const char __user *addr, *end;
1485        unsigned long size;
1486        char __maybe_unused c;
1487
1488        size = entry->relocation_count;
1489        if (size == 0)
1490                return 0;
1491
1492        if (size > N_RELOC(ULONG_MAX))
1493                return -EINVAL;
1494
1495        addr = u64_to_user_ptr(entry->relocs_ptr);
1496        size *= sizeof(struct drm_i915_gem_relocation_entry);
1497        if (!access_ok(VERIFY_READ, addr, size))
1498                return -EFAULT;
1499
1500        end = addr + size;
1501        for (; addr < end; addr += PAGE_SIZE) {
1502                int err = __get_user(c, addr);
1503                if (err)
1504                        return err;
1505        }
1506        return __get_user(c, end - 1);
1507}
1508
1509static int eb_copy_relocations(const struct i915_execbuffer *eb)
1510{
1511        const unsigned int count = eb->buffer_count;
1512        unsigned int i;
1513        int err;
1514
1515        for (i = 0; i < count; i++) {
1516                const unsigned int nreloc = eb->exec[i].relocation_count;
1517                struct drm_i915_gem_relocation_entry __user *urelocs;
1518                struct drm_i915_gem_relocation_entry *relocs;
1519                unsigned long size;
1520                unsigned long copied;
1521
1522                if (nreloc == 0)
1523                        continue;
1524
1525                err = check_relocations(&eb->exec[i]);
1526                if (err)
1527                        goto err;
1528
1529                urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
1530                size = nreloc * sizeof(*relocs);
1531
1532                relocs = kvmalloc_array(size, 1, GFP_KERNEL);
1533                if (!relocs) {
1534                        kvfree(relocs);
1535                        err = -ENOMEM;
1536                        goto err;
1537                }
1538
1539                /* copy_from_user is limited to < 4GiB */
1540                copied = 0;
1541                do {
1542                        unsigned int len =
1543                                min_t(u64, BIT_ULL(31), size - copied);
1544
1545                        if (__copy_from_user((char *)relocs + copied,
1546                                             (char __user *)urelocs + copied,
1547                                             len)) {
1548                                kvfree(relocs);
1549                                err = -EFAULT;
1550                                goto err;
1551                        }
1552
1553                        copied += len;
1554                } while (copied < size);
1555
1556                /*
1557                 * As we do not update the known relocation offsets after
1558                 * relocating (due to the complexities in lock handling),
1559                 * we need to mark them as invalid now so that we force the
1560                 * relocation processing next time. Just in case the target
1561                 * object is evicted and then rebound into its old
1562                 * presumed_offset before the next execbuffer - if that
1563                 * happened we would make the mistake of assuming that the
1564                 * relocations were valid.
1565                 */
1566                user_access_begin();
1567                for (copied = 0; copied < nreloc; copied++)
1568                        unsafe_put_user(-1,
1569                                        &urelocs[copied].presumed_offset,
1570                                        end_user);
1571end_user:
1572                user_access_end();
1573
1574                eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1575        }
1576
1577        return 0;
1578
1579err:
1580        while (i--) {
1581                struct drm_i915_gem_relocation_entry *relocs =
1582                        u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1583                if (eb->exec[i].relocation_count)
1584                        kvfree(relocs);
1585        }
1586        return err;
1587}
1588
1589static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1590{
1591        const unsigned int count = eb->buffer_count;
1592        unsigned int i;
1593
1594        if (unlikely(i915_modparams.prefault_disable))
1595                return 0;
1596
1597        for (i = 0; i < count; i++) {
1598                int err;
1599
1600                err = check_relocations(&eb->exec[i]);
1601                if (err)
1602                        return err;
1603        }
1604
1605        return 0;
1606}
1607
1608static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
1609{
1610        struct drm_device *dev = &eb->i915->drm;
1611        bool have_copy = false;
1612        struct i915_vma *vma;
1613        int err = 0;
1614
1615repeat:
1616        if (signal_pending(current)) {
1617                err = -ERESTARTSYS;
1618                goto out;
1619        }
1620
1621        /* We may process another execbuffer during the unlock... */
1622        eb_reset_vmas(eb);
1623        mutex_unlock(&dev->struct_mutex);
1624
1625        /*
1626         * We take 3 passes through the slowpath.
1627         *
1628         * 1 - we try to just prefault all the user relocation entries and
1629         * then attempt to reuse the atomic pagefault disabled fast path again.
1630         *
1631         * 2 - we copy the user entries to a local buffer here, outside of
1632         * the struct_mutex, and allow ourselves to wait upon any rendering
1633         * before performing the relocations.
1634         *
1635         * 3 - we already have a local copy of the relocation entries, but
1636         * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1637         */
1638        if (!err) {
1639                err = eb_prefault_relocations(eb);
1640        } else if (!have_copy) {
1641                err = eb_copy_relocations(eb);
1642                have_copy = err == 0;
1643        } else {
1644                cond_resched();
1645                err = 0;
1646        }
1647        if (err) {
1648                mutex_lock(&dev->struct_mutex);
1649                goto out;
1650        }
1651
1652        /* A frequent cause of EAGAIN is currently unavailable client pages */
1653        flush_workqueue(eb->i915->mm.userptr_wq);
1654
1655        err = i915_mutex_lock_interruptible(dev);
1656        if (err) {
1657                mutex_lock(&dev->struct_mutex);
1658                goto out;
1659        }
1660
1661        /* reacquire the objects */
1662        err = eb_lookup_vmas(eb);
1663        if (err)
1664                goto err;
1665
1666        GEM_BUG_ON(!eb->batch);
1667
1668        list_for_each_entry(vma, &eb->relocs, reloc_link) {
1669                if (!have_copy) {
1670                        pagefault_disable();
1671                        err = eb_relocate_vma(eb, vma);
1672                        pagefault_enable();
1673                        if (err)
1674                                goto repeat;
1675                } else {
1676                        err = eb_relocate_vma_slow(eb, vma);
1677                        if (err)
1678                                goto err;
1679                }
1680        }
1681
1682        /*
1683         * Leave the user relocations as they are; this is the painfully slow path,
1684         * and we want to avoid the complication of dropping the lock whilst
1685         * having buffers reserved in the aperture and so causing spurious
1686         * ENOSPC for random operations.
1687         */
1688
1689err:
1690        if (err == -EAGAIN)
1691                goto repeat;
1692
1693out:
1694        if (have_copy) {
1695                const unsigned int count = eb->buffer_count;
1696                unsigned int i;
1697
1698                for (i = 0; i < count; i++) {
1699                        const struct drm_i915_gem_exec_object2 *entry =
1700                                &eb->exec[i];
1701                        struct drm_i915_gem_relocation_entry *relocs;
1702
1703                        if (!entry->relocation_count)
1704                                continue;
1705
1706                        relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1707                        kvfree(relocs);
1708                }
1709        }
1710
1711        return err;
1712}
1713
1714static int eb_relocate(struct i915_execbuffer *eb)
1715{
1716        if (eb_lookup_vmas(eb))
1717                goto slow;
1718
1719        /* The objects are in their final locations, apply the relocations. */
1720        if (eb->args->flags & __EXEC_HAS_RELOC) {
1721                struct i915_vma *vma;
1722
1723                list_for_each_entry(vma, &eb->relocs, reloc_link) {
1724                        if (eb_relocate_vma(eb, vma))
1725                                goto slow;
1726                }
1727        }
1728
1729        return 0;
1730
1731slow:
1732        return eb_relocate_slow(eb);
1733}
1734
1735static void eb_export_fence(struct i915_vma *vma,
1736                            struct drm_i915_gem_request *req,
1737                            unsigned int flags)
1738{
1739        struct reservation_object *resv = vma->resv;
1740
1741        /*
1742         * Ignore errors from failing to allocate the new fence, we can't
1743         * handle an error right now. Worst case should be missed
1744         * synchronisation leading to rendering corruption.
1745         */
1746        reservation_object_lock(resv, NULL);
1747        if (flags & EXEC_OBJECT_WRITE)
1748                reservation_object_add_excl_fence(resv, &req->fence);
1749        else if (reservation_object_reserve_shared(resv) == 0)
1750                reservation_object_add_shared_fence(resv, &req->fence);
1751        reservation_object_unlock(resv);
1752}
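
/*
 * Illustrative sketch, not part of this driver: what exporting the request
 * fence above provides to other users of the same reservation object, e.g.
 * a dma-buf importer waiting for our rendering before touching the pages.
 * The helper name is hypothetical. Compiled out.
 */
#if 0
static long wait_for_exported_fences(struct reservation_object *resv)
{
        /* wait_all=true also waits on the shared (read) fences */
        return reservation_object_wait_timeout_rcu(resv, true, true,
                                                   MAX_SCHEDULE_TIMEOUT);
}
#endif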
1753
1754static int eb_move_to_gpu(struct i915_execbuffer *eb)
1755{
1756        const unsigned int count = eb->buffer_count;
1757        unsigned int i;
1758        int err;
1759
1760        for (i = 0; i < count; i++) {
1761                unsigned int flags = eb->flags[i];
1762                struct i915_vma *vma = eb->vma[i];
1763                struct drm_i915_gem_object *obj = vma->obj;
1764
1765                if (flags & EXEC_OBJECT_CAPTURE) {
1766                        struct i915_gem_capture_list *capture;
1767
1768                        capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1769                        if (unlikely(!capture))
1770                                return -ENOMEM;
1771
1772                        capture->next = eb->request->capture_list;
1773                        capture->vma = eb->vma[i];
1774                        eb->request->capture_list = capture;
1775                }
1776
1777                /*
1778                 * If the GPU is not _reading_ through the CPU cache, we need
1779                 * to make sure that any writes (both previous GPU writes from
1780                 * before a change in snooping levels and normal CPU writes)
1781                 * caught in that cache are flushed to main memory.
1782                 *
1783                 * We want to say
1784                 *   obj->cache_dirty &&
1785                 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
1786                 * but gcc's optimiser doesn't handle that as well and emits
1787                 * two jumps instead of one. Maybe one day...
1788                 */
1789                if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
1790                        if (i915_gem_clflush_object(obj, 0))
1791                                flags &= ~EXEC_OBJECT_ASYNC;
1792                }
1793
1794                if (flags & EXEC_OBJECT_ASYNC)
1795                        continue;
1796
1797                err = i915_gem_request_await_object
1798                        (eb->request, obj, flags & EXEC_OBJECT_WRITE);
1799                if (err)
1800                        return err;
1801        }
1802
1803        for (i = 0; i < count; i++) {
1804                unsigned int flags = eb->flags[i];
1805                struct i915_vma *vma = eb->vma[i];
1806
1807                i915_vma_move_to_active(vma, eb->request, flags);
1808                eb_export_fence(vma, eb->request, flags);
1809
1810                __eb_unreserve_vma(vma, flags);
1811                vma->exec_flags = NULL;
1812
1813                if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
1814                        i915_vma_put(vma);
1815        }
1816        eb->exec = NULL;
1817
1818        /* Unconditionally flush any chipset caches (for streaming writes). */
1819        i915_gem_chipset_flush(eb->i915);
1820
1821        /* Unconditionally invalidate GPU caches and TLBs. */
1822        return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE);
1823}
1824
1825static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1826{
1827        if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
1828                return false;
1829
1830        /* Kernel clipping was a DRI1 misfeature */
1831        if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
1832                if (exec->num_cliprects || exec->cliprects_ptr)
1833                        return false;
1834        }
1835
1836        if (exec->DR4 == 0xffffffff) {
1837                DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1838                exec->DR4 = 0;
1839        }
1840        if (exec->DR1 || exec->DR4)
1841                return false;
1842
1843        if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1844                return false;
1845
1846        return true;
1847}
1848
1849void i915_vma_move_to_active(struct i915_vma *vma,
1850                             struct drm_i915_gem_request *req,
1851                             unsigned int flags)
1852{
1853        struct drm_i915_gem_object *obj = vma->obj;
1854        const unsigned int idx = req->engine->id;
1855
1856        lockdep_assert_held(&req->i915->drm.struct_mutex);
1857        GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1858
1859        /*
1860         * Add a reference if we're newly entering the active list.
1861         * The order in which we add operations to the retirement queue is
1862         * vital here: mark_active adds to the start of the callback list,
1863         * such that subsequent callbacks are called first. Therefore we
1864         * add the active reference first and queue for it to be dropped
1865         * *last*.
1866         */
1867        if (!i915_vma_is_active(vma))
1868                obj->active_count++;
1869        i915_vma_set_active(vma, idx);
1870        i915_gem_active_set(&vma->last_read[idx], req);
1871        list_move_tail(&vma->vm_link, &vma->vm->active_list);
1872
1873        obj->base.write_domain = 0;
1874        if (flags & EXEC_OBJECT_WRITE) {
1875                obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
1876
1877                if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
1878                        i915_gem_active_set(&obj->frontbuffer_write, req);
1879
1880                obj->base.read_domains = 0;
1881        }
1882        obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
1883
1884        if (flags & EXEC_OBJECT_NEEDS_FENCE)
1885                i915_gem_active_set(&vma->last_fence, req);
1886}
1887
1888static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1889{
1890        u32 *cs;
1891        int i;
1892
1893        if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
1894                DRM_DEBUG("sol reset is gen7/rcs only\n");
1895                return -EINVAL;
1896        }
1897
1898        cs = intel_ring_begin(req, 4 * 2 + 2);
1899        if (IS_ERR(cs))
1900                return PTR_ERR(cs);
1901
1902        *cs++ = MI_LOAD_REGISTER_IMM(4);
1903        for (i = 0; i < 4; i++) {
1904                *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
1905                *cs++ = 0;
1906        }
1907        *cs++ = MI_NOOP;
1908        intel_ring_advance(req, cs);
1909
1910        return 0;
1911}
1912
1913static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
1914{
1915        struct drm_i915_gem_object *shadow_batch_obj;
1916        struct i915_vma *vma;
1917        int err;
1918
1919        shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
1920                                                   PAGE_ALIGN(eb->batch_len));
1921        if (IS_ERR(shadow_batch_obj))
1922                return ERR_CAST(shadow_batch_obj);
1923
1924        err = intel_engine_cmd_parser(eb->engine,
1925                                      eb->batch->obj,
1926                                      shadow_batch_obj,
1927                                      eb->batch_start_offset,
1928                                      eb->batch_len,
1929                                      is_master);
1930        if (err) {
1931                if (err == -EACCES) /* unhandled chained batch */
1932                        vma = NULL;
1933                else
1934                        vma = ERR_PTR(err);
1935                goto out;
1936        }
1937
1938        vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
1939        if (IS_ERR(vma))
1940                goto out;
1941
1942        eb->vma[eb->buffer_count] = i915_vma_get(vma);
1943        eb->flags[eb->buffer_count] =
1944                __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
1945        vma->exec_flags = &eb->flags[eb->buffer_count];
1946        eb->buffer_count++;
1947
1948out:
1949        i915_gem_object_unpin_pages(shadow_batch_obj);
1950        return vma;
1951}
1952
1953static void
1954add_to_client(struct drm_i915_gem_request *req, struct drm_file *file)
1955{
1956        req->file_priv = file->driver_priv;
1957        list_add_tail(&req->client_link, &req->file_priv->mm.request_list);
1958}
1959
1960static int eb_submit(struct i915_execbuffer *eb)
1961{
1962        int err;
1963
1964        err = eb_move_to_gpu(eb);
1965        if (err)
1966                return err;
1967
1968        err = i915_switch_context(eb->request);
1969        if (err)
1970                return err;
1971
1972        if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
1973                err = i915_reset_gen7_sol_offsets(eb->request);
1974                if (err)
1975                        return err;
1976        }
1977
1978        err = eb->engine->emit_bb_start(eb->request,
1979                                        eb->batch->node.start +
1980                                        eb->batch_start_offset,
1981                                        eb->batch_len,
1982                                        eb->batch_flags);
1983        if (err)
1984                return err;
1985
1986        return 0;
1987}
1988
1989/**
1990 * Find one BSD ring to dispatch the corresponding BSD command.
1991 * The engine index is returned.
1992 */
1993static unsigned int
1994gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
1995                         struct drm_file *file)
1996{
1997        struct drm_i915_file_private *file_priv = file->driver_priv;
1998
1999        /* Check whether the file_priv has already selected one ring. */
2000        if ((int)file_priv->bsd_engine < 0)
2001                file_priv->bsd_engine = atomic_fetch_xor(1,
2002                         &dev_priv->mm.bsd_engine_dispatch_index);
2003
2004        return file_priv->bsd_engine;
2005}
2006
2007#define I915_USER_RINGS (4)
2008
2009static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
2010        [I915_EXEC_DEFAULT]     = RCS,
2011        [I915_EXEC_RENDER]      = RCS,
2012        [I915_EXEC_BLT]         = BCS,
2013        [I915_EXEC_BSD]         = VCS,
2014        [I915_EXEC_VEBOX]       = VECS
2015};
2016
2017static struct intel_engine_cs *
2018eb_select_engine(struct drm_i915_private *dev_priv,
2019                 struct drm_file *file,
2020                 struct drm_i915_gem_execbuffer2 *args)
2021{
2022        unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
2023        struct intel_engine_cs *engine;
2024
2025        if (user_ring_id > I915_USER_RINGS) {
2026                DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
2027                return NULL;
2028        }
2029
2030        if ((user_ring_id != I915_EXEC_BSD) &&
2031            ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
2032                DRM_DEBUG("execbuf with non bsd ring but with invalid "
2033                          "bsd dispatch flags: %d\n", (int)(args->flags));
2034                return NULL;
2035        }
2036
2037        if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
2038                unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
2039
2040                if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
2041                        bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
2042                } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
2043                           bsd_idx <= I915_EXEC_BSD_RING2) {
2044                        bsd_idx >>= I915_EXEC_BSD_SHIFT;
2045                        bsd_idx--;
2046                } else {
2047                        DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
2048                                  bsd_idx);
2049                        return NULL;
2050                }
2051
2052                engine = dev_priv->engine[_VCS(bsd_idx)];
2053        } else {
2054                engine = dev_priv->engine[user_ring_map[user_ring_id]];
2055        }
2056
2057        if (!engine) {
2058                DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
2059                return NULL;
2060        }
2061
2062        return engine;
2063}
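
/*
 * Illustrative sketch, not part of this driver: how userspace steers a batch
 * onto a particular video engine using the flags decoded by
 * eb_select_engine() above. The helper name is hypothetical; the flags come
 * from <drm/i915_drm.h>. Compiled out.
 */
#if 0
static void select_bsd_ring2(struct drm_i915_gem_execbuffer2 *execbuf)
{
        execbuf->flags &= ~(I915_EXEC_RING_MASK | I915_EXEC_BSD_MASK);

        /* Explicitly target the second video engine... */
        execbuf->flags |= I915_EXEC_BSD | I915_EXEC_BSD_RING2;

        /* ...or pass I915_EXEC_BSD_DEFAULT to let the kernel ping-pong. */
}
#endif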
2064
2065static void
2066__free_fence_array(struct drm_syncobj **fences, unsigned int n)
2067{
2068        while (n--)
2069                drm_syncobj_put(ptr_mask_bits(fences[n], 2));
2070        kvfree(fences);
2071}
2072
2073static struct drm_syncobj **
2074get_fence_array(struct drm_i915_gem_execbuffer2 *args,
2075                struct drm_file *file)
2076{
2077        const unsigned int nfences = args->num_cliprects;
2078        struct drm_i915_gem_exec_fence __user *user;
2079        struct drm_syncobj **fences;
2080        unsigned int n;
2081        int err;
2082
2083        if (!(args->flags & I915_EXEC_FENCE_ARRAY))
2084                return NULL;
2085
2086        if (nfences > SIZE_MAX / sizeof(*fences))
2087                return ERR_PTR(-EINVAL);
2088
2089        user = u64_to_user_ptr(args->cliprects_ptr);
2090        if (!access_ok(VERIFY_READ, user, nfences * 2 * sizeof(u32)))
2091                return ERR_PTR(-EFAULT);
2092
2093        fences = kvmalloc_array(args->num_cliprects, sizeof(*fences),
2094                                __GFP_NOWARN | GFP_KERNEL);
2095        if (!fences)
2096                return ERR_PTR(-ENOMEM);
2097
2098        for (n = 0; n < nfences; n++) {
2099                struct drm_i915_gem_exec_fence fence;
2100                struct drm_syncobj *syncobj;
2101
2102                if (__copy_from_user(&fence, user++, sizeof(fence))) {
2103                        err = -EFAULT;
2104                        goto err;
2105                }
2106
2107                if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
2108                        err = -EINVAL;
2109                        goto err;
2110                }
2111
2112                syncobj = drm_syncobj_find(file, fence.handle);
2113                if (!syncobj) {
2114                        DRM_DEBUG("Invalid syncobj handle provided\n");
2115                        err = -ENOENT;
2116                        goto err;
2117                }
2118
2119                BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2120                             ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2121
2122                fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
2123        }
2124
2125        return fences;
2126
2127err:
2128        __free_fence_array(fences, n);
2129        return ERR_PTR(err);
2130}
2131
2132static void
2133put_fence_array(struct drm_i915_gem_execbuffer2 *args,
2134                struct drm_syncobj **fences)
2135{
2136        if (fences)
2137                __free_fence_array(fences, args->num_cliprects);
2138}
2139
2140static int
2141await_fence_array(struct i915_execbuffer *eb,
2142                  struct drm_syncobj **fences)
2143{
2144        const unsigned int nfences = eb->args->num_cliprects;
2145        unsigned int n;
2146        int err;
2147
2148        for (n = 0; n < nfences; n++) {
2149                struct drm_syncobj *syncobj;
2150                struct dma_fence *fence;
2151                unsigned int flags;
2152
2153                syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2154                if (!(flags & I915_EXEC_FENCE_WAIT))
2155                        continue;
2156
2157                fence = drm_syncobj_fence_get(syncobj);
2158                if (!fence)
2159                        return -EINVAL;
2160
2161                err = i915_gem_request_await_dma_fence(eb->request, fence);
2162                dma_fence_put(fence);
2163                if (err < 0)
2164                        return err;
2165        }
2166
2167        return 0;
2168}
2169
2170static void
2171signal_fence_array(struct i915_execbuffer *eb,
2172                   struct drm_syncobj **fences)
2173{
2174        const unsigned int nfences = eb->args->num_cliprects;
2175        struct dma_fence * const fence = &eb->request->fence;
2176        unsigned int n;
2177
2178        for (n = 0; n < nfences; n++) {
2179                struct drm_syncobj *syncobj;
2180                unsigned int flags;
2181
2182                syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2183                if (!(flags & I915_EXEC_FENCE_SIGNAL))
2184                        continue;
2185
2186                drm_syncobj_replace_fence(syncobj, fence);
2187        }
2188}
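
/*
 * Illustrative sketch, not part of this driver: the userspace layout parsed
 * by get_fence_array() and consumed by await_fence_array() and
 * signal_fence_array() above. The helper name and the pre-created syncobj
 * handles are assumptions; the uapi struct and flags come from
 * <drm/i915_drm.h>. Compiled out.
 */
#if 0
static void attach_fence_array(struct drm_i915_gem_execbuffer2 *execbuf,
                               struct drm_i915_gem_exec_fence fences[2],
                               __u32 wait_handle, __u32 signal_handle)
{
        fences[0].handle = wait_handle;   /* the batch waits on this syncobj */
        fences[0].flags = I915_EXEC_FENCE_WAIT;
        fences[1].handle = signal_handle; /* replaced with the request fence */
        fences[1].flags = I915_EXEC_FENCE_SIGNAL;

        /* FENCE_ARRAY reuses the legacy cliprects fields */
        execbuf->flags |= I915_EXEC_FENCE_ARRAY;
        execbuf->cliprects_ptr = (uintptr_t)fences;
        execbuf->num_cliprects = 2;
}
#endif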
2189
2190static int
2191i915_gem_do_execbuffer(struct drm_device *dev,
2192                       struct drm_file *file,
2193                       struct drm_i915_gem_execbuffer2 *args,
2194                       struct drm_i915_gem_exec_object2 *exec,
2195                       struct drm_syncobj **fences)
2196{
2197        struct i915_execbuffer eb;
2198        struct dma_fence *in_fence = NULL;
2199        struct sync_file *out_fence = NULL;
2200        int out_fence_fd = -1;
2201        int err;
2202
2203        BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
2204        BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
2205                     ~__EXEC_OBJECT_UNKNOWN_FLAGS);
2206
2207        eb.i915 = to_i915(dev);
2208        eb.file = file;
2209        eb.args = args;
2210        if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
2211                args->flags |= __EXEC_HAS_RELOC;
2212
2213        eb.exec = exec;
2214        eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
2215        eb.vma[0] = NULL;
2216        eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
2217
2218        eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
2219        if (USES_FULL_PPGTT(eb.i915))
2220                eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
2221        reloc_cache_init(&eb.reloc_cache, eb.i915);
2222
2223        eb.buffer_count = args->buffer_count;
2224        eb.batch_start_offset = args->batch_start_offset;
2225        eb.batch_len = args->batch_len;
2226
2227        eb.batch_flags = 0;
2228        if (args->flags & I915_EXEC_SECURE) {
2229                if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
2230                        return -EPERM;
2231
2232                eb.batch_flags |= I915_DISPATCH_SECURE;
2233        }
2234        if (args->flags & I915_EXEC_IS_PINNED)
2235                eb.batch_flags |= I915_DISPATCH_PINNED;
2236
2237        eb.engine = eb_select_engine(eb.i915, file, args);
2238        if (!eb.engine)
2239                return -EINVAL;
2240
2241        if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
2242                if (!HAS_RESOURCE_STREAMER(eb.i915)) {
2243                        DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
2244                        return -EINVAL;
2245                }
2246                if (eb.engine->id != RCS) {
2247                        DRM_DEBUG("RS is not available on %s\n",
2248                                 eb.engine->name);
2249                        return -EINVAL;
2250                }
2251
2252                eb.batch_flags |= I915_DISPATCH_RS;
2253        }
2254
2255        if (args->flags & I915_EXEC_FENCE_IN) {
2256                in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
2257                if (!in_fence)
2258                        return -EINVAL;
2259        }
2260
2261        if (args->flags & I915_EXEC_FENCE_OUT) {
2262                out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
2263                if (out_fence_fd < 0) {
2264                        err = out_fence_fd;
2265                        goto err_in_fence;
2266                }
2267        }
2268
2269        err = eb_create(&eb);
2270        if (err)
2271                goto err_out_fence;
2272
2273        GEM_BUG_ON(!eb.lut_size);
2274
2275        err = eb_select_context(&eb);
2276        if (unlikely(err))
2277                goto err_destroy;
2278
2279        /*
2280         * Take a local wakeref for preparing to dispatch the execbuf as
2281         * we expect to access the hardware fairly frequently in the
2282         * process. Upon first dispatch, we acquire another prolonged
2283         * wakeref that we hold until the GPU has been idle for at least
2284         * 100ms.
2285         */
2286        intel_runtime_pm_get(eb.i915);
2287
2288        err = i915_mutex_lock_interruptible(dev);
2289        if (err)
2290                goto err_rpm;
2291
2292        err = eb_relocate(&eb);
2293        if (err) {
2294                /*
2295                 * If the user expects the execobject.offset and
2296                 * reloc.presumed_offset to be an exact match,
2297                 * as for using NO_RELOC, then we cannot update
2298                 * the execobject.offset until we have completed
2299                 * relocation.
2300                 */
2301                args->flags &= ~__EXEC_HAS_RELOC;
2302                goto err_vma;
2303        }
2304
2305        if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
2306                DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
2307                err = -EINVAL;
2308                goto err_vma;
2309        }
2310        if (eb.batch_start_offset > eb.batch->size ||
2311            eb.batch_len > eb.batch->size - eb.batch_start_offset) {
2312                DRM_DEBUG("Attempting to use out-of-bounds batch\n");
2313                err = -EINVAL;
2314                goto err_vma;
2315        }
2316
2317        if (eb_use_cmdparser(&eb)) {
2318                struct i915_vma *vma;
2319
2320                vma = eb_parse(&eb, drm_is_current_master(file));
2321                if (IS_ERR(vma)) {
2322                        err = PTR_ERR(vma);
2323                        goto err_vma;
2324                }
2325
2326                if (vma) {
2327                        /*
2328                         * Batch parsed and accepted:
2329                         *
2330                         * Set the DISPATCH_SECURE bit to remove the NON_SECURE
2331                         * bit from MI_BATCH_BUFFER_START commands issued in
2332                         * the dispatch_execbuffer implementations. We
2333                         * specifically don't want that set on batches the
2334                         * command parser has accepted.
2335                         */
2336                        eb.batch_flags |= I915_DISPATCH_SECURE;
2337                        eb.batch_start_offset = 0;
2338                        eb.batch = vma;
2339                }
2340        }
2341
2342        if (eb.batch_len == 0)
2343                eb.batch_len = eb.batch->size - eb.batch_start_offset;
2344
2345        /*
2346         * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2347         * batch" bit. Hence we need to pin secure batches into the global gtt.
2348         * hsw should have this fixed, but bdw mucks it up again. */
2349        if (eb.batch_flags & I915_DISPATCH_SECURE) {
2350                struct i915_vma *vma;
2351
2352                /*
2353                 * So on first glance it looks freaky that we pin the batch here
2354                 * outside of the reservation loop. But:
2355                 * - The batch is already pinned into the relevant ppgtt, so we
2356                 *   already have the backing storage fully allocated.
2357                 * - No other BO uses the global gtt (well contexts, but meh),
2358                 *   so we don't really have issues with multiple objects not
2359                 *   fitting due to fragmentation.
2360                 * So this is actually safe.
2361                 */
2362                vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
2363                if (IS_ERR(vma)) {
2364                        err = PTR_ERR(vma);
2365                        goto err_vma;
2366                }
2367
2368                eb.batch = vma;
2369        }
2370
2371        /* All GPU relocation batches must be submitted prior to the user rq */
2372        GEM_BUG_ON(eb.reloc_cache.rq);
2373
2374        /* Allocate a request for this batch buffer nice and early. */
2375        eb.request = i915_gem_request_alloc(eb.engine, eb.ctx);
2376        if (IS_ERR(eb.request)) {
2377                err = PTR_ERR(eb.request);
2378                goto err_batch_unpin;
2379        }
2380
2381        if (in_fence) {
2382                err = i915_gem_request_await_dma_fence(eb.request, in_fence);
2383                if (err < 0)
2384                        goto err_request;
2385        }
2386
2387        if (fences) {
2388                err = await_fence_array(&eb, fences);
2389                if (err)
2390                        goto err_request;
2391        }
2392
2393        if (out_fence_fd != -1) {
2394                out_fence = sync_file_create(&eb.request->fence);
2395                if (!out_fence) {
2396                        err = -ENOMEM;
2397                        goto err_request;
2398                }
2399        }
2400
2401        /*
2402         * Whilst this request exists, batch_obj will be on the
2403         * active_list, and so will hold the active reference. Only when this
2404         * request is retired will the batch_obj be moved onto the
2405         * inactive_list and lose its active reference. Hence we do not need
2406         * to explicitly hold another reference here.
2407         */
2408        eb.request->batch = eb.batch;
2409
2410        trace_i915_gem_request_queue(eb.request, eb.batch_flags);
2411        err = eb_submit(&eb);
2412err_request:
2413        __i915_add_request(eb.request, err == 0);
2414        add_to_client(eb.request, file);
2415
2416        if (fences)
2417                signal_fence_array(&eb, fences);
2418
2419        if (out_fence) {
2420                if (err == 0) {
2421                        fd_install(out_fence_fd, out_fence->file);
2422                        args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
2423                        args->rsvd2 |= (u64)out_fence_fd << 32;
2424                        out_fence_fd = -1;
2425                } else {
2426                        fput(out_fence->file);
2427                }
2428        }
2429
2430err_batch_unpin:
2431        if (eb.batch_flags & I915_DISPATCH_SECURE)
2432                i915_vma_unpin(eb.batch);
2433err_vma:
2434        if (eb.exec)
2435                eb_release_vmas(&eb);
2436        mutex_unlock(&dev->struct_mutex);
2437err_rpm:
2438        intel_runtime_pm_put(eb.i915);
2439        i915_gem_context_put(eb.ctx);
2440err_destroy:
2441        eb_destroy(&eb);
2442err_out_fence:
2443        if (out_fence_fd != -1)
2444                put_unused_fd(out_fence_fd);
2445err_in_fence:
2446        dma_fence_put(in_fence);
2447        return err;
2448}
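
/*
 * Illustrative sketch, not part of this driver: submitting with an in-fence
 * and collecting the out-fence, matching the rsvd2 packing above. Assumes an
 * open DRM fd, libdrm's drmIoctl() and an otherwise populated execbuf; the
 * _WR ioctl variant is needed so the out-fence fd can be written back. The
 * helper name is hypothetical. Compiled out.
 */
#if 0
static int submit_with_fences(int drm_fd,
                              struct drm_i915_gem_execbuffer2 *execbuf,
                              int in_fence_fd, int *out_fence_fd)
{
        execbuf->flags |= I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
        execbuf->rsvd2 = (__u32)in_fence_fd;    /* lower 32 bits: in-fence */

        if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf))
                return -errno;

        *out_fence_fd = execbuf->rsvd2 >> 32;   /* upper 32 bits: out-fence */
        return 0;
}
#endif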
2449
2450/*
2451 * Legacy execbuffer just creates an exec2 list from the original exec object
2452 * list array and passes it to the real function.
2453 */
2454int
2455i915_gem_execbuffer(struct drm_device *dev, void *data,
2456                    struct drm_file *file)
2457{
2458        const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
2459                           sizeof(struct i915_vma *) +
2460                           sizeof(unsigned int));
2461        struct drm_i915_gem_execbuffer *args = data;
2462        struct drm_i915_gem_execbuffer2 exec2;
2463        struct drm_i915_gem_exec_object *exec_list = NULL;
2464        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
2465        unsigned int i;
2466        int err;
2467
2468        if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) {
2469                DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
2470                return -EINVAL;
2471        }
2472
2473        exec2.buffers_ptr = args->buffers_ptr;
2474        exec2.buffer_count = args->buffer_count;
2475        exec2.batch_start_offset = args->batch_start_offset;
2476        exec2.batch_len = args->batch_len;
2477        exec2.DR1 = args->DR1;
2478        exec2.DR4 = args->DR4;
2479        exec2.num_cliprects = args->num_cliprects;
2480        exec2.cliprects_ptr = args->cliprects_ptr;
2481        exec2.flags = I915_EXEC_RENDER;
2482        i915_execbuffer2_set_context_id(exec2, 0);
2483
2484        if (!i915_gem_check_execbuffer(&exec2))
2485                return -EINVAL;
2486
2487        /* Copy in the exec list from userland */
2488        exec_list = kvmalloc_array(args->buffer_count, sizeof(*exec_list),
2489                                   __GFP_NOWARN | GFP_KERNEL);
2490        exec2_list = kvmalloc_array(args->buffer_count + 1, sz,
2491                                    __GFP_NOWARN | GFP_KERNEL);
2492        if (exec_list == NULL || exec2_list == NULL) {
2493                DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
2494                          args->buffer_count);
2495                kvfree(exec_list);
2496                kvfree(exec2_list);
2497                return -ENOMEM;
2498        }
2499        err = copy_from_user(exec_list,
2500                             u64_to_user_ptr(args->buffers_ptr),
2501                             sizeof(*exec_list) * args->buffer_count);
2502        if (err) {
2503                DRM_DEBUG("copy %d exec entries failed %d\n",
2504                          args->buffer_count, err);
2505                kvfree(exec_list);
2506                kvfree(exec2_list);
2507                return -EFAULT;
2508        }
2509
2510        for (i = 0; i < args->buffer_count; i++) {
2511                exec2_list[i].handle = exec_list[i].handle;
2512                exec2_list[i].relocation_count = exec_list[i].relocation_count;
2513                exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
2514                exec2_list[i].alignment = exec_list[i].alignment;
2515                exec2_list[i].offset = exec_list[i].offset;
2516                if (INTEL_GEN(to_i915(dev)) < 4)
2517                        exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
2518                else
2519                        exec2_list[i].flags = 0;
2520        }
2521
2522        err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
2523        if (exec2.flags & __EXEC_HAS_RELOC) {
2524                struct drm_i915_gem_exec_object __user *user_exec_list =
2525                        u64_to_user_ptr(args->buffers_ptr);
2526
2527                /* Copy the new buffer offsets back to the user's exec list. */
2528                for (i = 0; i < args->buffer_count; i++) {
2529                        if (!(exec2_list[i].offset & UPDATE))
2530                                continue;
2531
2532                        exec2_list[i].offset =
2533                                gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2534                        exec2_list[i].offset &= PIN_OFFSET_MASK;
2535                        if (__copy_to_user(&user_exec_list[i].offset,
2536                                           &exec2_list[i].offset,
2537                                           sizeof(user_exec_list[i].offset)))
2538                                break;
2539                }
2540        }
2541
2542        kvfree(exec_list);
2543        kvfree(exec2_list);
2544        return err;
2545}
2546
2547int
2548i915_gem_execbuffer2(struct drm_device *dev, void *data,
2549                     struct drm_file *file)
2550{
2551        const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
2552                           sizeof(struct i915_vma *) +
2553                           sizeof(unsigned int));
2554        struct drm_i915_gem_execbuffer2 *args = data;
2555        struct drm_i915_gem_exec_object2 *exec2_list;
2556        struct drm_syncobj **fences = NULL;
2557        int err;
2558
2559        if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) {
2560                DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
2561                return -EINVAL;
2562        }
2563
2564        if (!i915_gem_check_execbuffer(args))
2565                return -EINVAL;
2566
2567        /* Allocate an extra slot for use by the command parser */
2568        exec2_list = kvmalloc_array(args->buffer_count + 1, sz,
2569                                    __GFP_NOWARN | GFP_KERNEL);
2570        if (exec2_list == NULL) {
2571                DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
2572                          args->buffer_count);
2573                return -ENOMEM;
2574        }
2575        if (copy_from_user(exec2_list,
2576                           u64_to_user_ptr(args->buffers_ptr),
2577                           sizeof(*exec2_list) * args->buffer_count)) {
2578                DRM_DEBUG("copy %d exec entries failed\n", args->buffer_count);
2579                kvfree(exec2_list);
2580                return -EFAULT;
2581        }
2582
2583        if (args->flags & I915_EXEC_FENCE_ARRAY) {
2584                fences = get_fence_array(args, file);
2585                if (IS_ERR(fences)) {
2586                        kvfree(exec2_list);
2587                        return PTR_ERR(fences);
2588                }
2589        }
2590
2591        err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
2592
2593        /*
2594         * Now that we have begun execution of the batchbuffer, we ignore
2595         * any new error after this point. Also given that we have already
2596         * updated the associated relocations, we try to write out the current
2597         * object locations irrespective of any error.
2598         */
2599        if (args->flags & __EXEC_HAS_RELOC) {
2600                struct drm_i915_gem_exec_object2 __user *user_exec_list =
2601                        u64_to_user_ptr(args->buffers_ptr);
2602                unsigned int i;
2603
2604                /* Copy the new buffer offsets back to the user's exec list. */
2605                user_access_begin();
2606                for (i = 0; i < args->buffer_count; i++) {
2607                        if (!(exec2_list[i].offset & UPDATE))
2608                                continue;
2609
2610                        exec2_list[i].offset =
2611                                gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2612                        unsafe_put_user(exec2_list[i].offset,
2613                                        &user_exec_list[i].offset,
2614                                        end_user);
2615                }
2616end_user:
2617                user_access_end();
2618        }
2619
2620        args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
2621        put_fence_array(args, fences);
2622        kvfree(exec2_list);
2623        return err;
2624}
2625
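
/*
 * Illustrative sketch, not part of this driver: a minimal userspace call
 * into the execbuffer2 ioctl above. Assumes libdrm's drmIoctl(), GEM handles
 * created elsewhere, and a batch ending in MI_BATCH_BUFFER_END; the batch
 * object is last in the list unless I915_EXEC_BATCH_FIRST is set. The helper
 * name is hypothetical. Compiled out.
 */
#if 0
static int submit_minimal(int drm_fd, __u32 ctx_id,
                          __u32 bo_handle, __u32 batch_handle,
                          __u32 batch_len)
{
        struct drm_i915_gem_exec_object2 objs[2] = {
                { .handle = bo_handle },
                { .handle = batch_handle },     /* the batch goes last by default */
        };
        struct drm_i915_gem_execbuffer2 execbuf = {
                .buffers_ptr = (uintptr_t)objs,
                .buffer_count = 2,
                .batch_len = batch_len,
                .flags = I915_EXEC_RENDER,
        };

        i915_execbuffer2_set_context_id(execbuf, ctx_id);

        return drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}
#endif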