linux/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
   1/*
   2 * SPDX-License-Identifier: MIT
   3 *
   4 * Copyright © 2008,2010 Intel Corporation
   5 */
   6
   7#include <linux/intel-iommu.h>
   8#include <linux/reservation.h>
   9#include <linux/sync_file.h>
  10#include <linux/uaccess.h>
  11
  12#include <drm/drm_syncobj.h>
  13#include <drm/i915_drm.h>
  14
  15#include "display/intel_frontbuffer.h"
  16
  17#include "gem/i915_gem_ioctls.h"
  18#include "gt/intel_context.h"
  19#include "gt/intel_gt_pm.h"
  20
  21#include "i915_gem_ioctls.h"
  22#include "i915_gem_clflush.h"
  23#include "i915_gem_context.h"
  24#include "i915_trace.h"
  25#include "intel_drv.h"
  26
  27enum {
  28        FORCE_CPU_RELOC = 1,
  29        FORCE_GTT_RELOC,
  30        FORCE_GPU_RELOC,
  31#define DBG_FORCE_RELOC 0 /* choose one of the above! */
  32};
  33
  34#define __EXEC_OBJECT_HAS_REF           BIT(31)
  35#define __EXEC_OBJECT_HAS_PIN           BIT(30)
  36#define __EXEC_OBJECT_HAS_FENCE         BIT(29)
  37#define __EXEC_OBJECT_NEEDS_MAP         BIT(28)
  38#define __EXEC_OBJECT_NEEDS_BIAS        BIT(27)
  39#define __EXEC_OBJECT_INTERNAL_FLAGS    (~0u << 27) /* all of the above */
  40#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
  41
  42#define __EXEC_HAS_RELOC        BIT(31)
  43#define __EXEC_VALIDATED        BIT(30)
  44#define __EXEC_INTERNAL_FLAGS   (~0u << 30)
  45#define UPDATE                  PIN_OFFSET_FIXED
  46
  47#define BATCH_OFFSET_BIAS (256*1024)
  48
  49#define __I915_EXEC_ILLEGAL_FLAGS \
  50        (__I915_EXEC_UNKNOWN_FLAGS | \
  51         I915_EXEC_CONSTANTS_MASK  | \
  52         I915_EXEC_RESOURCE_STREAMER)
  53
  54/* Catch emission of unexpected errors for CI! */
  55#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
  56#undef EINVAL
  57#define EINVAL ({ \
  58        DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
  59        22; \
  60})
  61#endif
  62
  63/**
  64 * DOC: User command execution
  65 *
  66 * Userspace submits commands to be executed on the GPU as an instruction
  67 * stream within a GEM object we call a batchbuffer. This instructions may
  68 * refer to other GEM objects containing auxiliary state such as kernels,
  69 * samplers, render targets and even secondary batchbuffers. Userspace does
  70 * not know where in the GPU memory these objects reside and so before the
  71 * batchbuffer is passed to the GPU for execution, those addresses in the
  72 * batchbuffer and auxiliary objects are updated. This is known as relocation,
  73 * or patching. To try and avoid having to relocate each object on the next
  74 * execution, userspace is told the location of those objects in this pass,
  75 * but this remains just a hint as the kernel may choose a new location for
  76 * any object in the future.
  77 *
  78 * At the level of talking to the hardware, submitting a batchbuffer for the
  79 * GPU to execute amounts to adding content to a buffer from which the HW
  80 * command streamer is reading.
  81 *
  82 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
  83 *    Execlists, this command is not placed on the same buffer as the
  84 *    remaining items.
  85 *
  86 * 2. Add a command to invalidate caches to the buffer.
  87 *
  88 * 3. Add a batchbuffer start command to the buffer; the start command is
  89 *    essentially a token together with the GPU address of the batchbuffer
  90 *    to be executed.
  91 *
  92 * 4. Add a pipeline flush to the buffer.
  93 *
  94 * 5. Add a memory write command to the buffer to record when the GPU
  95 *    is done executing the batchbuffer. The memory write writes the
  96 *    global sequence number of the request, ``i915_request::global_seqno``;
  97 *    the i915 driver uses the current value at that memory location to determine
  98 *    if the GPU has completed the batchbuffer.
  99 *
 100 * 6. Add a user interrupt command to the buffer. This command instructs
 101 *    the GPU to issue an interrupt when the command, pipeline flush and
 102 *    memory write are completed.
 103 *
 104 * 7. Inform the hardware of the additional commands added to the buffer
 105 *    (by updating the tail pointer).
 106 *
 107 * Processing an execbuf ioctl is conceptually split up into a few phases.
 108 *
 109 * 1. Validation - Ensure all the pointers, handles and flags are valid.
 110 * 2. Reservation - Assign GPU address space for every object
 111 * 3. Relocation - Update any addresses to point to the final locations
 112 * 4. Serialisation - Order the request with respect to its dependencies
 113 * 5. Construction - Construct a request to execute the batchbuffer
 114 * 6. Submission (at some point in the future execution)
 115 *
 116 * Reserving resources for the execbuf is the most complicated phase. We
 117 * neither want to have to migrate the object in the address space, nor do
 118 * we want to have to update any relocations pointing to this object. Ideally,
 119 * we want to leave the object where it is and for all the existing relocations
 120 * to match. If the object is given a new address, or if userspace thinks the
 121 * object is elsewhere, we have to parse all the relocation entries and update
 122 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
 123 * all the target addresses in all of its objects match the value in the
 124 * relocation entries and that they all match the presumed offsets given by the
 125 * list of execbuffer objects. Using this knowledge, we know that if we haven't
 126 * moved any buffers, all the relocation entries are valid and we can skip
 127 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
 128 * hang.) The requirements for using I915_EXEC_NO_RELOC are (example below):
 129 *
 130 *      The addresses written in the objects must match the corresponding
 131 *      reloc.presumed_offset which in turn must match the corresponding
 132 *      execobject.offset.
 133 *
 134 *      Any render targets written to in the batch must be flagged with
 135 *      EXEC_OBJECT_WRITE.
 136 *
 137 *      To avoid stalling, execobject.offset should match the current
 138 *      address of that object within the active context.
 139 *
 140 * The reservation is done in multiple phases. First we try to keep any
 141 * object already bound in its current location - so long as it meets the
 142 * constraints imposed by the new execbuffer. Any object left unbound after the
 143 * first pass is then fitted into any available idle space. If an object does
 144 * not fit, all objects are removed from the reservation and the process rerun
 145 * after sorting the objects into a priority order (more difficult to fit
 146 * objects are tried first). Failing that, the entire VM is cleared and we try
 147 * to fit the execbuf one last time before concluding that it simply will not
 148 * fit.
 149 *
 150 * A small complication to all of this is that we allow userspace not only to
 151 * specify an alignment and a size for the object in the address space, but
 152 * we also allow userspace to specify the exact offset. These objects are
 153 * simpler to place (the location is known a priori); all we have to do is make
 154 * sure the space is available.
 155 *
 156 * Once all the objects are in place, patching up the buried pointers to point
 157 * to the final locations is a fairly simple job of walking over the relocation
 158 * entry arrays, looking up the right address and rewriting the value into
 159 * the object. Simple! ... The relocation entries are stored in user memory
 160 * and so to access them we have to copy them into a local buffer. That copy
 161 * has to avoid taking any pagefaults as they may lead back to a GEM object
 162 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
 163 * the relocation into multiple passes. First we try to do everything within an
 164 * atomic context (avoid the pagefaults) which requires that we never wait. If
 165 * we detect that we may wait, or if we need to fault, then we have to fallback
 166 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
 167 * bells yet?) Dropping the mutex means that we lose all the state we have
 168 * built up so far for the execbuf and we must reset any global data. However,
 169 * we do leave the objects pinned in their final locations - which is a
 170 * potential issue for concurrent execbufs. Once we have left the mutex, we can
 171 * allocate and copy all the relocation entries into a large array at our
 172 * leisure, reacquire the mutex, reclaim all the objects and other state and
 173 * then proceed to update any incorrect addresses with the objects.
 174 *
 175 * As we process the relocation entries, we maintain a record of whether the
 176 * object is being written to. Using NO_RELOC, we expect userspace to provide
 177 * this information instead. We also check whether we can skip the relocation
 178 * by comparing the expected value inside the relocation entry with the target's
 179 * final address. If they differ, we have to map the current object and rewrite
 180 * the 4 or 8 byte pointer within.
 181 *
 182 * Serialising an execbuf is quite simple according to the rules of the GEM
 183 * ABI. Execution within each context is ordered by the order of submission.
 184 * Writes to any GEM object are in order of submission and are exclusive. Reads
 185 * from a GEM object are unordered with respect to other reads, but ordered by
 186 * writes. A write submitted after a read cannot occur before the read, and
 187 * similarly any read submitted after a write cannot occur before the write.
 188 * Writes are ordered between engines such that only one write occurs at any
 189 * time (completing any reads beforehand) - using semaphores where available
 190 * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
 191 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
 192 * reads before starting, and any read (either using set-domain or pread) must
 193 * flush all GPU writes before starting. (Note we only employ a barrier before;
 194 * we currently rely on userspace not concurrently starting a new execution
 195 * whilst reading or writing to an object. This may be an advantage or not
 196 * depending on how much you trust userspace not to shoot themselves in the
 197 * foot.) Serialisation may just result in the request being inserted into
 198 * a DAG awaiting its turn, but the simplest approach is to wait on the CPU until
 199 * all dependencies are resolved.
 200 *
 201 * After all of that, it is just a matter of closing the request and handing it to
 202 * the hardware (well, leaving it in a queue to be executed). However, we also
 203 * offer the ability for batchbuffers to be run with elevated privileges so
 204 * that they can access otherwise hidden registers. (Used to adjust L3 cache etc.)
 205 * Before any batch is given extra privileges we first must check that it
 206 * contains no nefarious instructions: we check that each instruction is from
 207 * our whitelist and that all registers are also from an allowed list. We first
 208 * copy the user's batchbuffer to a shadow (so that the user doesn't have
 209 * access to it, either by the CPU or GPU as we scan it) and then parse each
 210 * instruction. If everything is ok, we set a flag telling the hardware to run
 211 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
 212 */
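
/*
 * Illustrative sketch (editorial example, not part of the driver): how a
 * userspace client might fill in the execbuf structures described above.
 * Handle values, offsets, sizes and the context id are hypothetical; the
 * structure and flag names come from the i915 uAPI (drm/i915_drm.h). With
 * I915_EXEC_NO_RELOC, the offsets written into the batch, each
 * reloc.presumed_offset and the matching execobject.offset must all agree.
 */
#if 0 /* userspace-side example, never compiled as part of this file */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int submit_batch(int fd, uint32_t target_handle, uint32_t batch_handle,
			uint64_t presumed_target_offset, uint32_t ctx_id)
{
	struct drm_i915_gem_relocation_entry reloc;
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_execbuffer2 execbuf;

	/* One pointer inside the batch refers to the target object. */
	memset(&reloc, 0, sizeof(reloc));
	reloc.target_handle = target_handle;
	reloc.offset = 4 * sizeof(uint32_t);	/* where the pointer lives */
	reloc.presumed_offset = presumed_target_offset;
	reloc.read_domains = I915_GEM_DOMAIN_RENDER;
	reloc.write_domain = I915_GEM_DOMAIN_RENDER;

	memset(obj, 0, sizeof(obj));
	obj[0].handle = target_handle;
	obj[0].offset = presumed_target_offset;	/* must match the reloc */
	obj[0].flags = EXEC_OBJECT_WRITE;	/* written by the batch */
	obj[1].handle = batch_handle;		/* batch is last by default */
	obj[1].relocation_count = 1;
	obj[1].relocs_ptr = (uintptr_t)&reloc;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)obj;
	execbuf.buffer_count = 2;
	execbuf.batch_len = 8 * sizeof(uint32_t);
	execbuf.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC;
	execbuf.rsvd1 = ctx_id;			/* context handle */

	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}
#endif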
 213
 214struct i915_execbuffer {
 215        struct drm_i915_private *i915; /** i915 backpointer */
 216        struct drm_file *file; /** per-file lookup tables and limits */
 217        struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 218        struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
 219        struct i915_vma **vma;
 220        unsigned int *flags;
 221
 222        struct intel_engine_cs *engine; /** engine to queue the request to */
 223        struct intel_context *context; /* logical state for the request */
 224        struct i915_gem_context *gem_context; /** caller's context */
 225        struct i915_address_space *vm; /** GTT and vma for the request */
 226
 227        struct i915_request *request; /** our request to build */
 228        struct i915_vma *batch; /** identity of the batch obj/vma */
 229
 230        /** actual size of execobj[] as we may extend it for the cmdparser */
 231        unsigned int buffer_count;
 232
 233        /** list of vma not yet bound during reservation phase */
 234        struct list_head unbound;
 235
 236        /** list of vma that have execobj.relocation_count */
 237        struct list_head relocs;
 238
 239        /**
 240         * Track the most recently used object for relocations, as we
 241         * frequently have to perform multiple relocations within the same
 242         * obj/page
 243         */
 244        struct reloc_cache {
 245                struct drm_mm_node node; /** temporary GTT binding */
 246                unsigned long vaddr; /** Current kmap address */
 247                unsigned long page; /** Currently mapped page index */
 248                unsigned int gen; /** Cached value of INTEL_GEN */
 249                bool use_64bit_reloc : 1;
 250                bool has_llc : 1;
 251                bool has_fence : 1;
 252                bool needs_unfenced : 1;
 253
 254                struct i915_request *rq;
 255                u32 *rq_cmd;
 256                unsigned int rq_size;
 257        } reloc_cache;
 258
 259        u64 invalid_flags; /** Set of execobj.flags that are invalid */
 260        u32 context_flags; /** Set of execobj.flags to insert from the ctx */
 261
 262        u32 batch_start_offset; /** Location within object of batch */
 263        u32 batch_len; /** Length of batch within object */
 264        u32 batch_flags; /** Flags composed for emit_bb_start() */
 265
 266        /**
 267         * Indicates either the size of the hashtable used to resolve
 268         * relocation handles, or, if negative, that we are using a direct
 269         * index into the execobj[].
 270         */
 271        int lut_size;
 272        struct hlist_head *buckets; /** ht for relocation handles */
 273};
 274
 275#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
 276
 277/*
 278 * Used to convert any address to canonical form.
 279 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
 280 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
 281 * addresses to be in a canonical form:
 282 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
 283 * canonical form [63:48] == [47]."
 284 */
 285#define GEN8_HIGH_ADDRESS_BIT 47
 286static inline u64 gen8_canonical_addr(u64 address)
 287{
 288        return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
 289}
 290
 291static inline u64 gen8_noncanonical_addr(u64 address)
 292{
 293        return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
 294}
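
/*
 * Worked example (editorial, values purely illustrative): for an address
 * with bit 47 set, e.g. 0x0000900000000000, gen8_canonical_addr() sign
 * extends bit 47 upwards and returns 0xffff900000000000, while
 * gen8_noncanonical_addr() masks the upper bits off again and recovers
 * 0x0000900000000000. Addresses with bit 47 clear pass through both
 * helpers unchanged.
 */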
 295
 296static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 297{
 298        return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
 299}
 300
 301static int eb_create(struct i915_execbuffer *eb)
 302{
 303        if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
 304                unsigned int size = 1 + ilog2(eb->buffer_count);
 305
 306                /*
 307                 * Without a 1:1 association between relocation handles and
 308                 * the execobject[] index, we instead create a hashtable.
 309                 * We size it dynamically based on available memory, starting
 310                  * first with a 1:1 associative hash and scaling back until
 311                 * the allocation succeeds.
 312                 *
 313                 * Later on we use a positive lut_size to indicate we are
 314                 * using this hashtable, and a negative value to indicate a
 315                 * direct lookup.
 316                 */
 317                do {
 318                        gfp_t flags;
 319
 320                        /* While we can still reduce the allocation size, don't
 321                         * raise a warning and allow the allocation to fail.
 322                         * On the last pass though, we want to try as hard
 323                         * as possible to perform the allocation and warn
 324                         * if it fails.
 325                         */
 326                        flags = GFP_KERNEL;
 327                        if (size > 1)
 328                                flags |= __GFP_NORETRY | __GFP_NOWARN;
 329
 330                        eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
 331                                              flags);
 332                        if (eb->buckets)
 333                                break;
 334                } while (--size);
 335
 336                if (unlikely(!size))
 337                        return -ENOMEM;
 338
 339                eb->lut_size = size;
 340        } else {
 341                eb->lut_size = -eb->buffer_count;
 342        }
 343
 344        return 0;
 345}
 346
 347static bool
 348eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 349                 const struct i915_vma *vma,
 350                 unsigned int flags)
 351{
 352        if (vma->node.size < entry->pad_to_size)
 353                return true;
 354
 355        if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
 356                return true;
 357
 358        if (flags & EXEC_OBJECT_PINNED &&
 359            vma->node.start != entry->offset)
 360                return true;
 361
 362        if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
 363            vma->node.start < BATCH_OFFSET_BIAS)
 364                return true;
 365
 366        if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
 367            (vma->node.start + vma->node.size - 1) >> 32)
 368                return true;
 369
 370        if (flags & __EXEC_OBJECT_NEEDS_MAP &&
 371            !i915_vma_is_map_and_fenceable(vma))
 372                return true;
 373
 374        return false;
 375}
 376
 377static inline bool
 378eb_pin_vma(struct i915_execbuffer *eb,
 379           const struct drm_i915_gem_exec_object2 *entry,
 380           struct i915_vma *vma)
 381{
 382        unsigned int exec_flags = *vma->exec_flags;
 383        u64 pin_flags;
 384
 385        if (vma->node.size)
 386                pin_flags = vma->node.start;
 387        else
 388                pin_flags = entry->offset & PIN_OFFSET_MASK;
 389
 390        pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
 391        if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
 392                pin_flags |= PIN_GLOBAL;
 393
 394        if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
 395                return false;
 396
 397        if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 398                if (unlikely(i915_vma_pin_fence(vma))) {
 399                        i915_vma_unpin(vma);
 400                        return false;
 401                }
 402
 403                if (vma->fence)
 404                        exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 405        }
 406
 407        *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
 408        return !eb_vma_misplaced(entry, vma, exec_flags);
 409}
 410
 411static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 412{
 413        GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 414
 415        if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 416                __i915_vma_unpin_fence(vma);
 417
 418        __i915_vma_unpin(vma);
 419}
 420
 421static inline void
 422eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 423{
 424        if (!(*flags & __EXEC_OBJECT_HAS_PIN))
 425                return;
 426
 427        __eb_unreserve_vma(vma, *flags);
 428        *flags &= ~__EXEC_OBJECT_RESERVED;
 429}
 430
 431static int
 432eb_validate_vma(struct i915_execbuffer *eb,
 433                struct drm_i915_gem_exec_object2 *entry,
 434                struct i915_vma *vma)
 435{
 436        if (unlikely(entry->flags & eb->invalid_flags))
 437                return -EINVAL;
 438
 439        if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
 440                return -EINVAL;
 441
 442        /*
 443         * Offset can be used as input (EXEC_OBJECT_PINNED), reject
 444         * any non-page-aligned or non-canonical addresses.
 445         */
 446        if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
 447                     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
 448                return -EINVAL;
 449
 450        /* pad_to_size was once a reserved field, so sanitize it */
 451        if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
 452                if (unlikely(offset_in_page(entry->pad_to_size)))
 453                        return -EINVAL;
 454        } else {
 455                entry->pad_to_size = 0;
 456        }
 457
 458        if (unlikely(vma->exec_flags)) {
 459                DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 460                          entry->handle, (int)(entry - eb->exec));
 461                return -EINVAL;
 462        }
 463
 464        /*
 465         * From the drm_mm perspective the address space is continuous,
 466         * so from this point we're always using non-canonical
 467         * form internally.
 468         */
 469        entry->offset = gen8_noncanonical_addr(entry->offset);
 470
 471        if (!eb->reloc_cache.has_fence) {
 472                entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 473        } else {
 474                if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
 475                     eb->reloc_cache.needs_unfenced) &&
 476                    i915_gem_object_is_tiled(vma->obj))
 477                        entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
 478        }
 479
 480        if (!(entry->flags & EXEC_OBJECT_PINNED))
 481                entry->flags |= eb->context_flags;
 482
 483        return 0;
 484}
 485
 486static int
 487eb_add_vma(struct i915_execbuffer *eb,
 488           unsigned int i, unsigned batch_idx,
 489           struct i915_vma *vma)
 490{
 491        struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 492        int err;
 493
 494        GEM_BUG_ON(i915_vma_is_closed(vma));
 495
 496        if (!(eb->args->flags & __EXEC_VALIDATED)) {
 497                err = eb_validate_vma(eb, entry, vma);
 498                if (unlikely(err))
 499                        return err;
 500        }
 501
 502        if (eb->lut_size > 0) {
 503                vma->exec_handle = entry->handle;
 504                hlist_add_head(&vma->exec_node,
 505                               &eb->buckets[hash_32(entry->handle,
 506                                                    eb->lut_size)]);
 507        }
 508
 509        if (entry->relocation_count)
 510                list_add_tail(&vma->reloc_link, &eb->relocs);
 511
 512        /*
 513         * Stash a pointer from the vma to execobj, so we can query its flags,
 514         * size, alignment etc as provided by the user. Also we stash a pointer
 515         * to the vma inside the execobj so that we can use a direct lookup
 516         * to find the right target VMA when doing relocations.
 517         */
 518        eb->vma[i] = vma;
 519        eb->flags[i] = entry->flags;
 520        vma->exec_flags = &eb->flags[i];
 521
 522        /*
 523         * SNA is doing fancy tricks with compressing batch buffers, which leads
 524         * to negative relocation deltas. Usually that works out ok since the
 525         * relocate address is still positive, except when the batch is placed
 526         * very low in the GTT. Ensure this doesn't happen.
 527         *
 528         * Note that actual hangs have only been observed on gen7, but for
 529         * paranoia do it everywhere.
 530         */
 531        if (i == batch_idx) {
 532                if (entry->relocation_count &&
 533                    !(eb->flags[i] & EXEC_OBJECT_PINNED))
 534                        eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
 535                if (eb->reloc_cache.has_fence)
 536                        eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
 537
 538                eb->batch = vma;
 539        }
 540
 541        err = 0;
 542        if (eb_pin_vma(eb, entry, vma)) {
 543                if (entry->offset != vma->node.start) {
 544                        entry->offset = vma->node.start | UPDATE;
 545                        eb->args->flags |= __EXEC_HAS_RELOC;
 546                }
 547        } else {
 548                eb_unreserve_vma(vma, vma->exec_flags);
 549
 550                list_add_tail(&vma->exec_link, &eb->unbound);
 551                if (drm_mm_node_allocated(&vma->node))
 552                        err = i915_vma_unbind(vma);
 553                if (unlikely(err))
 554                        vma->exec_flags = NULL;
 555        }
 556        return err;
 557}
 558
 559static inline int use_cpu_reloc(const struct reloc_cache *cache,
 560                                const struct drm_i915_gem_object *obj)
 561{
 562        if (!i915_gem_object_has_struct_page(obj))
 563                return false;
 564
 565        if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
 566                return true;
 567
 568        if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
 569                return false;
 570
 571        return (cache->has_llc ||
 572                obj->cache_dirty ||
 573                obj->cache_level != I915_CACHE_NONE);
 574}
 575
 576static int eb_reserve_vma(const struct i915_execbuffer *eb,
 577                          struct i915_vma *vma)
 578{
 579        struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 580        unsigned int exec_flags = *vma->exec_flags;
 581        u64 pin_flags;
 582        int err;
 583
 584        pin_flags = PIN_USER | PIN_NONBLOCK;
 585        if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
 586                pin_flags |= PIN_GLOBAL;
 587
 588        /*
 589         * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
 590         * limit address to the first 4GBs for unflagged objects.
 591         */
 592        if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
 593                pin_flags |= PIN_ZONE_4G;
 594
 595        if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
 596                pin_flags |= PIN_MAPPABLE;
 597
 598        if (exec_flags & EXEC_OBJECT_PINNED) {
 599                pin_flags |= entry->offset | PIN_OFFSET_FIXED;
 600                pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
 601        } else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
 602                pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 603        }
 604
 605        err = i915_vma_pin(vma,
 606                           entry->pad_to_size, entry->alignment,
 607                           pin_flags);
 608        if (err)
 609                return err;
 610
 611        if (entry->offset != vma->node.start) {
 612                entry->offset = vma->node.start | UPDATE;
 613                eb->args->flags |= __EXEC_HAS_RELOC;
 614        }
 615
 616        if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 617                err = i915_vma_pin_fence(vma);
 618                if (unlikely(err)) {
 619                        i915_vma_unpin(vma);
 620                        return err;
 621                }
 622
 623                if (vma->fence)
 624                        exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 625        }
 626
 627        *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
 628        GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
 629
 630        return 0;
 631}
 632
 633static int eb_reserve(struct i915_execbuffer *eb)
 634{
 635        const unsigned int count = eb->buffer_count;
 636        struct list_head last;
 637        struct i915_vma *vma;
 638        unsigned int i, pass;
 639        int err;
 640
 641        /*
 642         * Attempt to pin all of the buffers into the GTT.
 643         * This is done in 3 phases:
 644         *
 645         * 1a. Unbind all objects that do not match the GTT constraints for
 646         *     the execbuffer (fenceable, mappable, alignment etc).
 647         * 1b. Increment pin count for already bound objects.
 648         * 2.  Bind new objects.
 649         * 3.  Decrement pin count.
 650         *
 651         * This avoids unnecessary unbinding of later objects in order to make
 652         * room for the earlier objects *unless* we need to defragment.
 653         */
 654
 655        pass = 0;
 656        err = 0;
 657        do {
 658                list_for_each_entry(vma, &eb->unbound, exec_link) {
 659                        err = eb_reserve_vma(eb, vma);
 660                        if (err)
 661                                break;
 662                }
 663                if (err != -ENOSPC)
 664                        return err;
 665
 666                /* Resort *all* the objects into priority order */
 667                INIT_LIST_HEAD(&eb->unbound);
 668                INIT_LIST_HEAD(&last);
 669                for (i = 0; i < count; i++) {
 670                        unsigned int flags = eb->flags[i];
 671                        struct i915_vma *vma = eb->vma[i];
 672
 673                        if (flags & EXEC_OBJECT_PINNED &&
 674                            flags & __EXEC_OBJECT_HAS_PIN)
 675                                continue;
 676
 677                        eb_unreserve_vma(vma, &eb->flags[i]);
 678
 679                        if (flags & EXEC_OBJECT_PINNED)
 680                                /* Pinned must have their slot */
 681                                list_add(&vma->exec_link, &eb->unbound);
 682                        else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 683                                /* Mappable objects require the lowest 256MiB (aperture) */
 684                                list_add_tail(&vma->exec_link, &eb->unbound);
 685                        else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
 686                                /* Prioritise 4GiB region for restricted bo */
 687                                list_add(&vma->exec_link, &last);
 688                        else
 689                                list_add_tail(&vma->exec_link, &last);
 690                }
 691                list_splice_tail(&last, &eb->unbound);
 692
 693                switch (pass++) {
 694                case 0:
 695                        break;
 696
 697                case 1:
 698                        /* Too fragmented, unbind everything and retry */
 699                        err = i915_gem_evict_vm(eb->vm);
 700                        if (err)
 701                                return err;
 702                        break;
 703
 704                default:
 705                        return -ENOSPC;
 706                }
 707        } while (1);
 708}
 709
 710static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 711{
 712        if (eb->args->flags & I915_EXEC_BATCH_FIRST)
 713                return 0;
 714        else
 715                return eb->buffer_count - 1;
 716}
 717
 718static int eb_select_context(struct i915_execbuffer *eb)
 719{
 720        struct i915_gem_context *ctx;
 721
 722        ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
 723        if (unlikely(!ctx))
 724                return -ENOENT;
 725
 726        eb->gem_context = ctx;
 727        if (ctx->vm) {
 728                eb->vm = ctx->vm;
 729                eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 730        } else {
 731                eb->vm = &eb->i915->ggtt.vm;
 732        }
 733
 734        eb->context_flags = 0;
 735        if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
 736                eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
 737
 738        return 0;
 739}
 740
 741static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
 742{
 743        struct i915_request *rq;
 744
 745        /*
 746         * Completely unscientific finger-in-the-air estimates for suitable
 747         * maximum user request size (to avoid blocking) and then backoff.
 748         */
 749        if (intel_ring_update_space(ring) >= PAGE_SIZE)
 750                return NULL;
 751
 752        /*
 753         * Find a request such that, after waiting upon it, there will be at least half
 754         * the ring available. The hysteresis allows us to compete for the
 755         * shared ring and should mean that we sleep less often prior to
 756         * claiming our resources, but not so long that the ring completely
 757         * drains before we can submit our next request.
 758         */
 759        list_for_each_entry(rq, &ring->request_list, ring_link) {
 760                if (__intel_ring_space(rq->postfix,
 761                                       ring->emit, ring->size) > ring->size / 2)
 762                        break;
 763        }
 764        if (&rq->ring_link == &ring->request_list)
 765                return NULL; /* weird, we will check again later for real */
 766
 767        return i915_request_get(rq);
 768}
 769
 770static int eb_wait_for_ring(const struct i915_execbuffer *eb)
 771{
 772        struct i915_request *rq;
 773        int ret = 0;
 774
 775        /*
 776         * Apply a light amount of backpressure to prevent excessive hogs
 777         * from blocking waiting for space whilst holding struct_mutex and
 778         * keeping all of their resources pinned.
 779         */
 780
 781        rq = __eb_wait_for_ring(eb->context->ring);
 782        if (rq) {
 783                mutex_unlock(&eb->i915->drm.struct_mutex);
 784
 785                if (i915_request_wait(rq,
 786                                      I915_WAIT_INTERRUPTIBLE,
 787                                      MAX_SCHEDULE_TIMEOUT) < 0)
 788                        ret = -EINTR;
 789
 790                i915_request_put(rq);
 791
 792                mutex_lock(&eb->i915->drm.struct_mutex);
 793        }
 794
 795        return ret;
 796}
 797
 798static int eb_lookup_vmas(struct i915_execbuffer *eb)
 799{
 800        struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
 801        struct drm_i915_gem_object *obj;
 802        unsigned int i, batch;
 803        int err;
 804
 805        if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
 806                return -EIO;
 807
 808        INIT_LIST_HEAD(&eb->relocs);
 809        INIT_LIST_HEAD(&eb->unbound);
 810
 811        batch = eb_batch_index(eb);
 812
 813        mutex_lock(&eb->gem_context->mutex);
 814        if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
 815                err = -ENOENT;
 816                goto err_ctx;
 817        }
 818
 819        for (i = 0; i < eb->buffer_count; i++) {
 820                u32 handle = eb->exec[i].handle;
 821                struct i915_lut_handle *lut;
 822                struct i915_vma *vma;
 823
 824                vma = radix_tree_lookup(handles_vma, handle);
 825                if (likely(vma))
 826                        goto add_vma;
 827
 828                obj = i915_gem_object_lookup(eb->file, handle);
 829                if (unlikely(!obj)) {
 830                        err = -ENOENT;
 831                        goto err_vma;
 832                }
 833
 834                vma = i915_vma_instance(obj, eb->vm, NULL);
 835                if (IS_ERR(vma)) {
 836                        err = PTR_ERR(vma);
 837                        goto err_obj;
 838                }
 839
 840                lut = i915_lut_handle_alloc();
 841                if (unlikely(!lut)) {
 842                        err = -ENOMEM;
 843                        goto err_obj;
 844                }
 845
 846                err = radix_tree_insert(handles_vma, handle, vma);
 847                if (unlikely(err)) {
 848                        i915_lut_handle_free(lut);
 849                        goto err_obj;
 850                }
 851
 852                /* transfer ref to lut */
 853                if (!atomic_fetch_inc(&vma->open_count))
 854                        i915_vma_reopen(vma);
 855                lut->handle = handle;
 856                lut->ctx = eb->gem_context;
 857
 858                i915_gem_object_lock(obj);
 859                list_add(&lut->obj_link, &obj->lut_list);
 860                i915_gem_object_unlock(obj);
 861
 862add_vma:
 863                err = eb_add_vma(eb, i, batch, vma);
 864                if (unlikely(err))
 865                        goto err_vma;
 866
 867                GEM_BUG_ON(vma != eb->vma[i]);
 868                GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 869                GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
 870                           eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
 871        }
 872
 873        mutex_unlock(&eb->gem_context->mutex);
 874
 875        eb->args->flags |= __EXEC_VALIDATED;
 876        return eb_reserve(eb);
 877
 878err_obj:
 879        i915_gem_object_put(obj);
 880err_vma:
 881        eb->vma[i] = NULL;
 882err_ctx:
 883        mutex_unlock(&eb->gem_context->mutex);
 884        return err;
 885}
 886
 887static struct i915_vma *
 888eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 889{
 890        if (eb->lut_size < 0) {
 891                if (handle >= -eb->lut_size)
 892                        return NULL;
 893                return eb->vma[handle];
 894        } else {
 895                struct hlist_head *head;
 896                struct i915_vma *vma;
 897
 898                head = &eb->buckets[hash_32(handle, eb->lut_size)];
 899                hlist_for_each_entry(vma, head, exec_node) {
 900                        if (vma->exec_handle == handle)
 901                                return vma;
 902                }
 903                return NULL;
 904        }
 905}
 906
 907static void eb_release_vmas(const struct i915_execbuffer *eb)
 908{
 909        const unsigned int count = eb->buffer_count;
 910        unsigned int i;
 911
 912        for (i = 0; i < count; i++) {
 913                struct i915_vma *vma = eb->vma[i];
 914                unsigned int flags = eb->flags[i];
 915
 916                if (!vma)
 917                        break;
 918
 919                GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 920                vma->exec_flags = NULL;
 921                eb->vma[i] = NULL;
 922
 923                if (flags & __EXEC_OBJECT_HAS_PIN)
 924                        __eb_unreserve_vma(vma, flags);
 925
 926                if (flags & __EXEC_OBJECT_HAS_REF)
 927                        i915_vma_put(vma);
 928        }
 929}
 930
 931static void eb_reset_vmas(const struct i915_execbuffer *eb)
 932{
 933        eb_release_vmas(eb);
 934        if (eb->lut_size > 0)
 935                memset(eb->buckets, 0,
 936                       sizeof(struct hlist_head) << eb->lut_size);
 937}
 938
 939static void eb_destroy(const struct i915_execbuffer *eb)
 940{
 941        GEM_BUG_ON(eb->reloc_cache.rq);
 942
 943        if (eb->lut_size > 0)
 944                kfree(eb->buckets);
 945}
 946
 947static inline u64
 948relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 949                  const struct i915_vma *target)
 950{
 951        return gen8_canonical_addr((int)reloc->delta + target->node.start);
 952}
 953
 954static void reloc_cache_init(struct reloc_cache *cache,
 955                             struct drm_i915_private *i915)
 956{
 957        cache->page = -1;
 958        cache->vaddr = 0;
 959        /* Must be a variable in the struct to allow GCC to unroll. */
 960        cache->gen = INTEL_GEN(i915);
 961        cache->has_llc = HAS_LLC(i915);
 962        cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
 963        cache->has_fence = cache->gen < 4;
 964        cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 965        cache->node.allocated = false;
 966        cache->rq = NULL;
 967        cache->rq_size = 0;
 968}
 969
 970static inline void *unmask_page(unsigned long p)
 971{
 972        return (void *)(uintptr_t)(p & PAGE_MASK);
 973}
 974
 975static inline unsigned int unmask_flags(unsigned long p)
 976{
 977        return p & ~PAGE_MASK;
 978}
 979
 980#define KMAP 0x4 /* after CLFLUSH_FLAGS */
 981
 982static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 983{
 984        struct drm_i915_private *i915 =
 985                container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
 986        return &i915->ggtt;
 987}
 988
 989static void reloc_gpu_flush(struct reloc_cache *cache)
 990{
 991        GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
 992        cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
 993
 994        __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size);
 995        i915_gem_object_unpin_map(cache->rq->batch->obj);
 996
 997        i915_gem_chipset_flush(cache->rq->i915);
 998
 999        i915_request_add(cache->rq);
1000        cache->rq = NULL;
1001}
1002
1003static void reloc_cache_reset(struct reloc_cache *cache)
1004{
1005        void *vaddr;
1006
1007        if (cache->rq)
1008                reloc_gpu_flush(cache);
1009
1010        if (!cache->vaddr)
1011                return;
1012
1013        vaddr = unmask_page(cache->vaddr);
1014        if (cache->vaddr & KMAP) {
1015                if (cache->vaddr & CLFLUSH_AFTER)
1016                        mb();
1017
1018                kunmap_atomic(vaddr);
1019                i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
1020        } else {
1021                wmb();
1022                io_mapping_unmap_atomic((void __iomem *)vaddr);
1023                if (cache->node.allocated) {
1024                        struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1025
1026                        ggtt->vm.clear_range(&ggtt->vm,
1027                                             cache->node.start,
1028                                             cache->node.size);
1029                        drm_mm_remove_node(&cache->node);
1030                } else {
1031                        i915_vma_unpin((struct i915_vma *)cache->node.mm);
1032                }
1033        }
1034
1035        cache->vaddr = 0;
1036        cache->page = -1;
1037}
1038
1039static void *reloc_kmap(struct drm_i915_gem_object *obj,
1040                        struct reloc_cache *cache,
1041                        unsigned long page)
1042{
1043        void *vaddr;
1044
1045        if (cache->vaddr) {
1046                kunmap_atomic(unmask_page(cache->vaddr));
1047        } else {
1048                unsigned int flushes;
1049                int err;
1050
1051                err = i915_gem_object_prepare_write(obj, &flushes);
1052                if (err)
1053                        return ERR_PTR(err);
1054
1055                BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
1056                BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
1057
1058                cache->vaddr = flushes | KMAP;
1059                cache->node.mm = (void *)obj;
1060                if (flushes)
1061                        mb();
1062        }
1063
1064        vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
1065        cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
1066        cache->page = page;
1067
1068        return vaddr;
1069}
1070
1071static void *reloc_iomap(struct drm_i915_gem_object *obj,
1072                         struct reloc_cache *cache,
1073                         unsigned long page)
1074{
1075        struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1076        unsigned long offset;
1077        void *vaddr;
1078
1079        if (cache->vaddr) {
1080                io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
1081        } else {
1082                struct i915_vma *vma;
1083                int err;
1084
1085                if (use_cpu_reloc(cache, obj))
1086                        return NULL;
1087
1088                i915_gem_object_lock(obj);
1089                err = i915_gem_object_set_to_gtt_domain(obj, true);
1090                i915_gem_object_unlock(obj);
1091                if (err)
1092                        return ERR_PTR(err);
1093
1094                vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1095                                               PIN_MAPPABLE |
1096                                               PIN_NONBLOCK |
1097                                               PIN_NONFAULT);
1098                if (IS_ERR(vma)) {
1099                        memset(&cache->node, 0, sizeof(cache->node));
1100                        err = drm_mm_insert_node_in_range
1101                                (&ggtt->vm.mm, &cache->node,
1102                                 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
1103                                 0, ggtt->mappable_end,
1104                                 DRM_MM_INSERT_LOW);
1105                        if (err) /* no inactive aperture space, use cpu reloc */
1106                                return NULL;
1107                } else {
1108                        err = i915_vma_put_fence(vma);
1109                        if (err) {
1110                                i915_vma_unpin(vma);
1111                                return ERR_PTR(err);
1112                        }
1113
1114                        cache->node.start = vma->node.start;
1115                        cache->node.mm = (void *)vma;
1116                }
1117        }
1118
1119        offset = cache->node.start;
1120        if (cache->node.allocated) {
1121                wmb();
1122                ggtt->vm.insert_page(&ggtt->vm,
1123                                     i915_gem_object_get_dma_address(obj, page),
1124                                     offset, I915_CACHE_NONE, 0);
1125        } else {
1126                offset += page << PAGE_SHIFT;
1127        }
1128
1129        vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1130                                                         offset);
1131        cache->page = page;
1132        cache->vaddr = (unsigned long)vaddr;
1133
1134        return vaddr;
1135}
1136
1137static void *reloc_vaddr(struct drm_i915_gem_object *obj,
1138                         struct reloc_cache *cache,
1139                         unsigned long page)
1140{
1141        void *vaddr;
1142
1143        if (cache->page == page) {
1144                vaddr = unmask_page(cache->vaddr);
1145        } else {
1146                vaddr = NULL;
1147                if ((cache->vaddr & KMAP) == 0)
1148                        vaddr = reloc_iomap(obj, cache, page);
1149                if (!vaddr)
1150                        vaddr = reloc_kmap(obj, cache, page);
1151        }
1152
1153        return vaddr;
1154}
1155
1156static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1157{
1158        if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
1159                if (flushes & CLFLUSH_BEFORE) {
1160                        clflushopt(addr);
1161                        mb();
1162                }
1163
1164                *addr = value;
1165
1166                /*
1167                 * Writes to the same cacheline are serialised by the CPU
1168                 * (including clflush). On the write path, we only require
1169                 * that it hits memory in an orderly fashion and place
1170                 * mb barriers at the start and end of the relocation phase
1171                 * to ensure ordering of clflush with respect to the system.
1172                 */
1173                if (flushes & CLFLUSH_AFTER)
1174                        clflushopt(addr);
1175        } else
1176                *addr = value;
1177}
1178
1179static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
1180{
1181        struct drm_i915_gem_object *obj = vma->obj;
1182        int err;
1183
1184        i915_vma_lock(vma);
1185
1186        if (obj->cache_dirty & ~obj->cache_coherent)
1187                i915_gem_clflush_object(obj, 0);
1188        obj->write_domain = 0;
1189
1190        err = i915_request_await_object(rq, vma->obj, true);
1191        if (err == 0)
1192                err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1193
1194        i915_vma_unlock(vma);
1195
1196        return err;
1197}
1198
1199static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
1200                             struct i915_vma *vma,
1201                             unsigned int len)
1202{
1203        struct reloc_cache *cache = &eb->reloc_cache;
1204        struct drm_i915_gem_object *obj;
1205        struct i915_request *rq;
1206        struct i915_vma *batch;
1207        u32 *cmd;
1208        int err;
1209
1210        obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
1211        if (IS_ERR(obj))
1212                return PTR_ERR(obj);
1213
1214        cmd = i915_gem_object_pin_map(obj,
1215                                      cache->has_llc ?
1216                                      I915_MAP_FORCE_WB :
1217                                      I915_MAP_FORCE_WC);
1218        i915_gem_object_unpin_pages(obj);
1219        if (IS_ERR(cmd))
1220                return PTR_ERR(cmd);
1221
1222        batch = i915_vma_instance(obj, vma->vm, NULL);
1223        if (IS_ERR(batch)) {
1224                err = PTR_ERR(batch);
1225                goto err_unmap;
1226        }
1227
1228        err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
1229        if (err)
1230                goto err_unmap;
1231
1232        rq = i915_request_create(eb->context);
1233        if (IS_ERR(rq)) {
1234                err = PTR_ERR(rq);
1235                goto err_unpin;
1236        }
1237
1238        err = reloc_move_to_gpu(rq, vma);
1239        if (err)
1240                goto err_request;
1241
1242        err = eb->engine->emit_bb_start(rq,
1243                                        batch->node.start, PAGE_SIZE,
1244                                        cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
1245        if (err)
1246                goto skip_request;
1247
1248        i915_vma_lock(batch);
1249        GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
1250        err = i915_vma_move_to_active(batch, rq, 0);
1251        i915_vma_unlock(batch);
1252        if (err)
1253                goto skip_request;
1254
1255        rq->batch = batch;
1256        i915_vma_unpin(batch);
1257
1258        cache->rq = rq;
1259        cache->rq_cmd = cmd;
1260        cache->rq_size = 0;
1261
1262        /* Return with batch mapping (cmd) still pinned */
1263        return 0;
1264
1265skip_request:
1266        i915_request_skip(rq, err);
1267err_request:
1268        i915_request_add(rq);
1269err_unpin:
1270        i915_vma_unpin(batch);
1271err_unmap:
1272        i915_gem_object_unpin_map(obj);
1273        return err;
1274}
1275
1276static u32 *reloc_gpu(struct i915_execbuffer *eb,
1277                      struct i915_vma *vma,
1278                      unsigned int len)
1279{
1280        struct reloc_cache *cache = &eb->reloc_cache;
1281        u32 *cmd;
1282
1283        if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
1284                reloc_gpu_flush(cache);
1285
1286        if (unlikely(!cache->rq)) {
1287                int err;
1288
1289                /* If we need to copy for the cmdparser, we will stall anyway */
1290                if (eb_use_cmdparser(eb))
1291                        return ERR_PTR(-EWOULDBLOCK);
1292
1293                if (!intel_engine_can_store_dword(eb->engine))
1294                        return ERR_PTR(-ENODEV);
1295
1296                err = __reloc_gpu_alloc(eb, vma, len);
1297                if (unlikely(err))
1298                        return ERR_PTR(err);
1299        }
1300
1301        cmd = cache->rq_cmd + cache->rq_size;
1302        cache->rq_size += len;
1303
1304        return cmd;
1305}
1306
1307static u64
1308relocate_entry(struct i915_vma *vma,
1309               const struct drm_i915_gem_relocation_entry *reloc,
1310               struct i915_execbuffer *eb,
1311               const struct i915_vma *target)
1312{
1313        u64 offset = reloc->offset;
1314        u64 target_offset = relocation_target(reloc, target);
1315        bool wide = eb->reloc_cache.use_64bit_reloc;
1316        void *vaddr;
1317
1318        if (!eb->reloc_cache.vaddr &&
1319            (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
1320             !reservation_object_test_signaled_rcu(vma->resv, true))) {
1321                const unsigned int gen = eb->reloc_cache.gen;
1322                unsigned int len;
1323                u32 *batch;
1324                u64 addr;
1325
1326                if (wide)
1327                        len = offset & 7 ? 8 : 5;
1328                else if (gen >= 4)
1329                        len = 4;
1330                else
1331                        len = 3;
1332
1333                batch = reloc_gpu(eb, vma, len);
1334                if (IS_ERR(batch))
1335                        goto repeat;
1336
1337                addr = gen8_canonical_addr(vma->node.start + offset);
1338                if (wide) {
1339                        if (offset & 7) {
1340                                *batch++ = MI_STORE_DWORD_IMM_GEN4;
1341                                *batch++ = lower_32_bits(addr);
1342                                *batch++ = upper_32_bits(addr);
1343                                *batch++ = lower_32_bits(target_offset);
1344
1345                                addr = gen8_canonical_addr(addr + 4);
1346
1347                                *batch++ = MI_STORE_DWORD_IMM_GEN4;
1348                                *batch++ = lower_32_bits(addr);
1349                                *batch++ = upper_32_bits(addr);
1350                                *batch++ = upper_32_bits(target_offset);
1351                        } else {
1352                                *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
1353                                *batch++ = lower_32_bits(addr);
1354                                *batch++ = upper_32_bits(addr);
1355                                *batch++ = lower_32_bits(target_offset);
1356                                *batch++ = upper_32_bits(target_offset);
1357                        }
1358                } else if (gen >= 6) {
1359                        *batch++ = MI_STORE_DWORD_IMM_GEN4;
1360                        *batch++ = 0;
1361                        *batch++ = addr;
1362                        *batch++ = target_offset;
1363                } else if (gen >= 4) {
1364                        *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1365                        *batch++ = 0;
1366                        *batch++ = addr;
1367                        *batch++ = target_offset;
1368                } else {
1369                        *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
1370                        *batch++ = addr;
1371                        *batch++ = target_offset;
1372                }
1373
1374                goto out;
1375        }
1376
1377repeat:
1378        vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
1379        if (IS_ERR(vaddr))
1380                return PTR_ERR(vaddr);
1381
1382        clflush_write32(vaddr + offset_in_page(offset),
1383                        lower_32_bits(target_offset),
1384                        eb->reloc_cache.vaddr);
1385
1386        if (wide) {
1387                offset += sizeof(u32);
1388                target_offset >>= 32;
1389                wide = false;
1390                goto repeat;
1391        }
1392
1393out:
1394        return target->node.start | UPDATE;
1395}
1396
1397static u64
1398eb_relocate_entry(struct i915_execbuffer *eb,
1399                  struct i915_vma *vma,
1400                  const struct drm_i915_gem_relocation_entry *reloc)
1401{
1402        struct i915_vma *target;
1403        int err;
1404
1405        /* we already hold a reference to all valid objects */
1406        target = eb_get_vma(eb, reloc->target_handle);
1407        if (unlikely(!target))
1408                return -ENOENT;
1409
1410        /* Validate that the target is in a valid r/w GPU domain */
1411        if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
1412                DRM_DEBUG("reloc with multiple write domains: "
1413                          "target %d offset %d "
1414                          "read %08x write %08x",
1415                          reloc->target_handle,
1416                          (int) reloc->offset,
1417                          reloc->read_domains,
1418                          reloc->write_domain);
1419                return -EINVAL;
1420        }
1421        if (unlikely((reloc->write_domain | reloc->read_domains)
1422                     & ~I915_GEM_GPU_DOMAINS)) {
1423                DRM_DEBUG("reloc with read/write non-GPU domains: "
1424                          "target %d offset %d "
1425                          "read %08x write %08x",
1426                          reloc->target_handle,
1427                          (int) reloc->offset,
1428                          reloc->read_domains,
1429                          reloc->write_domain);
1430                return -EINVAL;
1431        }
1432
1433        if (reloc->write_domain) {
1434                *target->exec_flags |= EXEC_OBJECT_WRITE;
1435
1436                /*
1437                 * Sandybridge PPGTT errata: We need a global gtt mapping
1438                 * for MI and pipe_control writes because the gpu doesn't
1439                 * properly redirect them through the ppgtt for non_secure
1440                 * batchbuffers.
1441                 */
1442                if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
1443                    IS_GEN(eb->i915, 6)) {
1444                        err = i915_vma_bind(target, target->obj->cache_level,
1445                                            PIN_GLOBAL);
1446                        if (WARN_ONCE(err,
1447                                      "Unexpected failure to bind target VMA!"))
1448                                return err;
1449                }
1450        }
1451
1452        /*
1453         * If the relocation already has the right value in it, no
1454         * more work needs to be done.
1455         */
1456        if (!DBG_FORCE_RELOC &&
1457            gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
1458                return 0;
1459
1460        /* Check that the relocation address is valid... */
1461        if (unlikely(reloc->offset >
1462                     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
1463                DRM_DEBUG("Relocation beyond object bounds: "
1464                          "target %d offset %d size %d.\n",
1465                          reloc->target_handle,
1466                          (int)reloc->offset,
1467                          (int)vma->size);
1468                return -EINVAL;
1469        }
1470        if (unlikely(reloc->offset & 3)) {
1471                DRM_DEBUG("Relocation not 4-byte aligned: "
1472                          "target %d offset %d.\n",
1473                          reloc->target_handle,
1474                          (int)reloc->offset);
1475                return -EINVAL;
1476        }
1477
1478        /*
1479         * If we write into the object, we need to force the synchronisation
1480         * barrier, either with an asynchronous clflush or if we executed the
1481         * patching using the GPU (though that should be serialised by the
1482         * timeline). To be completely sure, and since performing the
1483         * relocations means we are already stalling, disable the user's
1484         * opt-out of our synchronisation.
1485         */
1486        *vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
1487
1488        /* and update the user's relocation entry */
1489        return relocate_entry(vma, reloc, eb, target);
1490}
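
/*
 * Aside, userspace-side sketch (illustrative only): the entries validated by
 * eb_relocate_entry() come straight from the uapi. The struct and domain
 * flags below are real; the field values are made-up examples. If
 * presumed_offset still matches where the kernel placed the target, the
 * function returns early without patching the batch.
 */
static void example_fill_reloc(struct drm_i915_gem_relocation_entry *r,
			       __u32 target_handle, __u64 last_known_addr)
{
	r->target_handle   = target_handle;	/* GEM handle of the target bo */
	r->offset          = 0x40;		/* location in the batch to patch */
	r->delta           = 0;			/* added to the target's address */
	r->presumed_offset = last_known_addr;	/* hint for the fast path above */
	r->read_domains    = I915_GEM_DOMAIN_RENDER;
	r->write_domain    = 0;			/* read-only use of the target */
}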
1491
1492static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
1493{
1494#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1495        struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
1496        struct drm_i915_gem_relocation_entry __user *urelocs;
1497        const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1498        unsigned int remain;
1499
1500        urelocs = u64_to_user_ptr(entry->relocs_ptr);
1501        remain = entry->relocation_count;
1502        if (unlikely(remain > N_RELOC(ULONG_MAX)))
1503                return -EINVAL;
1504
1505        /*
1506         * We must check that the entire relocation array is safe
1507         * to read. However, if the array is not writable the user loses
1508         * the updated relocation values.
1509         */
1510        if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs))))
1511                return -EFAULT;
1512
1513        do {
1514                struct drm_i915_gem_relocation_entry *r = stack;
1515                unsigned int count =
1516                        min_t(unsigned int, remain, ARRAY_SIZE(stack));
1517                unsigned int copied;
1518
1519                /*
1520                 * This is the fast path and we cannot handle a pagefault
1521                 * whilst holding the struct mutex lest the user pass in the
1522                 * relocations contained within a mmaped bo. In such a case,
1523                 * the page fault handler would call i915_gem_fault() and
1524                 * we would try to acquire the struct mutex again. Obviously
1525                 * this is bad and so lockdep complains vehemently.
1526                 */
1527                pagefault_disable();
1528                copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1529                pagefault_enable();
1530                if (unlikely(copied)) {
1531                        remain = -EFAULT;
1532                        goto out;
1533                }
1534
1535                remain -= count;
1536                do {
1537                        u64 offset = eb_relocate_entry(eb, vma, r);
1538
1539                        if (likely(offset == 0)) {
1540                        } else if ((s64)offset < 0) {
1541                                remain = (int)offset;
1542                                goto out;
1543                        } else {
1544                                /*
1545                                 * Note that reporting an error now
1546                                 * leaves everything in an inconsistent
1547                                 * state as we have *already* changed
1548                                 * the relocation value inside the
1549                                 * object. As we have not changed the
1550                                 * reloc.presumed_offset and will not
1551                                 * change the execobject.offset, on a
1552                                 * subsequent call we may not rewrite the
1553                                 * value inside the object, leaving it
1554                                 * dangling and causing a GPU hang, unless
1555                                 * userspace dynamically rebuilds the
1556                                 * relocations on each execbuf rather than
1557                                 * presuming a static tree.
1558                                 *
1559                                 * We did previously check if the relocations
1560                                 * were writable (access_ok), an error now
1561                                 * would be a strange race with mprotect,
1562                                 * having already demonstrated that we
1563                                 * can read from this userspace address.
1564                                 */
1565                                offset = gen8_canonical_addr(offset & ~UPDATE);
1566                                if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) {
1567                                        remain = -EFAULT;
1568                                        goto out;
1569                                }
1570                        }
1571                } while (r++, --count);
1572                urelocs += ARRAY_SIZE(stack);
1573        } while (remain);
1574out:
1575        reloc_cache_reset(&eb->reloc_cache);
1576        return remain;
1577}
1578
1579static int
1580eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
1581{
1582        const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1583        struct drm_i915_gem_relocation_entry *relocs =
1584                u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1585        unsigned int i;
1586        int err;
1587
1588        for (i = 0; i < entry->relocation_count; i++) {
1589                u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
1590
1591                if ((s64)offset < 0) {
1592                        err = (int)offset;
1593                        goto err;
1594                }
1595        }
1596        err = 0;
1597err:
1598        reloc_cache_reset(&eb->reloc_cache);
1599        return err;
1600}
1601
1602static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1603{
1604        const char __user *addr, *end;
1605        unsigned long size;
1606        char __maybe_unused c;
1607
1608        size = entry->relocation_count;
1609        if (size == 0)
1610                return 0;
1611
1612        if (size > N_RELOC(ULONG_MAX))
1613                return -EINVAL;
1614
1615        addr = u64_to_user_ptr(entry->relocs_ptr);
1616        size *= sizeof(struct drm_i915_gem_relocation_entry);
1617        if (!access_ok(addr, size))
1618                return -EFAULT;
1619
1620        end = addr + size;
1621        for (; addr < end; addr += PAGE_SIZE) {
1622                int err = __get_user(c, addr);
1623                if (err)
1624                        return err;
1625        }
1626        return __get_user(c, end - 1);
1627}
1628
1629static int eb_copy_relocations(const struct i915_execbuffer *eb)
1630{
1631        struct drm_i915_gem_relocation_entry *relocs;
1632        const unsigned int count = eb->buffer_count;
1633        unsigned int i;
1634        int err;
1635
1636        for (i = 0; i < count; i++) {
1637                const unsigned int nreloc = eb->exec[i].relocation_count;
1638                struct drm_i915_gem_relocation_entry __user *urelocs;
1639                unsigned long size;
1640                unsigned long copied;
1641
1642                if (nreloc == 0)
1643                        continue;
1644
1645                err = check_relocations(&eb->exec[i]);
1646                if (err)
1647                        goto err;
1648
1649                urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
1650                size = nreloc * sizeof(*relocs);
1651
1652                relocs = kvmalloc_array(size, 1, GFP_KERNEL);
1653                if (!relocs) {
1654                        err = -ENOMEM;
1655                        goto err;
1656                }
1657
1658                /* copy_from_user is limited to < 4GiB */
1659                copied = 0;
1660                do {
1661                        unsigned int len =
1662                                min_t(u64, BIT_ULL(31), size - copied);
1663
1664                        if (__copy_from_user((char *)relocs + copied,
1665                                             (char __user *)urelocs + copied,
1666                                             len))
1667                                goto end;
1668
1669                        copied += len;
1670                } while (copied < size);
1671
1672                /*
1673                 * As we do not update the known relocation offsets after
1674                 * relocating (due to the complexities in lock handling),
1675                 * relocation processing next time, just in case the target
1676                 * object is evicted and then rebound into its old
1677                 * presumed_offset before the next execbuffer - if that
1678                 * happened, we would make the mistake of assuming that the
1679                 * happened we would make the mistake of assuming that the
1680                 * relocations were valid.
1681                 */
1682                if (!user_access_begin(urelocs, size))
1683                        goto end;
1684
1685                for (copied = 0; copied < nreloc; copied++)
1686                        unsafe_put_user(-1,
1687                                        &urelocs[copied].presumed_offset,
1688                                        end_user);
1689                user_access_end();
1690
1691                eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1692        }
1693
1694        return 0;
1695
1696end_user:
1697        user_access_end();
1698end:
1699        kvfree(relocs);
1700        err = -EFAULT;
1701err:
1702        while (i--) {
1703                relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1704                if (eb->exec[i].relocation_count)
1705                        kvfree(relocs);
1706        }
1707        return err;
1708}
1709
1710static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1711{
1712        const unsigned int count = eb->buffer_count;
1713        unsigned int i;
1714
1715        if (unlikely(i915_modparams.prefault_disable))
1716                return 0;
1717
1718        for (i = 0; i < count; i++) {
1719                int err;
1720
1721                err = check_relocations(&eb->exec[i]);
1722                if (err)
1723                        return err;
1724        }
1725
1726        return 0;
1727}
1728
1729static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
1730{
1731        struct drm_device *dev = &eb->i915->drm;
1732        bool have_copy = false;
1733        struct i915_vma *vma;
1734        int err = 0;
1735
1736repeat:
1737        if (signal_pending(current)) {
1738                err = -ERESTARTSYS;
1739                goto out;
1740        }
1741
1742        /* We may process another execbuffer during the unlock... */
1743        eb_reset_vmas(eb);
1744        mutex_unlock(&dev->struct_mutex);
1745
1746        /*
1747         * We take 3 passes through the slowpath.
1748         *
1749         * 1 - we try to just prefault all the user relocation entries and
1750         * then attempt to reuse the atomic pagefault disabled fast path again.
1751         *
1752         * 2 - we copy the user entries to a local buffer here outside of the
1753         * struct_mutex and allow ourselves to wait upon any rendering before
1754         * relocations.
1755         *
1756         * 3 - we already have a local copy of the relocation entries, but
1757         * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1758         */
1759        if (!err) {
1760                err = eb_prefault_relocations(eb);
1761        } else if (!have_copy) {
1762                err = eb_copy_relocations(eb);
1763                have_copy = err == 0;
1764        } else {
1765                cond_resched();
1766                err = 0;
1767        }
1768        if (err) {
1769                mutex_lock(&dev->struct_mutex);
1770                goto out;
1771        }
1772
1773        /* A frequent cause of EAGAIN is currently unavailable client pages */
1774        flush_workqueue(eb->i915->mm.userptr_wq);
1775
1776        err = i915_mutex_lock_interruptible(dev);
1777        if (err) {
1778                mutex_lock(&dev->struct_mutex);
1779                goto out;
1780        }
1781
1782        /* reacquire the objects */
1783        err = eb_lookup_vmas(eb);
1784        if (err)
1785                goto err;
1786
1787        GEM_BUG_ON(!eb->batch);
1788
1789        list_for_each_entry(vma, &eb->relocs, reloc_link) {
1790                if (!have_copy) {
1791                        pagefault_disable();
1792                        err = eb_relocate_vma(eb, vma);
1793                        pagefault_enable();
1794                        if (err)
1795                                goto repeat;
1796                } else {
1797                        err = eb_relocate_vma_slow(eb, vma);
1798                        if (err)
1799                                goto err;
1800                }
1801        }
1802
1803        /*
1804         * Leave the user relocations as they are; this is the painfully slow path,
1805         * and we want to avoid the complication of dropping the lock whilst
1806         * having buffers reserved in the aperture and so causing spurious
1807         * ENOSPC for random operations.
1808         */
1809
1810err:
1811        if (err == -EAGAIN)
1812                goto repeat;
1813
1814out:
1815        if (have_copy) {
1816                const unsigned int count = eb->buffer_count;
1817                unsigned int i;
1818
1819                for (i = 0; i < count; i++) {
1820                        const struct drm_i915_gem_exec_object2 *entry =
1821                                &eb->exec[i];
1822                        struct drm_i915_gem_relocation_entry *relocs;
1823
1824                        if (!entry->relocation_count)
1825                                continue;
1826
1827                        relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1828                        kvfree(relocs);
1829                }
1830        }
1831
1832        return err;
1833}
1834
1835static int eb_relocate(struct i915_execbuffer *eb)
1836{
1837        if (eb_lookup_vmas(eb))
1838                goto slow;
1839
1840        /* The objects are in their final locations, apply the relocations. */
1841        if (eb->args->flags & __EXEC_HAS_RELOC) {
1842                struct i915_vma *vma;
1843
1844                list_for_each_entry(vma, &eb->relocs, reloc_link) {
1845                        if (eb_relocate_vma(eb, vma))
1846                                goto slow;
1847                }
1848        }
1849
1850        return 0;
1851
1852slow:
1853        return eb_relocate_slow(eb);
1854}
1855
1856static int eb_move_to_gpu(struct i915_execbuffer *eb)
1857{
1858        const unsigned int count = eb->buffer_count;
1859        struct ww_acquire_ctx acquire;
1860        unsigned int i;
1861        int err = 0;
1862
1863        ww_acquire_init(&acquire, &reservation_ww_class);
1864
1865        for (i = 0; i < count; i++) {
1866                struct i915_vma *vma = eb->vma[i];
1867
1868                err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
1869                if (!err)
1870                        continue;
1871
1872                GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
1873
1874                if (err == -EDEADLK) {
1875                        GEM_BUG_ON(i == 0);
1876                        do {
1877                                int j = i - 1;
1878
1879                                ww_mutex_unlock(&eb->vma[j]->resv->lock);
1880
1881                                swap(eb->flags[i], eb->flags[j]);
1882                                swap(eb->vma[i],  eb->vma[j]);
1883                                eb->vma[i]->exec_flags = &eb->flags[i];
1884                        } while (--i);
1885                        GEM_BUG_ON(vma != eb->vma[0]);
1886                        vma->exec_flags = &eb->flags[0];
1887
1888                        err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
1889                                                               &acquire);
1890                }
1891                if (err)
1892                        break;
1893        }
1894        ww_acquire_done(&acquire);
1895
1896        while (i--) {
1897                unsigned int flags = eb->flags[i];
1898                struct i915_vma *vma = eb->vma[i];
1899                struct drm_i915_gem_object *obj = vma->obj;
1900
1901                assert_vma_held(vma);
1902
1903                if (flags & EXEC_OBJECT_CAPTURE) {
1904                        struct i915_capture_list *capture;
1905
1906                        capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1907                        if (capture) {
1908                                capture->next = eb->request->capture_list;
1909                                capture->vma = vma;
1910                                eb->request->capture_list = capture;
1911                        }
1912                }
1913
1914                /*
1915                 * If the GPU is not _reading_ through the CPU cache, we need
1916                 * to make sure that any writes (both previous GPU writes from
1917                 * before a change in snooping levels and normal CPU writes)
1918                 * caught in that cache are flushed to main memory.
1919                 *
1920                 * We want to say
1921                 *   obj->cache_dirty &&
1922                 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
1923                 * but gcc's optimiser doesn't handle that as well and emits
1924                 * two jumps instead of one. Maybe one day...
1925                 */
1926                if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
1927                        if (i915_gem_clflush_object(obj, 0))
1928                                flags &= ~EXEC_OBJECT_ASYNC;
1929                }
1930
1931                if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
1932                        err = i915_request_await_object
1933                                (eb->request, obj, flags & EXEC_OBJECT_WRITE);
1934                }
1935
1936                if (err == 0)
1937                        err = i915_vma_move_to_active(vma, eb->request, flags);
1938
1939                i915_vma_unlock(vma);
1940
1941                __eb_unreserve_vma(vma, flags);
1942                vma->exec_flags = NULL;
1943
1944                if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
1945                        i915_vma_put(vma);
1946        }
1947        ww_acquire_fini(&acquire);
1948
1949        if (unlikely(err))
1950                goto err_skip;
1951
1952        eb->exec = NULL;
1953
1954        /* Unconditionally flush any chipset caches (for streaming writes). */
1955        i915_gem_chipset_flush(eb->i915);
1956        return 0;
1957
1958err_skip:
1959        i915_request_skip(eb->request, err);
1960        return err;
1961}
1962
1963static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1964{
1965        if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
1966                return false;
1967
1968        /* Kernel clipping was a DRI1 misfeature */
1969        if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
1970                if (exec->num_cliprects || exec->cliprects_ptr)
1971                        return false;
1972        }
1973
1974        if (exec->DR4 == 0xffffffff) {
1975                DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1976                exec->DR4 = 0;
1977        }
1978        if (exec->DR1 || exec->DR4)
1979                return false;
1980
1981        if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1982                return false;
1983
1984        return true;
1985}
1986
1987static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
1988{
1989        u32 *cs;
1990        int i;
1991
1992        if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) {
1993                DRM_DEBUG("sol reset is gen7/rcs only\n");
1994                return -EINVAL;
1995        }
1996
1997        cs = intel_ring_begin(rq, 4 * 2 + 2);
1998        if (IS_ERR(cs))
1999                return PTR_ERR(cs);
2000
2001        *cs++ = MI_LOAD_REGISTER_IMM(4);
2002        for (i = 0; i < 4; i++) {
2003                *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
2004                *cs++ = 0;
2005        }
2006        *cs++ = MI_NOOP;
2007        intel_ring_advance(rq, cs);
2008
2009        return 0;
2010}
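
/*
 * Aside, illustrative sketch with a hypothetical helper: the function above
 * follows the intel_ring_begin()/intel_ring_advance() contract - reserve a
 * dword count padded to an even number, write exactly that many dwords,
 * then advance. Emitting a single register write takes the same shape:
 */
static int example_emit_single_lri(struct i915_request *rq,
				   i915_reg_t reg, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4); /* LRI header + offset + value + pad */
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(reg);
	*cs++ = value;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}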
2011
2012static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
2013{
2014        struct drm_i915_gem_object *shadow_batch_obj;
2015        struct i915_vma *vma;
2016        int err;
2017
2018        shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
2019                                                   PAGE_ALIGN(eb->batch_len));
2020        if (IS_ERR(shadow_batch_obj))
2021                return ERR_CAST(shadow_batch_obj);
2022
2023        err = intel_engine_cmd_parser(eb->engine,
2024                                      eb->batch->obj,
2025                                      shadow_batch_obj,
2026                                      eb->batch_start_offset,
2027                                      eb->batch_len,
2028                                      is_master);
2029        if (err) {
2030                if (err == -EACCES) /* unhandled chained batch */
2031                        vma = NULL;
2032                else
2033                        vma = ERR_PTR(err);
2034                goto out;
2035        }
2036
2037        vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
2038        if (IS_ERR(vma))
2039                goto out;
2040
2041        eb->vma[eb->buffer_count] = i915_vma_get(vma);
2042        eb->flags[eb->buffer_count] =
2043                __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
2044        vma->exec_flags = &eb->flags[eb->buffer_count];
2045        eb->buffer_count++;
2046
2047out:
2048        i915_gem_object_unpin_pages(shadow_batch_obj);
2049        return vma;
2050}
2051
2052static void
2053add_to_client(struct i915_request *rq, struct drm_file *file)
2054{
2055        rq->file_priv = file->driver_priv;
2056        list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
2057}
2058
2059static int eb_submit(struct i915_execbuffer *eb)
2060{
2061        int err;
2062
2063        err = eb_move_to_gpu(eb);
2064        if (err)
2065                return err;
2066
2067        if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
2068                err = i915_reset_gen7_sol_offsets(eb->request);
2069                if (err)
2070                        return err;
2071        }
2072
2073        /*
2074         * After we completed waiting for other engines (using HW semaphores)
2075         * then we can signal that this request/batch is ready to run. This
2076         * allows us to determine if the batch is still waiting on the GPU
2077         * or actually running by checking the breadcrumb.
2078         */
2079        if (eb->engine->emit_init_breadcrumb) {
2080                err = eb->engine->emit_init_breadcrumb(eb->request);
2081                if (err)
2082                        return err;
2083        }
2084
2085        err = eb->engine->emit_bb_start(eb->request,
2086                                        eb->batch->node.start +
2087                                        eb->batch_start_offset,
2088                                        eb->batch_len,
2089                                        eb->batch_flags);
2090        if (err)
2091                return err;
2092
2093        return 0;
2094}
2095
2096/*
2097 * Find one BSD ring to dispatch the corresponding BSD command.
2098 * The engine index is returned.
2099 */
2100static unsigned int
2101gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
2102                         struct drm_file *file)
2103{
2104        struct drm_i915_file_private *file_priv = file->driver_priv;
2105
2106        /* Check whether the file_priv has already selected one ring. */
2107        if ((int)file_priv->bsd_engine < 0)
2108                file_priv->bsd_engine = atomic_fetch_xor(1,
2109                         &dev_priv->mm.bsd_engine_dispatch_index);
2110
2111        return file_priv->bsd_engine;
2112}
2113
2114static const enum intel_engine_id user_ring_map[] = {
2115        [I915_EXEC_DEFAULT]     = RCS0,
2116        [I915_EXEC_RENDER]      = RCS0,
2117        [I915_EXEC_BLT]         = BCS0,
2118        [I915_EXEC_BSD]         = VCS0,
2119        [I915_EXEC_VEBOX]       = VECS0
2120};
2121
2122static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
2123{
2124        int err;
2125
2126        /*
2127         * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
2128         * EIO if the GPU is already wedged.
2129         */
2130        err = i915_terminally_wedged(eb->i915);
2131        if (err)
2132                return err;
2133
2134        /*
2135         * Pinning the contexts may generate requests in order to acquire
2136         * GGTT space, so do this first before we reserve a seqno for
2137         * ourselves.
2138         */
2139        err = intel_context_pin(ce);
2140        if (err)
2141                return err;
2142
2143        eb->engine = ce->engine;
2144        eb->context = ce;
2145        return 0;
2146}
2147
2148static void eb_unpin_context(struct i915_execbuffer *eb)
2149{
2150        intel_context_unpin(eb->context);
2151}
2152
2153static unsigned int
2154eb_select_legacy_ring(struct i915_execbuffer *eb,
2155                      struct drm_file *file,
2156                      struct drm_i915_gem_execbuffer2 *args)
2157{
2158        struct drm_i915_private *i915 = eb->i915;
2159        unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
2160
2161        if (user_ring_id != I915_EXEC_BSD &&
2162            (args->flags & I915_EXEC_BSD_MASK)) {
2163                DRM_DEBUG("execbuf with non bsd ring but with invalid "
2164                          "bsd dispatch flags: %d\n", (int)(args->flags));
2165                return -1;
2166        }
2167
2168        if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
2169                unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
2170
2171                if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
2172                        bsd_idx = gen8_dispatch_bsd_engine(i915, file);
2173                } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
2174                           bsd_idx <= I915_EXEC_BSD_RING2) {
2175                        bsd_idx >>= I915_EXEC_BSD_SHIFT;
2176                        bsd_idx--;
2177                } else {
2178                        DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
2179                                  bsd_idx);
2180                        return -1;
2181                }
2182
2183                return _VCS(bsd_idx);
2184        }
2185
2186        if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
2187                DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
2188                return -1;
2189        }
2190
2191        return user_ring_map[user_ring_id];
2192}
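
/*
 * Aside, userspace-side sketch (illustrative only): the legacy ring
 * selection above is driven purely by execbuffer2 flags. For example, to
 * bypass the ping-pong choice made by gen8_dispatch_bsd_engine() and pin a
 * submission to the second video engine:
 */
static void example_request_second_vcs(struct drm_i915_gem_execbuffer2 *args)
{
	args->flags &= ~(I915_EXEC_RING_MASK | I915_EXEC_BSD_MASK);
	args->flags |= I915_EXEC_BSD | I915_EXEC_BSD_RING2;
}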
2193
2194static int
2195eb_select_engine(struct i915_execbuffer *eb,
2196                 struct drm_file *file,
2197                 struct drm_i915_gem_execbuffer2 *args)
2198{
2199        struct intel_context *ce;
2200        unsigned int idx;
2201        int err;
2202
2203        if (i915_gem_context_user_engines(eb->gem_context))
2204                idx = args->flags & I915_EXEC_RING_MASK;
2205        else
2206                idx = eb_select_legacy_ring(eb, file, args);
2207
2208        ce = i915_gem_context_get_engine(eb->gem_context, idx);
2209        if (IS_ERR(ce))
2210                return PTR_ERR(ce);
2211
2212        err = eb_pin_context(eb, ce);
2213        intel_context_put(ce);
2214
2215        return err;
2216}
2217
2218static void
2219__free_fence_array(struct drm_syncobj **fences, unsigned int n)
2220{
2221        while (n--)
2222                drm_syncobj_put(ptr_mask_bits(fences[n], 2));
2223        kvfree(fences);
2224}
2225
2226static struct drm_syncobj **
2227get_fence_array(struct drm_i915_gem_execbuffer2 *args,
2228                struct drm_file *file)
2229{
2230        const unsigned long nfences = args->num_cliprects;
2231        struct drm_i915_gem_exec_fence __user *user;
2232        struct drm_syncobj **fences;
2233        unsigned long n;
2234        int err;
2235
2236        if (!(args->flags & I915_EXEC_FENCE_ARRAY))
2237                return NULL;
2238
2239        /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2240        BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2241        if (nfences > min_t(unsigned long,
2242                            ULONG_MAX / sizeof(*user),
2243                            SIZE_MAX / sizeof(*fences)))
2244                return ERR_PTR(-EINVAL);
2245
2246        user = u64_to_user_ptr(args->cliprects_ptr);
2247        if (!access_ok(user, nfences * sizeof(*user)))
2248                return ERR_PTR(-EFAULT);
2249
2250        fences = kvmalloc_array(nfences, sizeof(*fences),
2251                                __GFP_NOWARN | GFP_KERNEL);
2252        if (!fences)
2253                return ERR_PTR(-ENOMEM);
2254
2255        for (n = 0; n < nfences; n++) {
2256                struct drm_i915_gem_exec_fence fence;
2257                struct drm_syncobj *syncobj;
2258
2259                if (__copy_from_user(&fence, user++, sizeof(fence))) {
2260                        err = -EFAULT;
2261                        goto err;
2262                }
2263
2264                if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
2265                        err = -EINVAL;
2266                        goto err;
2267                }
2268
2269                syncobj = drm_syncobj_find(file, fence.handle);
2270                if (!syncobj) {
2271                        DRM_DEBUG("Invalid syncobj handle provided\n");
2272                        err = -ENOENT;
2273                        goto err;
2274                }
2275
2276                BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2277                             ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2278
2279                fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
2280        }
2281
2282        return fences;
2283
2284err:
2285        __free_fence_array(fences, n);
2286        return ERR_PTR(err);
2287}
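
/*
 * Aside, illustrative sketch with hypothetical helpers: get_fence_array()
 * stows the two I915_EXEC_FENCE_* flag bits in the low bits of each syncobj
 * pointer via ptr_pack_bits()/ptr_unpack_bits(), relying on allocation
 * alignment to keep those bits clear. The same idea in standalone form:
 */
static inline void *example_pack_low_bits(void *ptr, unsigned long bits)
{
	return (void *)((unsigned long)ptr | (bits & 3));
}

static inline void *example_unpack_low_bits(void *packed, unsigned long *bits)
{
	*bits = (unsigned long)packed & 3;
	return (void *)((unsigned long)packed & ~3ul);
}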
2288
2289static void
2290put_fence_array(struct drm_i915_gem_execbuffer2 *args,
2291                struct drm_syncobj **fences)
2292{
2293        if (fences)
2294                __free_fence_array(fences, args->num_cliprects);
2295}
2296
2297static int
2298await_fence_array(struct i915_execbuffer *eb,
2299                  struct drm_syncobj **fences)
2300{
2301        const unsigned int nfences = eb->args->num_cliprects;
2302        unsigned int n;
2303        int err;
2304
2305        for (n = 0; n < nfences; n++) {
2306                struct drm_syncobj *syncobj;
2307                struct dma_fence *fence;
2308                unsigned int flags;
2309
2310                syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2311                if (!(flags & I915_EXEC_FENCE_WAIT))
2312                        continue;
2313
2314                fence = drm_syncobj_fence_get(syncobj);
2315                if (!fence)
2316                        return -EINVAL;
2317
2318                err = i915_request_await_dma_fence(eb->request, fence);
2319                dma_fence_put(fence);
2320                if (err < 0)
2321                        return err;
2322        }
2323
2324        return 0;
2325}
2326
2327static void
2328signal_fence_array(struct i915_execbuffer *eb,
2329                   struct drm_syncobj **fences)
2330{
2331        const unsigned int nfences = eb->args->num_cliprects;
2332        struct dma_fence * const fence = &eb->request->fence;
2333        unsigned int n;
2334
2335        for (n = 0; n < nfences; n++) {
2336                struct drm_syncobj *syncobj;
2337                unsigned int flags;
2338
2339                syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2340                if (!(flags & I915_EXEC_FENCE_SIGNAL))
2341                        continue;
2342
2343                drm_syncobj_replace_fence(syncobj, fence);
2344        }
2345}
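
/*
 * Aside, userspace-side sketch (illustrative only): with
 * I915_EXEC_FENCE_ARRAY set, the cliprects fields are repurposed to carry
 * the syncobj array consumed by await_fence_array() and
 * signal_fence_array() above. The two handles below are assumed to be
 * pre-created drm_syncobj handles.
 */
static void example_attach_fence_array(struct drm_i915_gem_execbuffer2 *args,
				       struct drm_i915_gem_exec_fence *fences,
				       __u32 wait_handle, __u32 signal_handle)
{
	fences[0].handle = wait_handle;
	fences[0].flags  = I915_EXEC_FENCE_WAIT;	/* await before execution */
	fences[1].handle = signal_handle;
	fences[1].flags  = I915_EXEC_FENCE_SIGNAL;	/* signalled by this request */

	args->flags |= I915_EXEC_FENCE_ARRAY;
	args->num_cliprects = 2;
	args->cliprects_ptr = (uintptr_t)fences;
}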
2346
2347static int
2348i915_gem_do_execbuffer(struct drm_device *dev,
2349                       struct drm_file *file,
2350                       struct drm_i915_gem_execbuffer2 *args,
2351                       struct drm_i915_gem_exec_object2 *exec,
2352                       struct drm_syncobj **fences)
2353{
2354        struct i915_execbuffer eb;
2355        struct dma_fence *in_fence = NULL;
2356        struct dma_fence *exec_fence = NULL;
2357        struct sync_file *out_fence = NULL;
2358        int out_fence_fd = -1;
2359        int err;
2360
2361        BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
2362        BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
2363                     ~__EXEC_OBJECT_UNKNOWN_FLAGS);
2364
2365        eb.i915 = to_i915(dev);
2366        eb.file = file;
2367        eb.args = args;
2368        if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
2369                args->flags |= __EXEC_HAS_RELOC;
2370
2371        eb.exec = exec;
2372        eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
2373        eb.vma[0] = NULL;
2374        eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
2375
2376        eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
2377        reloc_cache_init(&eb.reloc_cache, eb.i915);
2378
2379        eb.buffer_count = args->buffer_count;
2380        eb.batch_start_offset = args->batch_start_offset;
2381        eb.batch_len = args->batch_len;
2382
2383        eb.batch_flags = 0;
2384        if (args->flags & I915_EXEC_SECURE) {
2385                if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
2386                        return -EPERM;
2387
2388                eb.batch_flags |= I915_DISPATCH_SECURE;
2389        }
2390        if (args->flags & I915_EXEC_IS_PINNED)
2391                eb.batch_flags |= I915_DISPATCH_PINNED;
2392
2393        if (args->flags & I915_EXEC_FENCE_IN) {
2394                in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
2395                if (!in_fence)
2396                        return -EINVAL;
2397        }
2398
2399        if (args->flags & I915_EXEC_FENCE_SUBMIT) {
2400                if (in_fence) {
2401                        err = -EINVAL;
2402                        goto err_in_fence;
2403                }
2404
2405                exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
2406                if (!exec_fence) {
2407                        err = -EINVAL;
2408                        goto err_in_fence;
2409                }
2410        }
2411
2412        if (args->flags & I915_EXEC_FENCE_OUT) {
2413                out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
2414                if (out_fence_fd < 0) {
2415                        err = out_fence_fd;
2416                        goto err_exec_fence;
2417                }
2418        }
2419
2420        err = eb_create(&eb);
2421        if (err)
2422                goto err_out_fence;
2423
2424        GEM_BUG_ON(!eb.lut_size);
2425
2426        err = eb_select_context(&eb);
2427        if (unlikely(err))
2428                goto err_destroy;
2429
2430        /*
2431         * Take a local wakeref for preparing to dispatch the execbuf as
2432         * we expect to access the hardware fairly frequently in the
2433         * process. Upon first dispatch, we acquire another prolonged
2434         * wakeref that we hold until the GPU has been idle for at least
2435         * 100ms.
2436         */
2437        intel_gt_pm_get(eb.i915);
2438
2439        err = i915_mutex_lock_interruptible(dev);
2440        if (err)
2441                goto err_rpm;
2442
2443        err = eb_select_engine(&eb, file, args);
2444        if (unlikely(err))
2445                goto err_unlock;
2446
2447        err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
2448        if (unlikely(err))
2449                goto err_engine;
2450
2451        err = eb_relocate(&eb);
2452        if (err) {
2453                /*
2454                 * If the user expects the execobject.offset and
2455                 * reloc.presumed_offset to be an exact match,
2456                 * as for using NO_RELOC, then we cannot update
2457                 * the execobject.offset until we have completed
2458                 * relocation.
2459                 */
2460                args->flags &= ~__EXEC_HAS_RELOC;
2461                goto err_vma;
2462        }
2463
2464        if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
2465                DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
2466                err = -EINVAL;
2467                goto err_vma;
2468        }
2469        if (eb.batch_start_offset > eb.batch->size ||
2470            eb.batch_len > eb.batch->size - eb.batch_start_offset) {
2471                DRM_DEBUG("Attempting to use out-of-bounds batch\n");
2472                err = -EINVAL;
2473                goto err_vma;
2474        }
2475
2476        if (eb_use_cmdparser(&eb)) {
2477                struct i915_vma *vma;
2478
2479                vma = eb_parse(&eb, drm_is_current_master(file));
2480                if (IS_ERR(vma)) {
2481                        err = PTR_ERR(vma);
2482                        goto err_vma;
2483                }
2484
2485                if (vma) {
2486                        /*
2487                         * Batch parsed and accepted:
2488                         *
2489                         * Set the DISPATCH_SECURE bit to remove the NON_SECURE
2490                         * bit from MI_BATCH_BUFFER_START commands issued in
2491                         * the dispatch_execbuffer implementations. We
2492                         * specifically don't want that set on batches the
2493                         * command parser has accepted.
2494                         */
2495                        eb.batch_flags |= I915_DISPATCH_SECURE;
2496                        eb.batch_start_offset = 0;
2497                        eb.batch = vma;
2498                }
2499        }
2500
2501        if (eb.batch_len == 0)
2502                eb.batch_len = eb.batch->size - eb.batch_start_offset;
2503
2504        /*
2505         * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2506         * batch" bit. Hence we need to pin secure batches into the global gtt.
2507         * hsw should have this fixed, but bdw mucks it up again. */
2508        if (eb.batch_flags & I915_DISPATCH_SECURE) {
2509                struct i915_vma *vma;
2510
2511                /*
2512                 * So on first glance it looks freaky that we pin the batch here
2513                 * outside of the reservation loop. But:
2514                 * - The batch is already pinned into the relevant ppgtt, so we
2515                 *   already have the backing storage fully allocated.
2516                 * - No other BO uses the global gtt (well contexts, but meh),
2517                 *   so we don't really have issues with multiple objects not
2518                 *   fitting due to fragmentation.
2519                 * So this is actually safe.
2520                 */
2521                vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
2522                if (IS_ERR(vma)) {
2523                        err = PTR_ERR(vma);
2524                        goto err_vma;
2525                }
2526
2527                eb.batch = vma;
2528        }
2529
2530        /* All GPU relocation batches must be submitted prior to the user rq */
2531        GEM_BUG_ON(eb.reloc_cache.rq);
2532
2533        /* Allocate a request for this batch buffer nice and early. */
2534        eb.request = i915_request_create(eb.context);
2535        if (IS_ERR(eb.request)) {
2536                err = PTR_ERR(eb.request);
2537                goto err_batch_unpin;
2538        }
2539
2540        if (in_fence) {
2541                err = i915_request_await_dma_fence(eb.request, in_fence);
2542                if (err < 0)
2543                        goto err_request;
2544        }
2545
2546        if (exec_fence) {
2547                err = i915_request_await_execution(eb.request, exec_fence,
2548                                                   eb.engine->bond_execute);
2549                if (err < 0)
2550                        goto err_request;
2551        }
2552
2553        if (fences) {
2554                err = await_fence_array(&eb, fences);
2555                if (err)
2556                        goto err_request;
2557        }
2558
2559        if (out_fence_fd != -1) {
2560                out_fence = sync_file_create(&eb.request->fence);
2561                if (!out_fence) {
2562                        err = -ENOMEM;
2563                        goto err_request;
2564                }
2565        }
2566
2567        /*
2568         * Whilst this request exists, batch_obj will be on the
2569         * active_list, and so will hold the active reference. Only when this
2570         * request is retired will the batch_obj be moved onto the
2571         * inactive_list and lose its active reference. Hence we do not need
2572         * to explicitly hold another reference here.
2573         */
2574        eb.request->batch = eb.batch;
2575
2576        trace_i915_request_queue(eb.request, eb.batch_flags);
2577        err = eb_submit(&eb);
2578err_request:
2579        add_to_client(eb.request, file);
2580        i915_request_add(eb.request);
2581
2582        if (fences)
2583                signal_fence_array(&eb, fences);
2584
2585        if (out_fence) {
2586                if (err == 0) {
2587                        fd_install(out_fence_fd, out_fence->file);
2588                        args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
2589                        args->rsvd2 |= (u64)out_fence_fd << 32;
2590                        out_fence_fd = -1;
2591                } else {
2592                        fput(out_fence->file);
2593                }
2594        }
2595
2596err_batch_unpin:
2597        if (eb.batch_flags & I915_DISPATCH_SECURE)
2598                i915_vma_unpin(eb.batch);
2599err_vma:
2600        if (eb.exec)
2601                eb_release_vmas(&eb);
2602err_engine:
2603        eb_unpin_context(&eb);
2604err_unlock:
2605        mutex_unlock(&dev->struct_mutex);
2606err_rpm:
2607        intel_gt_pm_put(eb.i915);
2608        i915_gem_context_put(eb.gem_context);
2609err_destroy:
2610        eb_destroy(&eb);
2611err_out_fence:
2612        if (out_fence_fd != -1)
2613                put_unused_fd(out_fence_fd);
2614err_exec_fence:
2615        dma_fence_put(exec_fence);
2616err_in_fence:
2617        dma_fence_put(in_fence);
2618        return err;
2619}
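
/*
 * Aside, userspace-side sketch (illustrative only): the rsvd2 handling in
 * i915_gem_do_execbuffer() packs two sync_file fds into one u64 - the
 * in-fence fd is read from the low 32 bits, and on success the freshly
 * installed out-fence fd is returned in the high 32 bits. The ioctl call
 * itself is omitted here.
 */
static int example_fence_fd_packing(struct drm_i915_gem_execbuffer2 *args,
				    int in_fence_fd)
{
	args->flags |= I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
	args->rsvd2 = (__u32)in_fence_fd;

	/* ... DRM_IOCTL_I915_GEM_EXECBUFFER2 would be issued here ... */

	return (int)(args->rsvd2 >> 32);	/* out-fence fd on success */
}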
2620
2621static size_t eb_element_size(void)
2622{
2623        return (sizeof(struct drm_i915_gem_exec_object2) +
2624                sizeof(struct i915_vma *) +
2625                sizeof(unsigned int));
2626}
2627
2628static bool check_buffer_count(size_t count)
2629{
2630        const size_t sz = eb_element_size();
2631
2632        /*
2633         * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
2634         * array size (see eb_create()). Otherwise, we can accept an array as
2635         * large as can be addressed (though use large arrays at your peril)!
2636         */
2637
2638        return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
2639}
2640
2641/*
2642 * Legacy execbuffer just creates an exec2 list from the original exec object
2643 * list array and passes it to the real function.
2644 */
2645int
2646i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
2647                          struct drm_file *file)
2648{
2649        struct drm_i915_gem_execbuffer *args = data;
2650        struct drm_i915_gem_execbuffer2 exec2;
2651        struct drm_i915_gem_exec_object *exec_list = NULL;
2652        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
2653        const size_t count = args->buffer_count;
2654        unsigned int i;
2655        int err;
2656
2657        if (!check_buffer_count(count)) {
2658                DRM_DEBUG("execbuf2 with %zd buffers\n", count);
2659                return -EINVAL;
2660        }
2661
2662        exec2.buffers_ptr = args->buffers_ptr;
2663        exec2.buffer_count = args->buffer_count;
2664        exec2.batch_start_offset = args->batch_start_offset;
2665        exec2.batch_len = args->batch_len;
2666        exec2.DR1 = args->DR1;
2667        exec2.DR4 = args->DR4;
2668        exec2.num_cliprects = args->num_cliprects;
2669        exec2.cliprects_ptr = args->cliprects_ptr;
2670        exec2.flags = I915_EXEC_RENDER;
2671        i915_execbuffer2_set_context_id(exec2, 0);
2672
2673        if (!i915_gem_check_execbuffer(&exec2))
2674                return -EINVAL;
2675
2676        /* Copy in the exec list from userland */
2677        exec_list = kvmalloc_array(count, sizeof(*exec_list),
2678                                   __GFP_NOWARN | GFP_KERNEL);
2679        exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2680                                    __GFP_NOWARN | GFP_KERNEL);
2681        if (exec_list == NULL || exec2_list == NULL) {
2682                DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
2683                          args->buffer_count);
2684                kvfree(exec_list);
2685                kvfree(exec2_list);
2686                return -ENOMEM;
2687        }
2688        err = copy_from_user(exec_list,
2689                             u64_to_user_ptr(args->buffers_ptr),
2690                             sizeof(*exec_list) * count);
2691        if (err) {
2692                DRM_DEBUG("copy %d exec entries failed %d\n",
2693                          args->buffer_count, err);
2694                kvfree(exec_list);
2695                kvfree(exec2_list);
2696                return -EFAULT;
2697        }
2698
2699        for (i = 0; i < args->buffer_count; i++) {
2700                exec2_list[i].handle = exec_list[i].handle;
2701                exec2_list[i].relocation_count = exec_list[i].relocation_count;
2702                exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
2703                exec2_list[i].alignment = exec_list[i].alignment;
2704                exec2_list[i].offset = exec_list[i].offset;
2705                if (INTEL_GEN(to_i915(dev)) < 4)
2706                        exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
2707                else
2708                        exec2_list[i].flags = 0;
2709        }
2710
2711        err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
2712        if (exec2.flags & __EXEC_HAS_RELOC) {
2713                struct drm_i915_gem_exec_object __user *user_exec_list =
2714                        u64_to_user_ptr(args->buffers_ptr);
2715
2716                /* Copy the new buffer offsets back to the user's exec list. */
2717                for (i = 0; i < args->buffer_count; i++) {
2718                        if (!(exec2_list[i].offset & UPDATE))
2719                                continue;
2720
2721                        exec2_list[i].offset =
2722                                gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2723                        exec2_list[i].offset &= PIN_OFFSET_MASK;
2724                        if (__copy_to_user(&user_exec_list[i].offset,
2725                                           &exec2_list[i].offset,
2726                                           sizeof(user_exec_list[i].offset)))
2727                                break;
2728                }
2729        }
2730
2731        kvfree(exec_list);
2732        kvfree(exec2_list);
2733        return err;
2734}
2735
2736int
2737i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
2738                           struct drm_file *file)
2739{
2740        struct drm_i915_gem_execbuffer2 *args = data;
2741        struct drm_i915_gem_exec_object2 *exec2_list;
2742        struct drm_syncobj **fences = NULL;
2743        const size_t count = args->buffer_count;
2744        int err;
2745
2746        if (!check_buffer_count(count)) {
2747                DRM_DEBUG("execbuf2 with %zd buffers\n", count);
2748                return -EINVAL;
2749        }
2750
2751        if (!i915_gem_check_execbuffer(args))
2752                return -EINVAL;
2753
2754        /* Allocate an extra slot for use by the command parser */
2755        exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2756                                    __GFP_NOWARN | GFP_KERNEL);
2757        if (exec2_list == NULL) {
2758                DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
2759                          count);
2760                return -ENOMEM;
2761        }
2762        if (copy_from_user(exec2_list,
2763                           u64_to_user_ptr(args->buffers_ptr),
2764                           sizeof(*exec2_list) * count)) {
2765                DRM_DEBUG("copy %zd exec entries failed\n", count);
2766                kvfree(exec2_list);
2767                return -EFAULT;
2768        }
2769
2770        if (args->flags & I915_EXEC_FENCE_ARRAY) {
2771                fences = get_fence_array(args, file);
2772                if (IS_ERR(fences)) {
2773                        kvfree(exec2_list);
2774                        return PTR_ERR(fences);
2775                }
2776        }
2777
2778        err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
2779
2780        /*
2781         * Now that we have begun execution of the batchbuffer, we ignore
2782         * any new error after this point. Also given that we have already
2783         * updated the associated relocations, we try to write out the current
2784         * object locations irrespective of any error.
2785         */
2786        if (args->flags & __EXEC_HAS_RELOC) {
2787                struct drm_i915_gem_exec_object2 __user *user_exec_list =
2788                        u64_to_user_ptr(args->buffers_ptr);
2789                unsigned int i;
2790
2791                /* Copy the new buffer offsets back to the user's exec list. */
2792                /*
2793                 * Note: count * sizeof(*user_exec_list) does not overflow,
2794                 * because we checked 'count' in check_buffer_count().
2795                 *
2796                 * And this range already got effectively checked earlier
2797                 * when we did the "copy_from_user()" above.
2798                 */
2799                if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
2800                        goto end;
2801
2802                for (i = 0; i < args->buffer_count; i++) {
2803                        if (!(exec2_list[i].offset & UPDATE))
2804                                continue;
2805
2806                        exec2_list[i].offset =
2807                                gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2808                        unsafe_put_user(exec2_list[i].offset,
2809                                        &user_exec_list[i].offset,
2810                                        end_user);
2811                }
2812end_user:
2813                user_access_end();
2814end:;
2815        }
2816
2817        args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
2818        put_fence_array(args, fences);
2819        kvfree(exec2_list);
2820        return err;
2821}
2822
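/*
 * Aside, userspace-side sketch (illustrative only): a minimal submission
 * through the execbuffer2 ioctl handled above, assuming libdrm's drmIoctl()
 * and an already-created batch bo whose handle and length are supplied by
 * the caller. With a single buffer and no relocations, the batch is also
 * the last (and only) entry in the object list.
 */
static int example_submit_batch(int drm_fd, __u32 batch_handle, __u32 batch_len)
{
	struct drm_i915_gem_exec_object2 obj = {
		.handle = batch_handle,
		.offset = 0,	/* presumed address; the kernel may move it */
	};
	struct drm_i915_gem_execbuffer2 args = {
		.buffers_ptr  = (uintptr_t)&obj,
		.buffer_count = 1,
		.batch_len    = batch_len,
		.flags        = I915_EXEC_RENDER,
	};

	return drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &args);
}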