linux/drivers/gpu/drm/i915/gem/i915_gem_domain.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

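/*
 * GPU writes to an object held in a write-back cacheable state (anything
 * other than I915_CACHE_NONE or I915_CACHE_WT) may need to be flushed out
 * of the CPU caches before the data is used elsewhere, so such objects are
 * marked cache_dirty when the render write domain is flushed below.
 */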
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        return !(obj->cache_level == I915_CACHE_NONE ||
                 obj->cache_level == I915_CACHE_WT);
}

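/*
 * Flush any writes pending in the object's current write domain: GGTT writes
 * are flushed per-vma (followed by a frontbuffer flush), WC writes need only
 * a write barrier, CPU writes are clflushed, and render writes may leave the
 * CPU caches dirty (see gpu_write_needs_clflush()). The write domain is then
 * cleared.
 */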
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
        struct i915_vma *vma;

        assert_object_held(obj);

        if (!(obj->write_domain & flush_domains))
                return;

        switch (obj->write_domain) {
        case I915_GEM_DOMAIN_GTT:
                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj) {
                        if (i915_vma_unset_ggtt_write(vma))
                                intel_gt_flush_ggtt_writes(vma->vm->gt);
                }
                spin_unlock(&obj->vma.lock);

                i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
                break;

        case I915_GEM_DOMAIN_WC:
                wmb();
                break;

        case I915_GEM_DOMAIN_CPU:
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                break;

        case I915_GEM_DOMAIN_RENDER:
                if (gpu_write_needs_clflush(obj))
                        obj->cache_dirty = true;
                break;
        }

        obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
        /*
         * We manually flush the CPU domain so that we can override and
         * force the flush for the display, and perform it asynchronously.
         */
        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
        if (obj->cache_dirty)
                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->write_domain = 0;
}

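/*
 * The display engine reads directly from memory, so an object currently in
 * use as a framebuffer must have any CPU-side dirt pushed out to memory
 * before scanout can observe the new contents.
 */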
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
        if (!i915_gem_object_is_framebuffer(obj))
                return;

        i915_gem_object_lock(obj, NULL);
        __i915_gem_object_flush_for_display(obj);
        i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
        if (i915_gem_object_is_framebuffer(obj))
                __i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_WC)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * WC domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_WC;
        if (write) {
                obj->read_domains = I915_GEM_DOMAIN_WC;
                obj->write_domain = I915_GEM_DOMAIN_WC;
                obj->mm.dirty = true;
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}
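
/*
 * A minimal usage sketch (hypothetical caller, error handling trimmed):
 * take the object lock, move the object to the WC domain for write access,
 * then drop the lock before touching the pages through a WC mapping.
 *
 *      i915_gem_object_lock(obj, NULL);
 *      err = i915_gem_object_set_to_wc_domain(obj, true);
 *      i915_gem_object_unlock(obj);
 */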

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_GTT)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * GTT domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
                struct i915_vma *vma;

                obj->read_domains = I915_GEM_DOMAIN_GTT;
                obj->write_domain = I915_GEM_DOMAIN_GTT;
                obj->mm.dirty = true;

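                /*
                 * Track which GGTT vmas now have writes pending through the
                 * aperture, so that flush_write_domain() can later flush
                 * them via intel_gt_flush_ggtt_writes().
                 */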
                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj)
                        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                                i915_vma_set_ggtt_write(vma);
                spin_unlock(&obj->vma.lock);
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object
 * across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
{
        int ret;

        if (obj->cache_level == cache_level)
                return 0;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        /* Always invalidate stale cachelines */
        if (obj->cache_level != cache_level) {
                i915_gem_object_set_cache_coherency(obj, cache_level);
                obj->cache_dirty = true;
        }

        /* The cache-level will be applied when each vma is rebound. */
        return i915_gem_object_unbind(obj,
                                      I915_GEM_OBJECT_UNBIND_ACTIVE |
                                      I915_GEM_OBJECT_UNBIND_BARRIER);
}

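/*
 * The two caching ioctls below are reached from userspace via
 * DRM_IOCTL_I915_GEM_GET_CACHING and DRM_IOCTL_I915_GEM_SET_CACHING.
 * A rough sketch of a hypothetical caller (using libdrm's drmIoctl(),
 * error handling omitted):
 *
 *      struct drm_i915_gem_caching arg = {
 *              .handle = handle,
 *              .caching = I915_CACHING_CACHED,
 *      };
 *      drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */
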
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        int err = 0;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        rcu_read_lock();
        obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj) {
                err = -ENOENT;
                goto out;
        }

        switch (obj->cache_level) {
        case I915_CACHE_LLC:
        case I915_CACHE_L3_LLC:
                args->caching = I915_CACHING_CACHED;
                break;

        case I915_CACHE_WT:
                args->caching = I915_CACHING_DISPLAY;
                break;

        default:
                args->caching = I915_CACHING_NONE;
                break;
        }
out:
        rcu_read_unlock();
        return err;
}

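/*
 * Translate the uapi caching mode into a cache level: I915_CACHING_NONE maps
 * to uncached PTEs, I915_CACHING_CACHED to LLC/snooped, and
 * I915_CACHING_DISPLAY to write-through where the hardware supports it
 * (otherwise uncached).
 */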
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
        int ret = 0;

        if (IS_DGFX(i915))
                return -ENODEV;

        switch (args->caching) {
        case I915_CACHING_NONE:
                level = I915_CACHE_NONE;
                break;
        case I915_CACHING_CACHED:
                /*
                 * Due to a HW issue on BXT A stepping, GPU stores via a
                 * snooped mapping may leave stale data in a corresponding CPU
                 * cacheline, whereas normally such cachelines would get
                 * invalidated.
                 */
                if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
                        return -ENODEV;

                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
                level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
                break;
        default:
                return -EINVAL;
        }

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * The caching mode of a proxy object is handled by its generator and
         * is not allowed to be changed by userspace.
         */
        if (i915_gem_object_is_proxy(obj)) {
                /*
                 * Silently allow cached for userptr; the vulkan driver
                 * sets all objects to cached.
                 */
                if (!i915_gem_object_is_userptr(obj) ||
                    args->caching != I915_CACHING_CACHED)
                        ret = -ENXIO;

                goto out;
        }

        ret = i915_gem_object_lock_interruptible(obj, NULL);
        if (ret)
                goto out;

        ret = i915_gem_object_set_cache_level(obj, level);
        i915_gem_object_unlock(obj);

out:
        i915_gem_object_put(obj);
        return ret;
}

/*
 * Prepare a buffer for the display plane (scanout, cursors, etc.). Can be
 * called from an uninterruptible phase (modesetting) and allows any flushes
 * to be pipelined (for pageflips). We only flush the caches while preparing
 * the buffer for display; the callers are responsible for the frontbuffer
 * flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     struct i915_gem_ww_ctx *ww,
                                     u32 alignment,
                                     const struct i915_ggtt_view *view,
                                     unsigned int flags)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_vma *vma;
        int ret;

        /* Frame buffer must be in LMEM */
        if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
                return ERR_PTR(-EINVAL);

        /*
         * The display engine is not coherent with the LLC cache on gen6. As
         * a result, we make sure that the pinning that is about to occur is
         * done with uncached PTEs. This is the lowest common denominator for
         * all chipsets.
         *
         * However for gen6+, we could do better by using the GFDT bit instead
         * of uncaching, which would allow us to flush all the LLC-cached data
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(i915) ?
                                              I915_CACHE_WT : I915_CACHE_NONE);
        if (ret)
                return ERR_PTR(ret);

        /*
         * As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
         * always use map_and_fenceable for all scanout buffers. However,
         * it may simply be too big to fit into mappable, in which case
         * put it anyway and hope that userspace can cope (but always first
         * try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
        if ((flags & PIN_MAPPABLE) == 0 &&
            (!view || view->type == I915_GGTT_VIEW_NORMAL))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
                                                  flags | PIN_MAPPABLE |
                                                  PIN_NONBLOCK);
        if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
                vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
                                                  alignment, flags);
        if (IS_ERR(vma))
                return vma;

        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
        i915_vma_mark_scanout(vma);

        i915_gem_object_flush_if_display_locked(obj);

        return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
         */
        if (write)
                __start_cpu_write(obj);

        return 0;
}

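/*
 * A rough sketch of the corresponding userspace call for the ioctl below
 * (hypothetical caller using libdrm's drmIoctl(), error handling omitted):
 *
 *      struct drm_i915_gem_set_domain arg = {
 *              .handle = handle,
 *              .read_domains = I915_GEM_DOMAIN_CPU,
 *              .write_domain = I915_GEM_DOMAIN_CPU,
 *      };
 *      drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */
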
/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        u32 read_domains = args->read_domains;
        u32 write_domain = args->write_domain;
        int err;

        if (IS_DGFX(to_i915(dev)))
                return -ENODEV;

        /* Only handle setting domains to types used by the CPU. */
        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /*
         * Having something in the write domain implies it's in the read
         * domain, and only that read domain.  Enforce that in the request.
         */
        if (write_domain && read_domains != write_domain)
                return -EINVAL;

        if (!read_domains)
                return 0;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        err = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   (write_domain ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (err)
                goto out;

        if (i915_gem_object_is_userptr(obj)) {
                /*
                 * Try to grab userptr pages; iris uses set_domain to check
                 * userptr validity.
                 */
                err = i915_gem_object_userptr_validate(obj);
                if (!err)
                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_INTERRUPTIBLE |
                                                   I915_WAIT_PRIORITY |
                                                   (write_domain ? I915_WAIT_ALL : 0),
                                                   MAX_SCHEDULE_TIMEOUT);
                goto out;
        }

        /*
         * Proxy objects do not control access to the backing storage, ergo
         * they cannot be used as a means to manipulate the cache domain
         * tracking for that backing storage. The proxy object is always
         * considered to be outside of any cache domain.
         */
        if (i915_gem_object_is_proxy(obj)) {
                err = -ENXIO;
                goto out;
        }

        err = i915_gem_object_lock_interruptible(obj, NULL);
        if (err)
                goto out;

        /*
         * Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        err = i915_gem_object_pin_pages(obj);
        if (err)
                goto out_unlock;

        /*
         * Already in the desired write domain? Nothing for us to do!
         *
         * We apply a little bit of cunning here to catch a broader set of
         * no-ops. If obj->write_domain is set, we must be in the same
         * obj->read_domains, and only that domain. Therefore, if that
         * obj->write_domain matches the request read_domains, we are
         * already in the same read/write domain and can skip the operation,
         * without having to further check the requested write_domain.
         */
        if (READ_ONCE(obj->write_domain) == read_domains)
                goto out_unpin;

        if (read_domains & I915_GEM_DOMAIN_WC)
                err = i915_gem_object_set_to_wc_domain(obj, write_domain);
        else if (read_domains & I915_GEM_DOMAIN_GTT)
                err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
                err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
        i915_gem_object_unpin_pages(obj);

out_unlock:
        i915_gem_object_unlock(obj);

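        /*
         * A CPU write is about to bypass the display engine's view of the
         * frontbuffer, so poke the frontbuffer tracking (used by PSR, FBC,
         * etc.) whenever write access was requested and granted.
         */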
        if (!err && write_domain)
                i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
        i915_gem_object_put(obj);
        return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
                                 unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu read domain, set ourselves into the gtt
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the gpu will dirty the data
         * anyway again before the next pread happens.
         */
        if (!obj->cache_dirty &&
            !(obj->read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;

out:
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}

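/*
 * As above, but for a CPU write: pins the pages and synchronizes against all
 * GPU access. The returned *needs_clflush mask may contain CLFLUSH_BEFORE
 * (flush stale cachelines before a partial write) and/or CLFLUSH_AFTER
 * (flush the new data out of the CPU caches once the write is done).
 */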
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
                                  unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu write domain, set ourselves into the
         * gtt write domain and manually flush cachelines (as required).
         * This optimizes for the case when the gpu will use the data
         * right away and we therefore have to clflush anyway.
         */
        if (!obj->cache_dirty) {
                *needs_clflush |= CLFLUSH_AFTER;

                /*
                 * Same trick applies to invalidate partially written
                 * cachelines read before writing.
                 */
                if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
                        *needs_clflush |= CLFLUSH_BEFORE;
        }

out:
        i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}