linux/drivers/gpu/drm/i915/gem/i915_gem_domain.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

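/*
 * A GPU write only needs a clflush when the object is CPU-cached; uncached
 * (I915_CACHE_NONE) and write-through (I915_CACHE_WT) objects are already
 * coherent with memory.
 */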
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        return !(obj->cache_level == I915_CACHE_NONE ||
                 obj->cache_level == I915_CACHE_WT);
}

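/*
 * Flush the object's pending writes when its current write domain is one of
 * @flush_domains: GGTT writes go through the GGTT write barrier, WC writes
 * are ordered with a write memory barrier, CPU writes are clflushed, and GPU
 * render writes to a cacheable object just mark the cache as dirty.
 */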
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
        struct i915_vma *vma;

        assert_object_held(obj);

        if (!(obj->write_domain & flush_domains))
                return;

        switch (obj->write_domain) {
        case I915_GEM_DOMAIN_GTT:
                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj) {
                        if (i915_vma_unset_ggtt_write(vma))
                                intel_gt_flush_ggtt_writes(vma->vm->gt);
                }
                spin_unlock(&obj->vma.lock);

                i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
                break;

        case I915_GEM_DOMAIN_WC:
                wmb();
                break;

        case I915_GEM_DOMAIN_CPU:
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                break;

        case I915_GEM_DOMAIN_RENDER:
                if (gpu_write_needs_clflush(obj))
                        obj->cache_dirty = true;
                break;
        }

        obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
        /*
         * We manually flush the CPU domain so that we can override and
         * force the flush for the display, and perform it asynchronously.
         */
        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
        if (obj->cache_dirty)
                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->write_domain = 0;
}

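/*
 * If the object is in use as a framebuffer, flush any outstanding CPU
 * writes so that the display engine sees coherent data.
 */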
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
        if (!i915_gem_object_is_framebuffer(obj))
                return;

        i915_gem_object_lock(obj, NULL);
        __i915_gem_object_flush_for_display(obj);
        i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
        if (i915_gem_object_is_framebuffer(obj))
                __i915_gem_object_flush_for_display(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_WC)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * WC domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_WC;
        if (write) {
                obj->read_domains = I915_GEM_DOMAIN_WC;
                obj->write_domain = I915_GEM_DOMAIN_WC;
                obj->mm.dirty = true;
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_GTT)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * GTT domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
                struct i915_vma *vma;

                obj->read_domains = I915_GEM_DOMAIN_GTT;
                obj->write_domain = I915_GEM_DOMAIN_GTT;
                obj->mm.dirty = true;

                spin_lock(&obj->vma.lock);
                for_each_ggtt_vma(vma, obj)
                        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                                i915_vma_set_ggtt_write(vma);
                spin_unlock(&obj->vma.lock);
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
{
        int ret;

        if (obj->cache_level == cache_level)
                return 0;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        /* Always invalidate stale cachelines */
        if (obj->cache_level != cache_level) {
                i915_gem_object_set_cache_coherency(obj, cache_level);
                obj->cache_dirty = true;
        }

        /* The cache-level will be applied when each vma is rebound. */
        return i915_gem_object_unbind(obj,
                                      I915_GEM_OBJECT_UNBIND_ACTIVE |
                                      I915_GEM_OBJECT_UNBIND_BARRIER);
}

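/**
 * Reports the current caching mode of an object back to userspace.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */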
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        int err = 0;

        rcu_read_lock();
        obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj) {
                err = -ENOENT;
                goto out;
        }

        switch (obj->cache_level) {
        case I915_CACHE_LLC:
        case I915_CACHE_L3_LLC:
                args->caching = I915_CACHING_CACHED;
                break;

        case I915_CACHE_WT:
                args->caching = I915_CACHING_DISPLAY;
                break;

        default:
                args->caching = I915_CACHING_NONE;
                break;
        }
out:
        rcu_read_unlock();
        return err;
}

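/**
 * Sets the caching mode of an object on behalf of userspace, translating
 * the requested I915_CACHING_* value into a cache level for the object.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */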
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
        int ret = 0;

        switch (args->caching) {
        case I915_CACHING_NONE:
                level = I915_CACHE_NONE;
                break;
        case I915_CACHING_CACHED:
                /*
                 * Due to a HW issue on BXT A stepping, GPU stores via a
                 * snooped mapping may leave stale data in a corresponding CPU
                 * cacheline, whereas normally such cachelines would get
                 * invalidated.
                 */
                if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
                        return -ENODEV;

                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
                level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
                break;
        default:
                return -EINVAL;
        }

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * The caching mode of a proxy object is handled by its generator and
         * is not allowed to be changed by userspace.
         */
        if (i915_gem_object_is_proxy(obj)) {
                ret = -ENXIO;
                goto out;
        }

        ret = i915_gem_object_lock_interruptible(obj, NULL);
        if (ret)
                goto out;

        ret = i915_gem_object_set_cache_level(obj, level);
        i915_gem_object_unlock(obj);

out:
        i915_gem_object_put(obj);
        return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     u32 alignment,
                                     const struct i915_ggtt_view *view,
                                     unsigned int flags)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_gem_ww_ctx ww;
        struct i915_vma *vma;
        int ret;

        /* Frame buffer must be in LMEM (no migration yet) */
        if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
                return ERR_PTR(-EINVAL);

        i915_gem_ww_ctx_init(&ww, true);
retry:
        ret = i915_gem_object_lock(obj, &ww);
        if (ret)
                goto err;
        /*
         * The display engine is not coherent with the LLC cache on gen6.  As
         * a result, we make sure that the pinning that is about to occur is
         * done with uncached PTEs. This is the lowest common denominator for
         * all chipsets.
         *
         * However for gen6+, we could do better by using the GFDT bit instead
         * of uncaching, which would allow us to flush all the LLC-cached data
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(i915) ?
                                              I915_CACHE_WT : I915_CACHE_NONE);
        if (ret)
                goto err;

        /*
         * As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
         * always use map_and_fenceable for all scanout buffers. However,
         * it may simply be too big to fit into mappable, in which case
         * put it anyway and hope that userspace can cope (but always first
         * try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
        if ((flags & PIN_MAPPABLE) == 0 &&
            (!view || view->type == I915_GGTT_VIEW_NORMAL))
                vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
                                                  flags | PIN_MAPPABLE |
                                                  PIN_NONBLOCK);
        if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
                vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
                                                  alignment, flags);
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto err;
        }

        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
        i915_vma_mark_scanout(vma);

        i915_gem_object_flush_if_display_locked(obj);

err:
        if (ret == -EDEADLK) {
                ret = i915_gem_ww_ctx_backoff(&ww);
                if (!ret)
                        goto retry;
        }
        i915_gem_ww_ctx_fini(&ww);

        if (ret)
                return ERR_PTR(ret);

        return vma;
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
         */
        if (write)
                __start_cpu_write(obj);

        return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        u32 read_domains = args->read_domains;
        u32 write_domain = args->write_domain;
        int err;

        /* Only handle setting domains to types used by the CPU. */
        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /*
         * Having something in the write domain implies it's in the read
         * domain, and only that read domain.  Enforce that in the request.
         */
        if (write_domain && read_domains != write_domain)
                return -EINVAL;

        if (!read_domains)
                return 0;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        err = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   (write_domain ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (err)
                goto out;

        /*
         * Proxy objects do not control access to the backing storage, ergo
         * they cannot be used as a means to manipulate the cache domain
         * tracking for that backing storage. The proxy object is always
         * considered to be outside of any cache domain.
         */
        if (i915_gem_object_is_proxy(obj)) {
                err = -ENXIO;
                goto out;
        }

        /*
         * Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        err = i915_gem_object_pin_pages(obj);
        if (err)
                goto out;

        /*
         * Already in the desired write domain? Nothing for us to do!
         *
         * We apply a little bit of cunning here to catch a broader set of
         * no-ops. If obj->write_domain is set, we must be in the same
         * obj->read_domains, and only that domain. Therefore, if that
         * obj->write_domain matches the request read_domains, we are
         * already in the same read/write domain and can skip the operation,
         * without having to further check the requested write_domain.
         */
        if (READ_ONCE(obj->write_domain) == read_domains)
                goto out_unpin;

        err = i915_gem_object_lock_interruptible(obj, NULL);
        if (err)
                goto out_unpin;

        if (read_domains & I915_GEM_DOMAIN_WC)
                err = i915_gem_object_set_to_wc_domain(obj, write_domain);
        else if (read_domains & I915_GEM_DOMAIN_GTT)
                err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
                err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

        i915_gem_object_unlock(obj);

        if (write_domain)
                i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out_unpin:
        i915_gem_object_unpin_pages(obj);
out:
        i915_gem_object_put(obj);
        return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
                                 unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu read domain, set ourself into the gtt
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the gpu will dirty the data
         * anyway again before the next pread happens.
         */
        if (!obj->cache_dirty &&
            !(obj->read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;

out:
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}

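/*
 * Pins the specified object's pages and synchronizes the object with both
 * GPU reads and writes. Sets the CLFLUSH_BEFORE/CLFLUSH_AFTER bits in
 * needs_clflush to tell the caller which cache flushes are required around
 * the CPU write.
 */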
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
                                  unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the cpu write domain, set ourself into the
         * gtt write domain and manually flush cachelines (as required).
         * This optimizes for the case when the gpu will use the data
         * right away and we therefore have to clflush anyway.
         */
        if (!obj->cache_dirty) {
                *needs_clflush |= CLFLUSH_AFTER;

                /*
                 * Same trick applies to invalidate partially written
                 * cachelines read before writing.
                 */
                if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
                        *needs_clflush |= CLFLUSH_BEFORE;
        }

out:
        i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}