linux/drivers/gpu/drm/i915/gem/i915_gem_domain.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}
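
/*
 * Note on the two flush helpers above (descriptive comment, added for
 * clarity): they differ only in locking. i915_gem_object_flush_if_display()
 * takes and drops the object lock itself, while the _locked() variant is for
 * callers that already hold the object lock, e.g. under the ww loop in
 * i915_gem_object_pin_to_display_plane() below.
 */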

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
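
/*
 * Illustrative usage only (not copied from a real caller): the domain setters
 * in this file assume the caller already holds the object lock, e.g.
 *
 *	i915_gem_object_lock(obj, NULL);
 *	err = i915_gem_object_set_to_wc_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 *
 * assert_object_held() above will complain if the lock is not held.
 */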

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
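
/*
 * Note on the GGTT write tracking above (descriptive only): marking each
 * bound GGTT vma with i915_vma_set_ggtt_write() records that CPU writes may
 * now reach memory through the aperture; the flag is checked and cleared
 * later (see i915_vma_flush_writes()), where the pending GGTT writes are
 * flushed before the mapping is released or reused.
 */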

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object
 * across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}
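
/*
 * Illustrative call pattern only (it mirrors the set-caching ioctl below):
 * callers take the object lock around the cache-level change, e.g.
 *
 *	ret = i915_gem_object_lock_interruptible(obj, NULL);
 *	if (ret)
 *		return ret;
 *	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
 *	i915_gem_object_unlock(obj);
 *
 * Every vma is unbound here, so the new PTE caching bits only take effect
 * when the vmas are rebound on next use.
 */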

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}
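
/*
 * From userspace this is reached via DRM_IOCTL_I915_GEM_GET_CACHING. A
 * minimal sketch (illustrative; an open DRM fd and a valid GEM handle are
 * assumed):
 *
 *	struct drm_i915_gem_caching arg = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg) == 0)
 *		printf("caching mode: %u\n", arg.caching);
 */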

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}
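
/*
 * The set side is DRM_IOCTL_I915_GEM_SET_CACHING. A minimal userspace sketch
 * (illustrative; fd and handle are assumed to exist):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
 *		perror("I915_GEM_SET_CACHING");
 *
 * Expect errno ENODEV on platforms with neither LLC nor snooping, and ENXIO
 * for proxy objects, per the checks above.
 */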

/*
 * Prepare buffer for display plane (scanout, cursors, etc.). Can be called
 * from an uninterruptible phase (modesetting) and allows any flushes to be
 * pipelined (for pageflips). We only flush the caches while preparing the
 * buffer for display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_gem_ww_ctx ww;
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM (no migration yet) */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	i915_gem_ww_ctx_init(&ww, true);
retry:
	ret = i915_gem_object_lock(obj, &ww);
	if (ret)
		goto err;
	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		goto err;

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	i915_gem_object_flush_if_display_locked(obj);

err:
	if (ret == -EDEADLK) {
		ret = i915_gem_ww_ctx_backoff(&ww);
		if (!ret)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (ret)
		return ERR_PTR(ret);

	return vma;
}

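/*
 * Descriptive note (added for clarity): this helper treats the object as
 * recently used by moving its bound GGTT vmas to the tail of the vm's bound
 * list and the object itself to the tail of the shrink list, making them the
 * last candidates for eviction and shrinking respectively.
 */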
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;

	if (list_empty(&obj->vma.list))
		return;

	mutex_lock(&i915->ggtt.vm.mutex);
	spin_lock(&obj->vma.lock);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	spin_unlock(&obj->vma.lock);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		if (obj->mm.madv == I915_MADV_WILLNEED &&
		    !atomic_read(&obj->mm.shrink_pin))
			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
	/* Bump the LRU to try and avoid premature eviction whilst flipping */
	i915_gem_object_bump_inactive_ggtt(vma->obj);

	i915_vma_unpin(vma);
}
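
/*
 * Illustrative pairing only (the real callers live in the display code):
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, alignment, view, flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	... scan out from vma ...
 *	i915_gem_object_unpin_from_display_plane(vma);
 */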

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_gem_object_unlock(obj);

	if (write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
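
/*
 * Userspace reaches the above through DRM_IOCTL_I915_GEM_SET_DOMAIN. A
 * minimal sketch for making an mmap'ed buffer CPU-writable (illustrative;
 * fd and handle are assumed to exist):
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */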

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
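
/*
 * Caller pattern (a sketch only; for_each_page, vaddr and len are
 * placeholders, not real helpers): the pread/pwrite and relocation paths
 * pair the prepare helpers above with per-page flushes, holding the object
 * lock throughout, and drop the page pin when done:
 *
 *	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 *	if (err)
 *		return err;
 *
 *	for_each_page(...) {
 *		if (needs_clflush & CLFLUSH_BEFORE)
 *			drm_clflush_virt_range(vaddr, len);
 *		... copy the new data into the page ...
 *		if (needs_clflush & CLFLUSH_AFTER)
 *			drm_clflush_virt_range(vaddr, len);
 *	}
 *
 *	i915_gem_object_unpin_pages(obj);
 */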