linux/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"

/**
 * DOC: fence register handling
 *
 * Important to avoid confusion: "fences" in the i915 driver are not execution
 * fences used to track command completion but hardware detiler objects which
 * wrap a given range of the global GTT. Each platform has only a fairly limited
 * set of these objects.
 *
 * Fences are used to detile GTT memory mappings. They're also connected to the
 * hardware frontbuffer render tracking and hence interact with frontbuffer
 * compression. Furthermore on older platforms fences are required for tiled
 * objects used by the display engine. They can also be used by the render
 * engine - they're required for blitter commands and are optional for render
 * commands. But on gen4+ both display (with the exception of fbc) and rendering
 * have their own tiling state bits and don't need fences.
 *
 * Also note that fences only support X and Y tiling and hence can't be used for
 * the fancier new tiling formats like W, Ys and Yf.
 *
 * Finally note that because fences are such a restricted resource they're
 * dynamically associated with objects. Furthermore fence state is committed to
 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
 * explicitly call i915_vma_pin_fence() to synchronize fencing status for CPU
 * access. Also note that some code wants an unfenced view, for those cases the
 * fence can be removed forcefully with i915_vma_revoke_fence().
 *
 * Internally these functions will synchronize with userspace access by removing
 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
 */
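
/*
 * Illustrative sketch (not compiled into the driver): the calling pattern
 * expected around the fence API below. The hypothetical helper assumes the
 * caller already holds a pinned, mappable GGTT vma and a runtime-pm wakeref,
 * as i915_vma_pin_fence() requires; the access step in the middle stands in
 * for any detiled CPU or GTT aperture access.
 */
#if 0
static int example_fenced_access(struct i915_vma *vma)
{
        int err;

        /* Attach (or refresh) a fence register if the object is tiled. */
        err = i915_vma_pin_fence(vma);
        if (err)
                return err;

        /* ... access the object through the GTT aperture here ... */

        /* Drop the pin so the fence can be reused or stolen later. */
        i915_vma_unpin_fence(vma);
        return 0;
}
#endif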

#define pipelined 0

static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence)
{
        return fence->ggtt->vm.i915;
}

static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence)
{
        return fence->ggtt->vm.gt->uncore;
}

static void i965_write_fence_reg(struct i915_fence_reg *fence)
{
        i915_reg_t fence_reg_lo, fence_reg_hi;
        int fence_pitch_shift;
        u64 val;

        if (INTEL_GEN(fence_to_i915(fence)) >= 6) {
                fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
                fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
                fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;

        } else {
                fence_reg_lo = FENCE_REG_965_LO(fence->id);
                fence_reg_hi = FENCE_REG_965_HI(fence->id);
                fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
        }

        val = 0;
        if (fence->tiling) {
                unsigned int stride = fence->stride;

                GEM_BUG_ON(!IS_ALIGNED(stride, 128));

                val = fence->start + fence->size - I965_FENCE_PAGE;
                val <<= 32;
                val |= fence->start;
                val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
                if (fence->tiling == I915_TILING_Y)
                        val |= BIT(I965_FENCE_TILING_Y_SHIFT);
                val |= I965_FENCE_REG_VALID;
        }

        if (!pipelined) {
                struct intel_uncore *uncore = fence_to_uncore(fence);

                /*
                 * To w/a incoherency with non-atomic 64-bit register updates,
                 * we split the 64-bit update into two 32-bit writes. In order
                 * for a partial fence not to be evaluated between writes, we
                 * precede the update with write to turn off the fence register,
                 * and only enable the fence as the last step.
                 *
                 * For extra levels of paranoia, we make sure each step lands
                 * before applying the next step.
                 */
                intel_uncore_write_fw(uncore, fence_reg_lo, 0);
                intel_uncore_posting_read_fw(uncore, fence_reg_lo);

                intel_uncore_write_fw(uncore, fence_reg_hi, upper_32_bits(val));
                intel_uncore_write_fw(uncore, fence_reg_lo, lower_32_bits(val));
                intel_uncore_posting_read_fw(uncore, fence_reg_lo);
        }
}

static void i915_write_fence_reg(struct i915_fence_reg *fence)
{
        u32 val;

        val = 0;
        if (fence->tiling) {
                unsigned int stride = fence->stride;
                unsigned int tiling = fence->tiling;
                bool is_y_tiled = tiling == I915_TILING_Y;

                if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence)))
                        stride /= 128;
                else
                        stride /= 512;
                GEM_BUG_ON(!is_power_of_2(stride));

                val = fence->start;
                if (is_y_tiled)
                        val |= BIT(I830_FENCE_TILING_Y_SHIFT);
                val |= I915_FENCE_SIZE_BITS(fence->size);
                val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT;

                val |= I830_FENCE_REG_VALID;
        }

        if (!pipelined) {
                struct intel_uncore *uncore = fence_to_uncore(fence);
                i915_reg_t reg = FENCE_REG(fence->id);

                intel_uncore_write_fw(uncore, reg, val);
                intel_uncore_posting_read_fw(uncore, reg);
        }
}

static void i830_write_fence_reg(struct i915_fence_reg *fence)
{
        u32 val;

        val = 0;
        if (fence->tiling) {
                unsigned int stride = fence->stride;

                val = fence->start;
                if (fence->tiling == I915_TILING_Y)
                        val |= BIT(I830_FENCE_TILING_Y_SHIFT);
                val |= I830_FENCE_SIZE_BITS(fence->size);
                val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT;
                val |= I830_FENCE_REG_VALID;
        }

        if (!pipelined) {
                struct intel_uncore *uncore = fence_to_uncore(fence);
                i915_reg_t reg = FENCE_REG(fence->id);

                intel_uncore_write_fw(uncore, reg, val);
                intel_uncore_posting_read_fw(uncore, reg);
        }
}

static void fence_write(struct i915_fence_reg *fence)
{
        struct drm_i915_private *i915 = fence_to_i915(fence);

        /*
         * Previous access through the fence register is marshalled by
         * the mb() inside the fault handlers (i915_gem_release_mmaps)
         * and explicitly managed for internal users.
         */

        if (IS_GEN(i915, 2))
                i830_write_fence_reg(fence);
        else if (IS_GEN(i915, 3))
                i915_write_fence_reg(fence);
        else
                i965_write_fence_reg(fence);

        /*
         * Access through the fenced region afterwards is
         * ordered by the posting reads whilst writing the registers.
         */
}

static bool gpu_uses_fence_registers(struct i915_fence_reg *fence)
{
        return INTEL_GEN(fence_to_i915(fence)) < 4;
}

static int fence_update(struct i915_fence_reg *fence,
                        struct i915_vma *vma)
{
        struct i915_ggtt *ggtt = fence->ggtt;
        struct intel_uncore *uncore = fence_to_uncore(fence);
        intel_wakeref_t wakeref;
        struct i915_vma *old;
        int ret;

        fence->tiling = 0;
        if (vma) {
                GEM_BUG_ON(!i915_gem_object_get_stride(vma->obj) ||
                           !i915_gem_object_get_tiling(vma->obj));

                if (!i915_vma_is_map_and_fenceable(vma))
                        return -EINVAL;

                if (gpu_uses_fence_registers(fence)) {
                        /* implicit 'unfenced' GPU blits */
                        ret = i915_vma_sync(vma);
                        if (ret)
                                return ret;
                }

                fence->start = vma->node.start;
                fence->size = vma->fence_size;
                fence->stride = i915_gem_object_get_stride(vma->obj);
                fence->tiling = i915_gem_object_get_tiling(vma->obj);
        }
        WRITE_ONCE(fence->dirty, false);

        old = xchg(&fence->vma, NULL);
        if (old) {
                /* XXX Ideally we would move the waiting to outside the mutex */
                ret = i915_active_wait(&fence->active);
                if (ret) {
                        fence->vma = old;
                        return ret;
                }

                i915_vma_flush_writes(old);

                /*
                 * Ensure that all userspace CPU access is completed before
                 * stealing the fence.
                 */
                if (old != vma) {
                        GEM_BUG_ON(old->fence != fence);
                        i915_vma_revoke_mmap(old);
                        old->fence = NULL;
                }

                list_move(&fence->link, &ggtt->fence_list);
        }

        /*
         * We only need to update the register itself if the device is awake.
         * If the device is currently powered down, we will defer the write
         * to the runtime resume, see intel_ggtt_restore_fences().
         *
         * This only works for removing the fence register, on acquisition
         * the caller must hold the rpm wakeref. The fence register must
         * be cleared before we can use any other fences to ensure that
         * the new fences do not overlap the elided clears, confusing HW.
         */
        wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm);
        if (!wakeref) {
                GEM_BUG_ON(vma);
                return 0;
        }

        WRITE_ONCE(fence->vma, vma);
        fence_write(fence);

        if (vma) {
                vma->fence = fence;
                list_move_tail(&fence->link, &ggtt->fence_list);
        }

        intel_runtime_pm_put(uncore->rpm, wakeref);
        return 0;
}

/**
 * i915_vma_revoke_fence - force-remove fence for a VMA
 * @vma: vma to map linearly (not through a fence reg)
 *
 * This function force-removes any fence from the given object, which is useful
 * if the kernel wants to do untiled GTT access.
 */
void i915_vma_revoke_fence(struct i915_vma *vma)
{
        struct i915_fence_reg *fence = vma->fence;
        intel_wakeref_t wakeref;

        lockdep_assert_held(&vma->vm->mutex);
        if (!fence)
                return;

        GEM_BUG_ON(fence->vma != vma);
        GEM_BUG_ON(!i915_active_is_idle(&fence->active));
        GEM_BUG_ON(atomic_read(&fence->pin_count));

        fence->tiling = 0;
        WRITE_ONCE(fence->vma, NULL);
        vma->fence = NULL;

        with_intel_runtime_pm_if_in_use(fence_to_uncore(fence)->rpm, wakeref)
                fence_write(fence);
}

static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
{
        struct i915_fence_reg *fence;

        list_for_each_entry(fence, &ggtt->fence_list, link) {
                GEM_BUG_ON(fence->vma && fence->vma->fence != fence);

                if (atomic_read(&fence->pin_count))
                        continue;

                return fence;
        }

        /* Wait for completion of pending flips which consume fences */
        if (intel_has_pending_fb_unpin(ggtt->vm.i915))
                return ERR_PTR(-EAGAIN);

        return ERR_PTR(-EDEADLK);
}

int __i915_vma_pin_fence(struct i915_vma *vma)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
        struct i915_fence_reg *fence;
        struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
        int err;

        lockdep_assert_held(&vma->vm->mutex);

        /* Just update our place in the LRU if our fence is getting reused. */
        if (vma->fence) {
                fence = vma->fence;
                GEM_BUG_ON(fence->vma != vma);
                atomic_inc(&fence->pin_count);
                if (!fence->dirty) {
                        list_move_tail(&fence->link, &ggtt->fence_list);
                        return 0;
                }
        } else if (set) {
                fence = fence_find(ggtt);
                if (IS_ERR(fence))
                        return PTR_ERR(fence);

                GEM_BUG_ON(atomic_read(&fence->pin_count));
                atomic_inc(&fence->pin_count);
        } else {
                return 0;
        }

        err = fence_update(fence, set);
        if (err)
                goto out_unpin;

        GEM_BUG_ON(fence->vma != set);
        GEM_BUG_ON(vma->fence != (set ? fence : NULL));

        if (set)
                return 0;

out_unpin:
        atomic_dec(&fence->pin_count);
        return err;
}

/**
 * i915_vma_pin_fence - set up fencing for a vma
 * @vma: vma to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @vma,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int i915_vma_pin_fence(struct i915_vma *vma)
{
        int err;

        if (!vma->fence && !i915_gem_object_is_tiled(vma->obj))
                return 0;

        /*
         * Note that we revoke fences on runtime suspend. Therefore the user
         * must keep the device awake whilst using the fence.
         */
        assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm);
        GEM_BUG_ON(!i915_vma_is_pinned(vma));
        GEM_BUG_ON(!i915_vma_is_ggtt(vma));

        err = mutex_lock_interruptible(&vma->vm->mutex);
        if (err)
                return err;

        err = __i915_vma_pin_fence(vma);
        mutex_unlock(&vma->vm->mutex);

        return err;
}

/**
 * i915_reserve_fence - Reserve a fence for vGPU
 * @ggtt: Global GTT
 *
 * This function walks the fence regs looking for a free one and removes
 * it from the fence_list. It is used to reserve a fence for vGPU to use.
 */
struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt)
{
        struct i915_fence_reg *fence;
        int count;
        int ret;

        lockdep_assert_held(&ggtt->vm.mutex);

        /* Keep at least one fence available for the display engine. */
        count = 0;
        list_for_each_entry(fence, &ggtt->fence_list, link)
                count += !atomic_read(&fence->pin_count);
        if (count <= 1)
                return ERR_PTR(-ENOSPC);

        fence = fence_find(ggtt);
        if (IS_ERR(fence))
                return fence;

        if (fence->vma) {
                /* Force-remove fence from VMA */
                ret = fence_update(fence, NULL);
                if (ret)
                        return ERR_PTR(ret);
        }

        list_del(&fence->link);

        return fence;
}

/**
 * i915_unreserve_fence - Reclaim a reserved fence
 * @fence: the fence reg
 *
 * This function adds a reserved fence register from vGPU back to the fence_list.
 */
void i915_unreserve_fence(struct i915_fence_reg *fence)
{
        struct i915_ggtt *ggtt = fence->ggtt;

        lockdep_assert_held(&ggtt->vm.mutex);

        list_add(&fence->link, &ggtt->fence_list);
}

/**
 * intel_ggtt_restore_fences - restore fence state
 * @ggtt: Global GTT
 *
 * Restore the hw fence state to match the software tracking again, to be called
 * after a gpu reset and on resume. Note that on runtime suspend we only cancel
 * the fences, to be reacquired by the user later.
 */
void intel_ggtt_restore_fences(struct i915_ggtt *ggtt)
{
        int i;

        for (i = 0; i < ggtt->num_fences; i++)
                fence_write(&ggtt->fence_regs[i]);
}

/**
 * DOC: tiling swizzling details
 *
 * The idea behind tiling is to increase cache hit rates by rearranging
 * pixel data so that a group of pixel accesses are in the same cacheline.
 * Performance improvements from doing this on the back/depth buffer are on
 * the order of 30%.
 *
 * Intel architectures make this somewhat more complicated, though, by
 * adjustments made to addressing of data when the memory is in interleaved
 * mode (matched pairs of DIMMS) to improve memory bandwidth.
 * For interleaved memory, the CPU sends every sequential 64 bytes
 * to an alternate memory channel so it can get the bandwidth from both.
 *
 * The GPU also rearranges its accesses for increased bandwidth to interleaved
 * memory, and it matches what the CPU does for non-tiled.  However, when tiled
 * it does it a little differently, since one walks addresses not just in the
 * X direction but also Y.  So, along with alternating channels when bit
 * 6 of the address flips, it also alternates when other bits flip --  Bits 9
 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
 * are common to both the 915 and 965-class hardware.
 *
 * The CPU also sometimes XORs in higher bits as well, to improve
 * bandwidth doing strided access like we do so frequently in graphics.  This
 * is called "Channel XOR Randomization" in the MCH documentation.  The result
 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
 * decode.
 *
 * All of this bit 6 XORing has an effect on our memory management,
 * as we need to make sure that the 3d driver can correctly address object
 * contents.
 *
 * If we don't have interleaved memory, all tiling is safe and no swizzling is
 * required.
 *
 * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
 * 17 is not just a page offset, so as we page an object out and back in,
 * individual pages in it will have different bit 17 addresses, resulting in
 * each 64 bytes being swapped with its neighbor!
 *
 * Otherwise, if interleaved, we have to tell the 3d driver what the address
 * swizzling it needs to do is, since it's writing with the CPU to the pages
 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
 * to match what the GPU expects.
 */
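
/*
 * Illustrative sketch (not compiled into the driver): how a CPU-side copy
 * could fold the reported swizzle mode into a byte offset within a tiled
 * object. The helper below is hypothetical and only demonstrates the bit-6
 * XOR described above for the two most common modes; all other bits of the
 * offset pass through unchanged.
 */
#if 0
static unsigned long example_swizzle_offset(unsigned long offset, u32 swizzle)
{
        switch (swizzle) {
        case I915_BIT_6_SWIZZLE_9:
                /* bit6 ^= bit9 */
                offset ^= (offset >> 3) & 64;
                break;
        case I915_BIT_6_SWIZZLE_9_10:
                /* bit6 ^= bit9 ^ bit10 */
                offset ^= ((offset >> 3) ^ (offset >> 4)) & 64;
                break;
        default:
                /* other modes (e.g. the bit17-based ones) are not shown */
                break;
        }

        return offset;
}
#endif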

/**
 * detect_bit_6_swizzle - detect bit 6 swizzling pattern
 * @ggtt: Global GGTT
 *
 * Detects bit 6 swizzling of address lookup between IGD access and CPU
 * access through main memory.
 */
static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
{
        struct intel_uncore *uncore = ggtt->vm.gt->uncore;
        struct drm_i915_private *i915 = ggtt->vm.i915;
        u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
        u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;

        if (INTEL_GEN(i915) >= 8 || IS_VALLEYVIEW(i915)) {
                /*
                 * On BDW+, swizzling is not used. We leave the CPU memory
                 * controller in charge of optimizing memory accesses without
                 * the extra address manipulation GPU side.
                 *
                 * VLV and CHV don't have GPU swizzling.
                 */
                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
        } else if (INTEL_GEN(i915) >= 6) {
                if (i915->preserve_bios_swizzle) {
                        if (intel_uncore_read(uncore, DISP_ARB_CTL) &
                            DISP_TILE_SURFACE_SWIZZLING) {
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                                swizzle_y = I915_BIT_6_SWIZZLE_9;
                        } else {
                                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
                        }
                } else {
                        u32 dimm_c0, dimm_c1;
                        dimm_c0 = intel_uncore_read(uncore, MAD_DIMM_C0);
                        dimm_c1 = intel_uncore_read(uncore, MAD_DIMM_C1);
                        dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
                        dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
                        /*
                         * Enable swizzling when the channels are populated
                         * with identically sized dimms. We don't need to check
                         * the 3rd channel because no cpu with gpu attached
                         * ships in that configuration. Also, swizzling only
                         * makes sense for 2 channels anyway.
                         */
                        if (dimm_c0 == dimm_c1) {
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                                swizzle_y = I915_BIT_6_SWIZZLE_9;
                        } else {
                                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
                        }
                }
        } else if (IS_GEN(i915, 5)) {
                /*
                 * On Ironlake, the GPU uses the same swizzling setup
                 * regardless of the DRAM configuration.
                 */
                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                swizzle_y = I915_BIT_6_SWIZZLE_9;
        } else if (IS_GEN(i915, 2)) {
                /*
                 * As far as we know, the 865 doesn't have these bit 6
                 * swizzling issues.
                 */
                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
        } else if (IS_G45(i915) || IS_I965G(i915) || IS_G33(i915)) {
                /*
                 * The 965, G33, and newer, have a very flexible memory
                 * configuration.  It will enable dual-channel mode
                 * (interleaving) on as much memory as it can, and the GPU
                 * will additionally sometimes enable different bit 6
                 * swizzling for tiled objects from the CPU.
                 *
                 * Here's what I found on the G965:
                 *    slot fill         memory size  swizzling
                 * 0A   0B   1A   1B    1-ch   2-ch
                 * 512  0    0    0     512    0     O
                 * 512  0    512  0     16     1008  X
                 * 512  0    0    512   16     1008  X
                 * 0    512  0    512   16     1008  X
                 * 1024 1024 1024 0     2048   1024  O
                 *
                 * We could probably detect this based on either the DRB
                 * matching, which was the case for the swizzling required in
                 * the table above, or from the 1-ch value being less than
                 * the minimum size of a rank.
                 *
                 * Reports indicate that the swizzling actually
                 * varies depending upon page placement inside the
                 * channels, i.e. we see swizzled pages where the
                 * banks of memory are paired and unswizzled on the
                 * uneven portion, so leave that as unknown.
                 */
                if (intel_uncore_read(uncore, C0DRB3) ==
                    intel_uncore_read(uncore, C1DRB3)) {
                        swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                        swizzle_y = I915_BIT_6_SWIZZLE_9;
                }
        } else {
                u32 dcc = intel_uncore_read(uncore, DCC);

                /*
                 * On 9xx chipsets, channel interleave by the CPU is
                 * determined by DCC.  For single-channel, neither the CPU
                 * nor the GPU do swizzling.  For dual channel interleaved,
                 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
                 * 9 for Y tiled.  The CPU's interleave is independent, and
                 * can be based on either bit 11 (haven't seen this yet) or
                 * bit 17 (common).
                 */
                switch (dcc & DCC_ADDRESSING_MODE_MASK) {
                case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
                case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
                        swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                        swizzle_y = I915_BIT_6_SWIZZLE_NONE;
                        break;
                case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
                        if (dcc & DCC_CHANNEL_XOR_DISABLE) {
                                /*
                                 * This is the base swizzling by the GPU for
                                 * tiled buffers.
                                 */
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                                swizzle_y = I915_BIT_6_SWIZZLE_9;
                        } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
                                /* Bit 11 swizzling by the CPU in addition. */
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
                                swizzle_y = I915_BIT_6_SWIZZLE_9_11;
                        } else {
                                /* Bit 17 swizzling by the CPU in addition. */
                                swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
                                swizzle_y = I915_BIT_6_SWIZZLE_9_17;
                        }
                        break;
                }

                /* check for L-shaped memory aka modified enhanced addressing */
                if (IS_GEN(i915, 4) &&
                    !(intel_uncore_read(uncore, DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
                        swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
                        swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
                }

                if (dcc == 0xffffffff) {
                        drm_err(&i915->drm, "Couldn't read from MCHBAR.  "
                                  "Disabling tiling.\n");
                        swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
                        swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
                }
        }

        if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
            swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
                /*
                 * Userspace likes to explode if it sees unknown swizzling,
                 * so lie. We will finish the lie when reporting through
                 * the get-tiling-ioctl by reporting the physical swizzle
                 * mode as unknown instead.
                 *
                 * As we don't strictly know what the swizzling is, it may be
                 * bit17 dependent, and so we need to also prevent the pages
                 * from being moved.
                 */
                i915->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
                swizzle_x = I915_BIT_6_SWIZZLE_NONE;
                swizzle_y = I915_BIT_6_SWIZZLE_NONE;
        }

        i915->ggtt.bit_6_swizzle_x = swizzle_x;
        i915->ggtt.bit_6_swizzle_y = swizzle_y;
}

/*
 * Swap every 64 bytes of this page around, to account for it having a new
 * bit 17 of its physical address and therefore being interpreted differently
 * by the GPU.
 */
static void swizzle_page(struct page *page)
{
        char temp[64];
        char *vaddr;
        int i;

        vaddr = kmap(page);

        for (i = 0; i < PAGE_SIZE; i += 128) {
                memcpy(temp, &vaddr[i], 64);
                memcpy(&vaddr[i], &vaddr[i + 64], 64);
                memcpy(&vaddr[i + 64], temp, 64);
        }

        kunmap(page);
}

/**
 * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function fixes up the swizzling in case any page frame number for this
 * object has changed in bit 17 since that state has been saved with
 * i915_gem_object_save_bit_17_swizzle().
 *
 * This is called when pinning backing storage again, since the kernel is free
 * to move unpinned backing storage around (either by directly moving pages or
 * by swapping them out and back in again).
 */
void
i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
                                  struct sg_table *pages)
{
        struct sgt_iter sgt_iter;
        struct page *page;
        int i;

        if (obj->bit_17 == NULL)
                return;

        i = 0;
        for_each_sgt_page(page, sgt_iter, pages) {
                char new_bit_17 = page_to_phys(page) >> 17;
                if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
                        swizzle_page(page);
                        set_page_dirty(page);
                }
                i++;
        }
}

/**
 * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function saves the bit 17 of each page frame number so that swizzling
 * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
 * be called before the backing storage can be unpinned.
 */
void
i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
                                    struct sg_table *pages)
{
        const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
        struct sgt_iter sgt_iter;
        struct page *page;
        int i;

        if (obj->bit_17 == NULL) {
                obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL);
                if (obj->bit_17 == NULL) {
                        DRM_ERROR("Failed to allocate memory for bit 17 "
                                  "record\n");
                        return;
                }
        }

        i = 0;

        for_each_sgt_page(page, sgt_iter, pages) {
                if (page_to_phys(page) & (1 << 17))
                        __set_bit(i, obj->bit_17);
                else
                        __clear_bit(i, obj->bit_17);
                i++;
        }
}

void intel_ggtt_init_fences(struct i915_ggtt *ggtt)
{
        struct drm_i915_private *i915 = ggtt->vm.i915;
        struct intel_uncore *uncore = ggtt->vm.gt->uncore;
        int num_fences;
        int i;

        INIT_LIST_HEAD(&ggtt->fence_list);
        INIT_LIST_HEAD(&ggtt->userfault_list);
        intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm);

        detect_bit_6_swizzle(ggtt);

        if (!i915_ggtt_has_aperture(ggtt))
                num_fences = 0;
        else if (INTEL_GEN(i915) >= 7 &&
                 !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)))
                num_fences = 32;
        else if (INTEL_GEN(i915) >= 4 ||
                 IS_I945G(i915) || IS_I945GM(i915) ||
                 IS_G33(i915) || IS_PINEVIEW(i915))
                num_fences = 16;
        else
                num_fences = 8;

        if (intel_vgpu_active(i915))
                num_fences = intel_uncore_read(uncore,
                                               vgtif_reg(avail_rs.fence_num));
        ggtt->fence_regs = kcalloc(num_fences,
                                   sizeof(*ggtt->fence_regs),
                                   GFP_KERNEL);
        if (!ggtt->fence_regs)
                num_fences = 0;

        /* Initialize fence registers to zero */
        for (i = 0; i < num_fences; i++) {
                struct i915_fence_reg *fence = &ggtt->fence_regs[i];

                i915_active_init(&fence->active, NULL, NULL);
                fence->ggtt = ggtt;
                fence->id = i;
                list_add_tail(&fence->link, &ggtt->fence_list);
        }
        ggtt->num_fences = num_fences;

        intel_ggtt_restore_fences(ggtt);
}

void intel_ggtt_fini_fences(struct i915_ggtt *ggtt)
{
        int i;

        for (i = 0; i < ggtt->num_fences; i++) {
                struct i915_fence_reg *fence = &ggtt->fence_regs[i];

                i915_active_fini(&fence->active);
        }

        kfree(ggtt->fence_regs);
}

void intel_gt_init_swizzling(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;

        if (INTEL_GEN(i915) < 5 ||
            i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
                return;

        intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING);

        if (IS_GEN(i915, 5))
                return;

        intel_uncore_rmw(uncore, TILECTL, 0, TILECTL_SWZCTL);

        if (IS_GEN(i915, 6))
                intel_uncore_write(uncore,
                                   ARB_MODE,
                                   _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
        else if (IS_GEN(i915, 7))
                intel_uncore_write(uncore,
                                   ARB_MODE,
                                   _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
        else if (IS_GEN(i915, 8))
                intel_uncore_write(uncore,
                                   GAMTARBMODE,
                                   _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
        else
                MISSING_CASE(INTEL_GEN(i915));
}