linux/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
 *
 * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
#include "vmwgfx_drv.h"

/*
 * Different methods for tracking dirty:
 * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
 * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
 * accesses in the VM mkwrite() callback
 */
enum vmw_bo_dirty_method {
        VMW_BO_DIRTY_PAGETABLE,
        VMW_BO_DIRTY_MKWRITE,
};

/*
 * A scan that finds no dirty pages triggers a transition to the _MKWRITE
 * method; similarly, a scan that finds more than a certain percentage of
 * dirty pages triggers a transition to the _PAGETABLE method. How many
 * consecutive triggers should we wait for before changing method?
 */
#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2

/* Percentage to trigger a transition to the _PAGETABLE method */
#define VMW_DIRTY_PERCENTAGE 10

/**
 * struct vmw_bo_dirty - Dirty information for buffer objects
 * @start: First currently dirty bit
 * @end: Last currently dirty bit + 1
 * @method: The currently used dirty method
 * @change_count: Number of consecutive method change triggers
 * @ref_count: Reference count for this structure
 * @bitmap_size: The size of the bitmap in bits. Typically equal to the
 * number of pages in the bo.
 * @size: The accounting size for this struct.
 * @bitmap: A bitmap where each bit represents a page. A set bit means a
 * dirty page.
 */
struct vmw_bo_dirty {
        unsigned long start;
        unsigned long end;
        enum vmw_bo_dirty_method method;
        unsigned int change_count;
        unsigned int ref_count;
        unsigned long bitmap_size;
        size_t size;
        unsigned long bitmap[];
};

/**
 * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
 * @vbo: The buffer object to scan
 *
 * Scans the pagetable for dirty bits, clears those bits and updates the
 * dirty structure with the results. This function may change the
 * dirty-tracking method.
 */
static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo)
{
        struct vmw_bo_dirty *dirty = vbo->dirty;
        pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
        struct address_space *mapping = vbo->base.bdev->dev_mapping;
        pgoff_t num_marked;

        num_marked = clean_record_shared_mapping_range
                (mapping,
                 offset, dirty->bitmap_size,
                 offset, &dirty->bitmap[0],
                 &dirty->start, &dirty->end);
        if (num_marked == 0)
                dirty->change_count++;
        else
                dirty->change_count = 0;

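        /*
         * Too many consecutive scans without dirty pages: switch to the
         * mkwrite method. Write-protect the whole range and record any
         * pages dirtied in the meantime.
         */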
        if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
                dirty->change_count = 0;
                dirty->method = VMW_BO_DIRTY_MKWRITE;
                wp_shared_mapping_range(mapping,
                                        offset, dirty->bitmap_size);
                clean_record_shared_mapping_range(mapping,
                                                  offset, dirty->bitmap_size,
                                                  offset, &dirty->bitmap[0],
                                                  &dirty->start, &dirty->end);
        }
}

/**
 * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
 * @vbo: The buffer object to scan
 *
 * Write-protect pages written to so that consecutive write accesses will
 * trigger a call to mkwrite.
 *
 * This function may change the dirty-tracking method.
 */
static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
{
        struct vmw_bo_dirty *dirty = vbo->dirty;
        unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
        struct address_space *mapping = vbo->base.bdev->dev_mapping;
        pgoff_t num_marked;

        if (dirty->end <= dirty->start)
                return;

        num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping,
                                        dirty->start + offset,
                                        dirty->end - dirty->start);

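        /*
         * num_marked is the number of ptes that were actually write-protected
         * above, that is, pages written to since the previous scan. If that
         * exceeds VMW_DIRTY_PERCENTAGE of the bo, count towards switching to
         * the pagetable method.
         */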
        if (100UL * num_marked / dirty->bitmap_size >
            VMW_DIRTY_PERCENTAGE) {
                dirty->change_count++;
        } else {
                dirty->change_count = 0;
        }

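        /*
         * Too many consecutive high-percentage scans: switch to the pagetable
         * method. Clean all hardware dirty bits for the bo and conservatively
         * mark the currently tracked [start, end) range dirty in the bitmap.
         */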
        if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
                pgoff_t start = 0;
                pgoff_t end = dirty->bitmap_size;

                dirty->method = VMW_BO_DIRTY_PAGETABLE;
                clean_record_shared_mapping_range(mapping, offset, end, offset,
                                                  &dirty->bitmap[0],
                                                  &start, &end);
                bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
                if (dirty->start < dirty->end)
                        bitmap_set(&dirty->bitmap[0], dirty->start,
                                   dirty->end - dirty->start);
                dirty->change_count = 0;
        }
}

/**
 * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
 * tracking structure
 * @vbo: The buffer object to scan
 *
 * This function may change the dirty tracking method.
 */
void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
{
        struct vmw_bo_dirty *dirty = vbo->dirty;

        if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
                vmw_bo_dirty_scan_pagetable(vbo);
        else
                vmw_bo_dirty_scan_mkwrite(vbo);
}

/**
 * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
 * an unmap_mapping_range operation.
 * @vbo: The buffer object,
 * @start: First page of the range within the buffer object.
 * @end: Last page of the range within the buffer object + 1.
 *
 * If we're using the _PAGETABLE scan method, we may leak dirty pages
 * when calling unmap_mapping_range(). This function makes sure we pick
 * up all dirty pages.
 */
static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
                                   pgoff_t start, pgoff_t end)
{
        struct vmw_bo_dirty *dirty = vbo->dirty;
        unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
        struct address_space *mapping = vbo->base.bdev->dev_mapping;

        if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
                return;

        wp_shared_mapping_range(mapping, start + offset, end - start);
        clean_record_shared_mapping_range(mapping, start + offset,
                                          end - start, offset,
                                          &dirty->bitmap[0], &dirty->start,
                                          &dirty->end);
}

/**
 * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
 * @vbo: The buffer object,
 * @start: First page of the range within the buffer object.
 * @end: Last page of the range within the buffer object + 1.
 *
 * This is similar to ttm_bo_unmap_virtual() except it takes a subrange.
 */
void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
                        pgoff_t start, pgoff_t end)
{
        unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
        struct address_space *mapping = vbo->base.bdev->dev_mapping;

        vmw_bo_dirty_pre_unmap(vbo, start, end);
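        /* unmap_shared_mapping_range() takes byte offsets and lengths. */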
        unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
                                   (loff_t) (end - start) << PAGE_SHIFT);
}

/**
 * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
 * @vbo: The buffer object
 *
 * This function registers a dirty-tracking user to a buffer object.
 * A user can be for example a resource or a vma in a special user-space
 * mapping.
 *
 * Return: Zero on success, -ENOMEM on memory allocation failure.
 */
int vmw_bo_dirty_add(struct vmw_buffer_object *vbo)
{
        struct vmw_bo_dirty *dirty = vbo->dirty;
        pgoff_t num_pages = vbo->base.mem.num_pages;
        size_t size, acc_size;
        int ret;
        static struct ttm_operation_ctx ctx = {
                .interruptible = false,
                .no_wait_gpu = false
        };

        if (dirty) {
                dirty->ref_count++;
                return 0;
        }

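        /* One tracking bit per page, rounded up to whole longs. */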
        size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
        acc_size = ttm_round_pot(size);
        ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx);
        if (ret) {
                VMW_DEBUG_USER("Out of graphics memory for buffer object "
                               "dirty tracker.\n");
                return ret;
        }
        dirty = kvzalloc(size, GFP_KERNEL);
        if (!dirty) {
                ret = -ENOMEM;
                goto out_no_dirty;
        }

        dirty->size = acc_size;
        dirty->bitmap_size = num_pages;
        dirty->start = dirty->bitmap_size;
        dirty->end = 0;
        dirty->ref_count = 1;
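        /*
         * Bos small enough that their ptes fit within roughly one page table
         * page start out with the pagetable scan method; larger bos start
         * out with mkwrite. Either may switch method later based on scan
         * results.
         */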
        if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
                dirty->method = VMW_BO_DIRTY_PAGETABLE;
        } else {
                struct address_space *mapping = vbo->base.bdev->dev_mapping;
                pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);

                dirty->method = VMW_BO_DIRTY_MKWRITE;

                /* Write-protect and then pick up already dirty bits */
                wp_shared_mapping_range(mapping, offset, num_pages);
                clean_record_shared_mapping_range(mapping, offset, num_pages,
                                                  offset,
                                                  &dirty->bitmap[0],
                                                  &dirty->start, &dirty->end);
        }

        vbo->dirty = dirty;

        return 0;

out_no_dirty:
        ttm_mem_global_free(&ttm_mem_glob, acc_size);
        return ret;
}

/**
 * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
 * @vbo: The buffer object
 *
 * This function releases a dirty-tracking user from a buffer object.
 * If the reference count reaches zero, then the dirty-tracking object is
 * freed and the pointer to it cleared.
 */
void vmw_bo_dirty_release(struct vmw_buffer_object *vbo)
{
        struct vmw_bo_dirty *dirty = vbo->dirty;

        if (dirty && --dirty->ref_count == 0) {
                size_t acc_size = dirty->size;

                kvfree(dirty);
                ttm_mem_global_free(&ttm_mem_glob, acc_size);
                vbo->dirty = NULL;
        }
}

/**
 * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
 * its backing mob.
 * @res: The resource
 *
 * This function will pick up all dirty ranges affecting the resource from
 * its backup mob, and call vmw_resource_dirty_update() once for each
 * range. The transferred ranges will be cleared from the backing mob's
 * dirty tracking.
 */
void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
{
        struct vmw_buffer_object *vbo = res->backup;
        struct vmw_bo_dirty *dirty = vbo->dirty;
        pgoff_t start, cur, end;
        unsigned long res_start = res->backup_offset;
        unsigned long res_end = res->backup_offset + res->backup_size;

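        /* Convert the resource's byte range to page numbers within the mob. */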
        WARN_ON_ONCE(res_start & ~PAGE_MASK);
        res_start >>= PAGE_SHIFT;
        res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);

        if (res_start >= dirty->end || res_end <= dirty->start)
                return;

        cur = max(res_start, dirty->start);
        res_end = min(res_end, dirty->end);
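        /*
         * Walk runs of set bits within the clamped range, clearing each run
         * from the bitmap and reporting it to the resource.
         */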
        while (cur < res_end) {
                unsigned long num;

                start = find_next_bit(&dirty->bitmap[0], res_end, cur);
                if (start >= res_end)
                        break;

                end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
                cur = end + 1;
                num = end - start;
                bitmap_clear(&dirty->bitmap[0], start, num);
                vmw_resource_dirty_update(res, start, end);
        }

        if (res_start <= dirty->start && res_end > dirty->start)
                dirty->start = res_end;
        if (res_start < dirty->end && res_end >= dirty->end)
                dirty->end = res_start;
}

/**
 * vmw_bo_dirty_clear_res - Clear a resource's dirty region from
 * its backing mob.
 * @res: The resource
 *
 * This function will clear all dirty ranges affecting the resource from
 * its backup mob's dirty tracking.
 */
void vmw_bo_dirty_clear_res(struct vmw_resource *res)
{
        unsigned long res_start = res->backup_offset;
        unsigned long res_end = res->backup_offset + res->backup_size;
        struct vmw_buffer_object *vbo = res->backup;
        struct vmw_bo_dirty *dirty = vbo->dirty;

        res_start >>= PAGE_SHIFT;
        res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);

        if (res_start >= dirty->end || res_end <= dirty->start)
                return;

        res_start = max(res_start, dirty->start);
        res_end = min(res_end, dirty->end);
        bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);

        if (res_start <= dirty->start && res_end > dirty->start)
                dirty->start = res_end;
        if (res_start < dirty->end && res_end >= dirty->end)
                dirty->end = res_start;
}

vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
            vma->vm_private_data;
        vm_fault_t ret;
        unsigned long page_offset;
        unsigned int save_flags;
        struct vmw_buffer_object *vbo =
                container_of(bo, typeof(*vbo), base);

        /*
         * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly.
         * So make sure the TTM helpers are aware.
         */
        save_flags = vmf->flags;
        vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
        ret = ttm_bo_vm_reserve(bo, vmf);
        vmf->flags = save_flags;
        if (ret)
                return ret;

        page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
        if (unlikely(page_offset >= bo->mem.num_pages)) {
                ret = VM_FAULT_SIGBUS;
                goto out_unlock;
        }

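        /*
         * With the mkwrite method, record the written page in the bitmap and
         * extend the tracked [start, end) range. The pagetable method picks
         * up dirty bits at scan time instead.
         */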
        if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
            !test_bit(page_offset, &vbo->dirty->bitmap[0])) {
                struct vmw_bo_dirty *dirty = vbo->dirty;

                __set_bit(page_offset, &dirty->bitmap[0]);
                dirty->start = min(dirty->start, page_offset);
                dirty->end = max(dirty->end, page_offset + 1);
        }

out_unlock:
        dma_resv_unlock(bo->base.resv);
        return ret;
}

vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
            vma->vm_private_data;
        struct vmw_buffer_object *vbo =
                container_of(bo, struct vmw_buffer_object, base);
        pgoff_t num_prefault;
        pgprot_t prot;
        vm_fault_t ret;

        ret = ttm_bo_vm_reserve(bo, vmf);
        if (ret)
                return ret;

        num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
                TTM_BO_VM_NUM_PREFAULT;

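        /*
         * For dirty-tracked bos, check that the fault is within the bo and
         * make sure resources backed by this range are cleaned before CPU
         * access. vmw_resources_clean() also returns how many pages are safe
         * to prefault.
         */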
        if (vbo->dirty) {
                pgoff_t allowed_prefault;
                unsigned long page_offset;

                page_offset = vmf->pgoff -
                        drm_vma_node_start(&bo->base.vma_node);
                if (page_offset >= bo->mem.num_pages ||
                    vmw_resources_clean(vbo, page_offset,
                                        page_offset + PAGE_SIZE,
                                        &allowed_prefault)) {
                        ret = VM_FAULT_SIGBUS;
                        goto out_unlock;
                }

                num_prefault = min(num_prefault, allowed_prefault);
        }

        /*
         * If we don't track dirty using the MKWRITE method, make sure
         * the page protection is write-enabled so we don't get
         * a lot of unnecessary write faults.
         */
        if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
                prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
        else
                prot = vm_get_page_prot(vma->vm_flags);

        ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1);
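        /*
         * If the fault helper was allowed to drop locks and wait, it has
         * already released the bo reservation before returning
         * VM_FAULT_RETRY, so don't unlock again below.
         */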
        if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
                return ret;

out_unlock:
        dma_resv_unlock(bo->base.resv);

        return ret;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
                                enum page_entry_size pe_size)
{
        struct vm_area_struct *vma = vmf->vma;
        struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
            vma->vm_private_data;
        struct vmw_buffer_object *vbo =
                container_of(bo, struct vmw_buffer_object, base);
        pgprot_t prot;
        vm_fault_t ret;
        pgoff_t fault_page_size;
        bool write = vmf->flags & FAULT_FLAG_WRITE;

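        /* Translate the page entry size into a fault size in pages. */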
        switch (pe_size) {
        case PE_SIZE_PMD:
                fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
                break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
        case PE_SIZE_PUD:
                fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
                break;
#endif
        default:
                WARN_ON_ONCE(1);
                return VM_FAULT_FALLBACK;
        }

        /* Always do write dirty-tracking and COW on PTE level. */
        if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
                return VM_FAULT_FALLBACK;

        ret = ttm_bo_vm_reserve(bo, vmf);
        if (ret)
                return ret;

        if (vbo->dirty) {
                pgoff_t allowed_prefault;
                unsigned long page_offset;

                page_offset = vmf->pgoff -
                        drm_vma_node_start(&bo->base.vma_node);
                if (page_offset >= bo->mem.num_pages ||
                    vmw_resources_clean(vbo, page_offset,
                                        page_offset + PAGE_SIZE,
                                        &allowed_prefault)) {
                        ret = VM_FAULT_SIGBUS;
                        goto out_unlock;
                }

                /*
                 * Write protect, so we get a new fault on write, and can
                 * split.
                 */
                prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
        } else {
                prot = vm_get_page_prot(vma->vm_flags);
        }

        ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
        if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
                return ret;

out_unlock:
        dma_resv_unlock(bo->base.resv);

        return ret;
}
#endif