// SPDX-License-Identifier: GPL-2.0-only
/*
 *  mm/userfaultfd.c
 *
 *  Copyright (C) 2015  Red Hat, Inc.
 */

#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include "internal.h"

static __always_inline
struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
                                    unsigned long dst_start,
                                    unsigned long len)
{
        /*
         * Make sure that the dst range is both valid and fully within a
         * single existing vma.
         */
        struct vm_area_struct *dst_vma;

        dst_vma = find_vma(dst_mm, dst_start);
        if (!dst_vma)
                return NULL;

        if (dst_start < dst_vma->vm_start ||
            dst_start + len > dst_vma->vm_end)
                return NULL;

        /*
         * Check that the vma is registered in uffd; this is required to
         * enforce the VM_MAYWRITE check done at uffd registration
         * time.
         */
        if (!dst_vma->vm_userfaultfd_ctx.ctx)
                return NULL;

        return dst_vma;
}

/*
 * Install PTEs, to map dst_addr (within dst_vma) to page.
 *
 * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
 * and anon, and for both shared and private VMAs.
 */
int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
                             struct vm_area_struct *dst_vma,
                             unsigned long dst_addr, struct page *page,
                             bool newly_allocated, bool wp_copy)
{
        int ret;
        pte_t _dst_pte, *dst_pte;
        bool writable = dst_vma->vm_flags & VM_WRITE;
        bool vm_shared = dst_vma->vm_flags & VM_SHARED;
        bool page_in_cache = page->mapping;
        spinlock_t *ptl;
        struct inode *inode;
        pgoff_t offset, max_off;

        _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
        if (page_in_cache && !vm_shared)
                writable = false;
        if (writable || !page_in_cache)
                _dst_pte = pte_mkdirty(_dst_pte);
        if (writable) {
                if (wp_copy)
                        _dst_pte = pte_mkuffd_wp(_dst_pte);
                else
                        _dst_pte = pte_mkwrite(_dst_pte);
        }

        dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);

        if (vma_is_shmem(dst_vma)) {
                /* serialize against truncate with the page table lock */
                inode = dst_vma->vm_file->f_inode;
                offset = linear_page_index(dst_vma, dst_addr);
                max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
                ret = -EFAULT;
                if (unlikely(offset >= max_off))
                        goto out_unlock;
        }

        ret = -EEXIST;
        if (!pte_none(*dst_pte))
                goto out_unlock;

        if (page_in_cache)
                page_add_file_rmap(page, false);
        else
                page_add_new_anon_rmap(page, dst_vma, dst_addr, false);

        /*
         * Must happen after rmap, as mm_counter() checks mapping (via
         * PageAnon()), which is set by __page_set_anon_rmap().
         */
        inc_mm_counter(dst_mm, mm_counter(page));

        if (newly_allocated)
                lru_cache_add_inactive_or_unevictable(page, dst_vma);

        set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

        /* No need to invalidate - it was non-present before */
        update_mmu_cache(dst_vma, dst_addr, dst_pte);
        ret = 0;
out_unlock:
        pte_unmap_unlock(dst_pte, ptl);
        return ret;
}

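/*
 * Handle an MCOPY_ATOMIC_NORMAL fill of a single page for a private
 * (anon or MAP_PRIVATE shmem) vma: allocate an anonymous page, copy the
 * source data into it, charge it to the memcg and install the PTE.  If
 * the copy faults while mmap_lock is held, the freshly allocated page
 * is handed back via *pagep together with -ENOENT so the caller can
 * redo the copy_from_user() outside mmap_lock and retry.
 */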
static int mcopy_atomic_pte(struct mm_struct *dst_mm,
                            pmd_t *dst_pmd,
                            struct vm_area_struct *dst_vma,
                            unsigned long dst_addr,
                            unsigned long src_addr,
                            struct page **pagep,
                            bool wp_copy)
{
        void *page_kaddr;
        int ret;
        struct page *page;

        if (!*pagep) {
                ret = -ENOMEM;
                page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
                if (!page)
                        goto out;

                page_kaddr = kmap_atomic(page);
                ret = copy_from_user(page_kaddr,
                                     (const void __user *) src_addr,
                                     PAGE_SIZE);
                kunmap_atomic(page_kaddr);

                /* fallback to copy_from_user outside mmap_lock */
                if (unlikely(ret)) {
                        ret = -ENOENT;
                        *pagep = page;
                        /* don't free the page */
                        goto out;
                }
        } else {
                page = *pagep;
                *pagep = NULL;
        }

        /*
         * The memory barrier inside __SetPageUptodate makes sure that
         * preceding stores to the page contents become visible before
         * the set_pte_at() write.
         */
        __SetPageUptodate(page);

        ret = -ENOMEM;
        if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
                goto out_release;

        ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                       page, true, wp_copy);
        if (ret)
                goto out_release;
out:
        return ret;
out_release:
        put_page(page);
        goto out;
}

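/*
 * Handle an MCOPY_ATOMIC_ZEROPAGE fill of a single page: map the zero
 * page at dst_addr, provided the PTE is still none and, for the shmem
 * MAP_PRIVATE case, the offset is still within i_size.
 */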
static int mfill_zeropage_pte(struct mm_struct *dst_mm,
                              pmd_t *dst_pmd,
                              struct vm_area_struct *dst_vma,
                              unsigned long dst_addr)
{
        pte_t _dst_pte, *dst_pte;
        spinlock_t *ptl;
        int ret;
        pgoff_t offset, max_off;
        struct inode *inode;

        _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
                                         dst_vma->vm_page_prot));
        dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
        if (dst_vma->vm_file) {
                /* the shmem MAP_PRIVATE case requires checking the i_size */
                inode = dst_vma->vm_file->f_inode;
                offset = linear_page_index(dst_vma, dst_addr);
                max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
                ret = -EFAULT;
                if (unlikely(offset >= max_off))
                        goto out_unlock;
        }
        ret = -EEXIST;
        if (!pte_none(*dst_pte))
                goto out_unlock;
        set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(dst_vma, dst_addr, dst_pte);
        ret = 0;
out_unlock:
        pte_unmap_unlock(dst_pte, ptl);
        return ret;
}

/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
                                pmd_t *dst_pmd,
                                struct vm_area_struct *dst_vma,
                                unsigned long dst_addr,
                                bool wp_copy)
{
        struct inode *inode = file_inode(dst_vma->vm_file);
        pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
        struct page *page;
        int ret;

        ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
        if (ret)
                goto out;
        if (!page) {
                ret = -EFAULT;
                goto out;
        }

        ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                       page, false, wp_copy);
        if (ret)
                goto out_release;

        unlock_page(page);
        ret = 0;
out:
        return ret;
out_release:
        unlock_page(page);
        put_page(page);
        goto out;
}

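/*
 * Walk, and allocate if missing, the page table levels from the pgd
 * down to the pmd covering @address.  Returns the pmd, or NULL if an
 * intermediate level could not be allocated.
 */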
static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;

        pgd = pgd_offset(mm, address);
        p4d = p4d_alloc(mm, pgd, address);
        if (!p4d)
                return NULL;
        pud = pud_alloc(mm, p4d, address);
        if (!pud)
                return NULL;
        /*
         * Note that this is not run only because the pmd was missing:
         * *pmd may already be established, and it may even be a
         * trans_huge_pmd.
         */
        return pmd_alloc(mm, pud, address);
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
 * called with mmap_lock held; it will release mmap_lock before returning.
 */
static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
                                              struct vm_area_struct *dst_vma,
                                              unsigned long dst_start,
                                              unsigned long src_start,
                                              unsigned long len,
                                              enum mcopy_atomic_mode mode)
{
        int vm_shared = dst_vma->vm_flags & VM_SHARED;
        ssize_t err;
        pte_t *dst_pte;
        unsigned long src_addr, dst_addr;
        long copied;
        struct page *page;
        unsigned long vma_hpagesize;
        pgoff_t idx;
        u32 hash;
        struct address_space *mapping;

        /*
         * There is no default zero huge page for all huge page sizes as
         * supported by hugetlb.  A PMD_SIZE huge page may exist as used
         * by THP.  Since we cannot reliably insert a zero page, this
         * feature is not supported.
         */
        if (mode == MCOPY_ATOMIC_ZEROPAGE) {
                mmap_read_unlock(dst_mm);
                return -EINVAL;
        }

        src_addr = src_start;
        dst_addr = dst_start;
        copied = 0;
        page = NULL;
        vma_hpagesize = vma_kernel_pagesize(dst_vma);

        /*
         * Validate alignment based on huge page size
         */
        err = -EINVAL;
        if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
                goto out_unlock;

retry:
        /*
         * On routine entry dst_vma is set.  If we had to drop mmap_lock and
         * retry, dst_vma will be set to NULL and we must lookup again.
         */
        if (!dst_vma) {
                err = -ENOENT;
                dst_vma = find_dst_vma(dst_mm, dst_start, len);
                if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
                        goto out_unlock;

                err = -EINVAL;
                if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
                        goto out_unlock;

                vm_shared = dst_vma->vm_flags & VM_SHARED;
        }

        /*
         * If not shared, ensure the dst_vma has an anon_vma.
         */
        err = -ENOMEM;
        if (!vm_shared) {
                if (unlikely(anon_vma_prepare(dst_vma)))
                        goto out_unlock;
        }

        while (src_addr < src_start + len) {
                BUG_ON(dst_addr >= dst_start + len);

                /*
                 * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
                 * i_mmap_rwsem ensures the dst_pte remains valid even
                 * in the case of shared pmds.  fault mutex prevents
                 * races with other faulting threads.
                 */
                mapping = dst_vma->vm_file->f_mapping;
                i_mmap_lock_read(mapping);
                idx = linear_page_index(dst_vma, dst_addr);
                hash = hugetlb_fault_mutex_hash(mapping, idx);
                mutex_lock(&hugetlb_fault_mutex_table[hash]);

                err = -ENOMEM;
                dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
                if (!dst_pte) {
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                        i_mmap_unlock_read(mapping);
                        goto out_unlock;
                }

                if (mode != MCOPY_ATOMIC_CONTINUE &&
                    !huge_pte_none(huge_ptep_get(dst_pte))) {
                        err = -EEXIST;
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                        i_mmap_unlock_read(mapping);
                        goto out_unlock;
                }

                err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
                                               dst_addr, src_addr, mode, &page);

                mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                i_mmap_unlock_read(mapping);

                cond_resched();

                if (unlikely(err == -ENOENT)) {
                        mmap_read_unlock(dst_mm);
                        BUG_ON(!page);

                        err = copy_huge_page_from_user(page,
                                                (const void __user *)src_addr,
                                                vma_hpagesize / PAGE_SIZE,
                                                true);
                        if (unlikely(err)) {
                                err = -EFAULT;
                                goto out;
                        }
                        mmap_read_lock(dst_mm);

                        dst_vma = NULL;
                        goto retry;
                } else
                        BUG_ON(page);

                if (!err) {
                        dst_addr += vma_hpagesize;
                        src_addr += vma_hpagesize;
                        copied += vma_hpagesize;

                        if (fatal_signal_pending(current))
                                err = -EINTR;
                }
                if (err)
                        break;
        }

out_unlock:
        mmap_read_unlock(dst_mm);
out:
        if (page)
                put_page(page);
        BUG_ON(copied < 0);
        BUG_ON(err > 0);
        BUG_ON(!copied && !err);
        return copied ? copied : err;
}
#else /* !CONFIG_HUGETLB_PAGE */
/* fail at build time if gcc attempts to use this */
extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
                                      struct vm_area_struct *dst_vma,
                                      unsigned long dst_start,
                                      unsigned long src_start,
                                      unsigned long len,
                                      enum mcopy_atomic_mode mode);
#endif /* CONFIG_HUGETLB_PAGE */

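/*
 * Fill a single non-hugetlb page at dst_addr: dispatch to the CONTINUE,
 * private (anon) or shmem helper based on the mcopy mode and on whether
 * the destination vma is VM_SHARED.
 */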
static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
                                                pmd_t *dst_pmd,
                                                struct vm_area_struct *dst_vma,
                                                unsigned long dst_addr,
                                                unsigned long src_addr,
                                                struct page **page,
                                                enum mcopy_atomic_mode mode,
                                                bool wp_copy)
{
        ssize_t err;

        if (mode == MCOPY_ATOMIC_CONTINUE) {
                return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                            wp_copy);
        }

        /*
         * The normal page fault path for a shmem mapping will invoke
         * the fault, fill the hole in the file and COW it right away.
         * The result is plain anonymous memory. So when we are asked
         * to fill a hole in a MAP_PRIVATE shmem mapping, we'll
         * generate anonymous memory directly without actually filling
         * the hole. For the MAP_PRIVATE case the robustness check
         * only happens in the pagetable (to verify it's still none)
         * and not in the radix tree.
         */
        if (!(dst_vma->vm_flags & VM_SHARED)) {
                if (mode == MCOPY_ATOMIC_NORMAL)
                        err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
                                               dst_addr, src_addr, page,
                                               wp_copy);
                else
                        err = mfill_zeropage_pte(dst_mm, dst_pmd,
                                                 dst_vma, dst_addr);
        } else {
                VM_WARN_ON_ONCE(wp_copy);
                err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
                                             dst_addr, src_addr,
                                             mode != MCOPY_ATOMIC_NORMAL,
                                             page);
        }

        return err;
}

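/*
 * Common implementation behind mcopy_atomic(), mfill_zeropage() and
 * mcopy_continue(): validate the destination range, then fill it one
 * page at a time under mmap_lock, dropping the lock to redo the user
 * copy whenever a helper returns -ENOENT and retrying afterwards.
 * Returns the number of bytes filled, or an error if nothing was
 * filled.
 */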
static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                                              unsigned long dst_start,
                                              unsigned long src_start,
                                              unsigned long len,
                                              enum mcopy_atomic_mode mcopy_mode,
                                              atomic_t *mmap_changing,
                                              __u64 mode)
{
        struct vm_area_struct *dst_vma;
        ssize_t err;
        pmd_t *dst_pmd;
        unsigned long src_addr, dst_addr;
        long copied;
        struct page *page;
        bool wp_copy;

        /*
         * Sanitize the command parameters:
         */
        BUG_ON(dst_start & ~PAGE_MASK);
        BUG_ON(len & ~PAGE_MASK);

        /* Does the address range wrap, or is the span zero-sized? */
        BUG_ON(src_start + len <= src_start);
        BUG_ON(dst_start + len <= dst_start);

        src_addr = src_start;
        dst_addr = dst_start;
        copied = 0;
        page = NULL;
retry:
        mmap_read_lock(dst_mm);

        /*
         * If memory mappings are changing because of a non-cooperative
         * operation (e.g. mremap) running in parallel, bail out and
         * request the user to retry later.
         */
        err = -EAGAIN;
        if (mmap_changing && atomic_read(mmap_changing))
                goto out_unlock;

        /*
         * Make sure that the dst range is both valid and fully within a
         * single existing vma.
         */
        err = -ENOENT;
        dst_vma = find_dst_vma(dst_mm, dst_start, len);
        if (!dst_vma)
                goto out_unlock;

        err = -EINVAL;
        /*
         * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
         * it will overwrite vm_ops, so vma_is_anonymous must return false.
         */
        if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
            dst_vma->vm_flags & VM_SHARED))
                goto out_unlock;

        /*
         * validate 'mode' now that we know the dst_vma: don't allow
         * a wrprotect copy if the userfaultfd didn't register as WP.
         */
        wp_copy = mode & UFFDIO_COPY_MODE_WP;
        if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
                goto out_unlock;

        /*
         * If this is a HUGETLB vma, pass off to appropriate routine
         */
        if (is_vm_hugetlb_page(dst_vma))
                return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
                                                src_start, len, mcopy_mode);

        if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
                goto out_unlock;
        if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE)
                goto out_unlock;

        /*
         * Ensure the dst_vma has an anon_vma or this page
         * would get a NULL anon_vma when moved into the
         * dst_vma.
         */
        err = -ENOMEM;
        if (!(dst_vma->vm_flags & VM_SHARED) &&
            unlikely(anon_vma_prepare(dst_vma)))
                goto out_unlock;

        while (src_addr < src_start + len) {
                pmd_t dst_pmdval;

                BUG_ON(dst_addr >= dst_start + len);

                dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
                if (unlikely(!dst_pmd)) {
                        err = -ENOMEM;
                        break;
                }

                dst_pmdval = pmd_read_atomic(dst_pmd);
                /*
                 * If the dst_pmd is mapped as THP don't
                 * override it and just be strict.
                 */
                if (unlikely(pmd_trans_huge(dst_pmdval))) {
                        err = -EEXIST;
                        break;
                }
                if (unlikely(pmd_none(dst_pmdval)) &&
                    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
                        err = -ENOMEM;
                        break;
                }
                /* If a huge pmd materialized from under us, fail */
                if (unlikely(pmd_trans_huge(*dst_pmd))) {
                        err = -EFAULT;
                        break;
                }

                BUG_ON(pmd_none(*dst_pmd));
                BUG_ON(pmd_trans_huge(*dst_pmd));

                err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                       src_addr, &page, mcopy_mode, wp_copy);
                cond_resched();

                if (unlikely(err == -ENOENT)) {
                        void *page_kaddr;

                        mmap_read_unlock(dst_mm);
                        BUG_ON(!page);

                        page_kaddr = kmap(page);
                        err = copy_from_user(page_kaddr,
                                             (const void __user *) src_addr,
                                             PAGE_SIZE);
                        kunmap(page);
                        if (unlikely(err)) {
                                err = -EFAULT;
                                goto out;
                        }
                        goto retry;
                } else
                        BUG_ON(page);

                if (!err) {
                        dst_addr += PAGE_SIZE;
                        src_addr += PAGE_SIZE;
                        copied += PAGE_SIZE;

                        if (fatal_signal_pending(current))
                                err = -EINTR;
                }
                if (err)
                        break;
        }

out_unlock:
        mmap_read_unlock(dst_mm);
out:
        if (page)
                put_page(page);
        BUG_ON(copied < 0);
        BUG_ON(err > 0);
        BUG_ON(!copied && !err);
        return copied ? copied : err;
}

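/* UFFDIO_COPY: fill [dst_start, dst_start + len) with data copied from src_start. */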
ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
                     unsigned long src_start, unsigned long len,
                     atomic_t *mmap_changing, __u64 mode)
{
        return __mcopy_atomic(dst_mm, dst_start, src_start, len,
                              MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
}

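/* UFFDIO_ZEROPAGE: fill [start, start + len) with the zero page. */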
ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
                       unsigned long len, atomic_t *mmap_changing)
{
        return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
                              mmap_changing, 0);
}

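/* UFFDIO_CONTINUE: install PTEs for pages already present in the page cache. */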
ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
                       unsigned long len, atomic_t *mmap_changing)
{
        return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
                              mmap_changing, 0);
}

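/*
 * UFFDIO_WRITEPROTECT: enable or resolve write protection over the
 * anonymous, uffd-wp registered range [start, start + len).
 */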
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
                        unsigned long len, bool enable_wp,
                        atomic_t *mmap_changing)
{
        struct vm_area_struct *dst_vma;
        pgprot_t newprot;
        int err;

        /*
         * Sanitize the command parameters:
         */
        BUG_ON(start & ~PAGE_MASK);
        BUG_ON(len & ~PAGE_MASK);

        /* Does the address range wrap, or is the span zero-sized? */
        BUG_ON(start + len <= start);

        mmap_read_lock(dst_mm);

        /*
         * If memory mappings are changing because of a non-cooperative
         * operation (e.g. mremap) running in parallel, bail out and
         * request the user to retry later.
         */
        err = -EAGAIN;
        if (mmap_changing && atomic_read(mmap_changing))
                goto out_unlock;

        err = -ENOENT;
        dst_vma = find_dst_vma(dst_mm, start, len);
        /*
         * Make sure the vma is not shared and that the dst range is
         * both valid and fully within a single existing vma.
         */
        if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
                goto out_unlock;
        if (!userfaultfd_wp(dst_vma))
                goto out_unlock;
        if (!vma_is_anonymous(dst_vma))
                goto out_unlock;

        if (enable_wp)
                newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
        else
                newprot = vm_get_page_prot(dst_vma->vm_flags);

        change_protection(dst_vma, start, start + len, newprot,
                          enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);

        err = 0;
out_unlock:
        mmap_read_unlock(dst_mm);
        return err;
}