linux/mm/mremap.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *      mm/mremap.c
   4 *
   5 *      (C) Copyright 1996 Linus Torvalds
   6 *
   7 *      Address space accounting code   <alan@lxorguk.ukuu.org.uk>
   8 *      (C) Copyright 2002 Red Hat Inc, All Rights Reserved
   9 */
  10
  11#include <linux/mm.h>
  12#include <linux/hugetlb.h>
  13#include <linux/shm.h>
  14#include <linux/ksm.h>
  15#include <linux/mman.h>
  16#include <linux/swap.h>
  17#include <linux/capability.h>
  18#include <linux/fs.h>
  19#include <linux/swapops.h>
  20#include <linux/highmem.h>
  21#include <linux/security.h>
  22#include <linux/syscalls.h>
  23#include <linux/mmu_notifier.h>
  24#include <linux/uaccess.h>
  25#include <linux/mm-arch-hooks.h>
  26#include <linux/userfaultfd_k.h>
  27
  28#include <asm/cacheflush.h>
  29#include <asm/tlbflush.h>
  30
  31#include "internal.h"
  32
  33static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
  34{
  35        pgd_t *pgd;
  36        p4d_t *p4d;
  37        pud_t *pud;
  38        pmd_t *pmd;
  39
  40        pgd = pgd_offset(mm, addr);
  41        if (pgd_none_or_clear_bad(pgd))
  42                return NULL;
  43
  44        p4d = p4d_offset(pgd, addr);
  45        if (p4d_none_or_clear_bad(p4d))
  46                return NULL;
  47
  48        pud = pud_offset(p4d, addr);
  49        if (pud_none_or_clear_bad(pud))
  50                return NULL;
  51
  52        pmd = pmd_offset(pud, addr);
  53        if (pmd_none(*pmd))
  54                return NULL;
  55
  56        return pmd;
  57}
  58
  59static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
  60                            unsigned long addr)
  61{
  62        pgd_t *pgd;
  63        p4d_t *p4d;
  64        pud_t *pud;
  65        pmd_t *pmd;
  66
  67        pgd = pgd_offset(mm, addr);
  68        p4d = p4d_alloc(mm, pgd, addr);
  69        if (!p4d)
  70                return NULL;
  71        pud = pud_alloc(mm, p4d, addr);
  72        if (!pud)
  73                return NULL;
  74
  75        pmd = pmd_alloc(mm, pud, addr);
  76        if (!pmd)
  77                return NULL;
  78
  79        VM_BUG_ON(pmd_trans_huge(*pmd));
  80
  81        return pmd;
  82}
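/*
 * Worked example (illustrative): with the common 4-level configuration on
 * x86-64, the p4d level folds into the pgd, and a 48-bit virtual address
 * decomposes into four 9-bit table indices plus a 12-bit page offset:
 * bits 47-39 index the pgd, 38-30 the pud, 29-21 the pmd and 20-12 the
 * pte, so the walk above ends at the pmd entry mapping the 2M region that
 * contains addr.  With CONFIG_X86_5LEVEL an extra 9-bit p4d index extends
 * this to 57-bit addresses.
 */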
  83
  84static void take_rmap_locks(struct vm_area_struct *vma)
  85{
  86        if (vma->vm_file)
  87                i_mmap_lock_write(vma->vm_file->f_mapping);
  88        if (vma->anon_vma)
  89                anon_vma_lock_write(vma->anon_vma);
  90}
  91
  92static void drop_rmap_locks(struct vm_area_struct *vma)
  93{
  94        if (vma->anon_vma)
  95                anon_vma_unlock_write(vma->anon_vma);
  96        if (vma->vm_file)
  97                i_mmap_unlock_write(vma->vm_file->f_mapping);
  98}
  99
 100static pte_t move_soft_dirty_pte(pte_t pte)
 101{
 102        /*
  103         * Set the soft dirty bit so we can notice
  104         * in userspace that the ptes were moved.
 105         */
 106#ifdef CONFIG_MEM_SOFT_DIRTY
 107        if (pte_present(pte))
 108                pte = pte_mksoft_dirty(pte);
 109        else if (is_swap_pte(pte))
 110                pte = pte_swp_mksoft_dirty(pte);
 111#endif
 112        return pte;
 113}
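/*
 * Illustrative userspace sketch (not part of the kernel source): how the
 * soft-dirty bit set above becomes visible.  Assumes CONFIG_MEM_SOFT_DIRTY
 * and the layout documented in Documentation/admin-guide/mm/soft-dirty.rst,
 * where soft-dirty is bit 55 of each 64-bit /proc/<pid>/pagemap entry and
 * writing "4" to /proc/<pid>/clear_refs clears it.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	static int page_soft_dirty(const void *addr)
 *	{
 *		uint64_t ent = 0;
 *		long psize = sysconf(_SC_PAGESIZE);
 *		int fd = open("/proc/self/pagemap", O_RDONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		pread(fd, &ent, sizeof(ent),
 *		      ((uintptr_t)addr / psize) * sizeof(ent));
 *		close(fd);
 *		return (ent >> 55) & 1;		// bit 55: soft-dirty
 *	}
 */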
 114
 115static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 116                unsigned long old_addr, unsigned long old_end,
 117                struct vm_area_struct *new_vma, pmd_t *new_pmd,
 118                unsigned long new_addr, bool need_rmap_locks)
 119{
 120        struct mm_struct *mm = vma->vm_mm;
 121        pte_t *old_pte, *new_pte, pte;
 122        spinlock_t *old_ptl, *new_ptl;
 123        bool force_flush = false;
 124        unsigned long len = old_end - old_addr;
 125
 126        /*
 127         * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
 128         * locks to ensure that rmap will always observe either the old or the
 129         * new ptes. This is the easiest way to avoid races with
 130         * truncate_pagecache(), page migration, etc...
 131         *
 132         * When need_rmap_locks is false, we use other ways to avoid
 133         * such races:
 134         *
 135         * - During exec() shift_arg_pages(), we use a specially tagged vma
 136         *   which rmap call sites look for using is_vma_temporary_stack().
 137         *
 138         * - During mremap(), new_vma is often known to be placed after vma
 139         *   in rmap traversal order. This ensures rmap will always observe
 140         *   either the old pte, or the new pte, or both (the page table locks
 141         *   serialize access to individual ptes, but only rmap traversal
 142         *   order guarantees that we won't miss both the old and new ptes).
 143         */
 144        if (need_rmap_locks)
 145                take_rmap_locks(vma);
 146
 147        /*
 148         * We don't have to worry about the ordering of src and dst
 149         * pte locks because exclusive mmap_sem prevents deadlock.
 150         */
 151        old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
 152        new_pte = pte_offset_map(new_pmd, new_addr);
 153        new_ptl = pte_lockptr(mm, new_pmd);
 154        if (new_ptl != old_ptl)
 155                spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 156        flush_tlb_batched_pending(vma->vm_mm);
 157        arch_enter_lazy_mmu_mode();
 158
 159        for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
 160                                   new_pte++, new_addr += PAGE_SIZE) {
 161                if (pte_none(*old_pte))
 162                        continue;
 163
 164                pte = ptep_get_and_clear(mm, old_addr, old_pte);
 165                /*
 166                 * If we are remapping a valid PTE, make sure
 167                 * to flush TLB before we drop the PTL for the
 168                 * PTE.
 169                 *
 170                 * NOTE! Both old and new PTL matter: the old one
 171                 * for racing with page_mkclean(), the new one to
 172                 * make sure the physical page stays valid until
 173                 * the TLB entry for the old mapping has been
 174                 * flushed.
 175                 */
 176                if (pte_present(pte))
 177                        force_flush = true;
 178                pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 179                pte = move_soft_dirty_pte(pte);
 180                set_pte_at(mm, new_addr, new_pte, pte);
 181        }
 182
 183        arch_leave_lazy_mmu_mode();
 184        if (force_flush)
 185                flush_tlb_range(vma, old_end - len, old_end);
 186        if (new_ptl != old_ptl)
 187                spin_unlock(new_ptl);
 188        pte_unmap(new_pte - 1);
 189        pte_unmap_unlock(old_pte - 1, old_ptl);
 190        if (need_rmap_locks)
 191                drop_rmap_locks(vma);
 192}
 193
 194#ifdef CONFIG_HAVE_MOVE_PMD
 195static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 196                  unsigned long new_addr, unsigned long old_end,
 197                  pmd_t *old_pmd, pmd_t *new_pmd)
 198{
 199        spinlock_t *old_ptl, *new_ptl;
 200        struct mm_struct *mm = vma->vm_mm;
 201        pmd_t pmd;
 202
 203        if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
 204            || old_end - old_addr < PMD_SIZE)
 205                return false;
 206
 207        /*
 208         * The destination pmd shouldn't be established, free_pgtables()
  209         * should have released it.
 210         */
 211        if (WARN_ON(!pmd_none(*new_pmd)))
 212                return false;
 213
 214        /*
 215         * We don't have to worry about the ordering of src and dst
 216         * ptlocks because exclusive mmap_sem prevents deadlock.
 217         */
 218        old_ptl = pmd_lock(vma->vm_mm, old_pmd);
 219        new_ptl = pmd_lockptr(mm, new_pmd);
 220        if (new_ptl != old_ptl)
 221                spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 222
 223        /* Clear the pmd */
 224        pmd = *old_pmd;
 225        pmd_clear(old_pmd);
 226
 227        VM_BUG_ON(!pmd_none(*new_pmd));
 228
 229        /* Set the new pmd */
 230        set_pmd_at(mm, new_addr, new_pmd, pmd);
 231        flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
 232        if (new_ptl != old_ptl)
 233                spin_unlock(new_ptl);
 234        spin_unlock(old_ptl);
 235
 236        return true;
 237}
 238#endif
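/*
 * Worked example (illustrative): with 4K pages on x86-64, PMD_SIZE is 2M,
 * so the move_normal_pmd() fast path above applies when old_addr and
 * new_addr are both 2M-aligned and at least 2M of the range remains to be
 * moved.  One pmd entry is then relocated instead of the 512 ptes it maps,
 * and move_ptes() is skipped for that extent.
 */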
 239
 240unsigned long move_page_tables(struct vm_area_struct *vma,
 241                unsigned long old_addr, struct vm_area_struct *new_vma,
 242                unsigned long new_addr, unsigned long len,
 243                bool need_rmap_locks)
 244{
 245        unsigned long extent, next, old_end;
 246        struct mmu_notifier_range range;
 247        pmd_t *old_pmd, *new_pmd;
 248
 249        old_end = old_addr + len;
 250        flush_cache_range(vma, old_addr, old_end);
 251
 252        mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
 253                                old_addr, old_end);
 254        mmu_notifier_invalidate_range_start(&range);
 255
 256        for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
 257                cond_resched();
 258                next = (old_addr + PMD_SIZE) & PMD_MASK;
 259                /* even if next overflowed, extent below will be ok */
 260                extent = next - old_addr;
 261                if (extent > old_end - old_addr)
 262                        extent = old_end - old_addr;
 263                old_pmd = get_old_pmd(vma->vm_mm, old_addr);
 264                if (!old_pmd)
 265                        continue;
 266                new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
 267                if (!new_pmd)
 268                        break;
 269                if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd)) {
 270                        if (extent == HPAGE_PMD_SIZE) {
 271                                bool moved;
 272                                /* See comment in move_ptes() */
 273                                if (need_rmap_locks)
 274                                        take_rmap_locks(vma);
 275                                moved = move_huge_pmd(vma, old_addr, new_addr,
 276                                                    old_end, old_pmd, new_pmd);
 277                                if (need_rmap_locks)
 278                                        drop_rmap_locks(vma);
 279                                if (moved)
 280                                        continue;
 281                        }
 282                        split_huge_pmd(vma, old_pmd, old_addr);
 283                        if (pmd_trans_unstable(old_pmd))
 284                                continue;
 285                } else if (extent == PMD_SIZE) {
 286#ifdef CONFIG_HAVE_MOVE_PMD
 287                        /*
 288                         * If the extent is PMD-sized, try to speed the move by
 289                         * moving at the PMD level if possible.
 290                         */
 291                        bool moved;
 292
 293                        if (need_rmap_locks)
 294                                take_rmap_locks(vma);
 295                        moved = move_normal_pmd(vma, old_addr, new_addr,
 296                                        old_end, old_pmd, new_pmd);
 297                        if (need_rmap_locks)
 298                                drop_rmap_locks(vma);
 299                        if (moved)
 300                                continue;
 301#endif
 302                }
 303
 304                if (pte_alloc(new_vma->vm_mm, new_pmd))
 305                        break;
 306                next = (new_addr + PMD_SIZE) & PMD_MASK;
 307                if (extent > next - new_addr)
 308                        extent = next - new_addr;
 309                move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
 310                          new_pmd, new_addr, need_rmap_locks);
 311        }
 312
 313        mmu_notifier_invalidate_range_end(&range);
 314
 315        return len + old_addr - old_end;        /* how much done */
 316}
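/*
 * Return value, worked example (illustrative): old_end is the original
 * old_addr + len, and old_addr is left wherever the loop stopped, so
 * len + old_addr - old_end is the number of bytes actually moved.  E.g.
 * with len = 0x4000, breaking out after two pages leaves old_addr at
 * old_end - 0x2000 and the function returns 0x2000; move_vma() treats any
 * value below old_len as a partial move and migrates the entries back.
 */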
 317
 318static unsigned long move_vma(struct vm_area_struct *vma,
 319                unsigned long old_addr, unsigned long old_len,
 320                unsigned long new_len, unsigned long new_addr,
 321                bool *locked, struct vm_userfaultfd_ctx *uf,
 322                struct list_head *uf_unmap)
 323{
 324        struct mm_struct *mm = vma->vm_mm;
 325        struct vm_area_struct *new_vma;
 326        unsigned long vm_flags = vma->vm_flags;
 327        unsigned long new_pgoff;
 328        unsigned long moved_len;
 329        unsigned long excess = 0;
 330        unsigned long hiwater_vm;
 331        int split = 0;
 332        int err;
 333        bool need_rmap_locks;
 334
 335        /*
  336         * We'd prefer to avoid failure later on in do_munmap(),
  337         * which may split one vma into three before unmapping.
 338         */
 339        if (mm->map_count >= sysctl_max_map_count - 3)
 340                return -ENOMEM;
 341
 342        /*
 343         * Advise KSM to break any KSM pages in the area to be moved:
 344         * it would be confusing if they were to turn up at the new
 345         * location, where they happen to coincide with different KSM
 346         * pages recently unmapped.  But leave vma->vm_flags as it was,
 347         * so KSM can come around to merge on vma and new_vma afterwards.
 348         */
 349        err = ksm_madvise(vma, old_addr, old_addr + old_len,
 350                                                MADV_UNMERGEABLE, &vm_flags);
 351        if (err)
 352                return err;
 353
 354        new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
 355        new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
 356                           &need_rmap_locks);
 357        if (!new_vma)
 358                return -ENOMEM;
 359
 360        moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
 361                                     need_rmap_locks);
 362        if (moved_len < old_len) {
 363                err = -ENOMEM;
 364        } else if (vma->vm_ops && vma->vm_ops->mremap) {
 365                err = vma->vm_ops->mremap(new_vma);
 366        }
 367
 368        if (unlikely(err)) {
 369                /*
 370                 * On error, move entries back from new area to old,
 371                 * which will succeed since page tables still there,
 372                 * and then proceed to unmap new area instead of old.
 373                 */
 374                move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
 375                                 true);
 376                vma = new_vma;
 377                old_len = new_len;
 378                old_addr = new_addr;
 379                new_addr = err;
 380        } else {
 381                mremap_userfaultfd_prep(new_vma, uf);
 382                arch_remap(mm, old_addr, old_addr + old_len,
 383                           new_addr, new_addr + new_len);
 384        }
 385
 386        /* Conceal VM_ACCOUNT so old reservation is not undone */
 387        if (vm_flags & VM_ACCOUNT) {
 388                vma->vm_flags &= ~VM_ACCOUNT;
 389                excess = vma->vm_end - vma->vm_start - old_len;
 390                if (old_addr > vma->vm_start &&
 391                    old_addr + old_len < vma->vm_end)
 392                        split = 1;
 393        }
 394
 395        /*
 396         * If we failed to move page tables we still do total_vm increment
 397         * since do_munmap() will decrement it by old_len == new_len.
 398         *
 399         * Since total_vm is about to be raised artificially high for a
 400         * moment, we need to restore high watermark afterwards: if stats
 401         * are taken meanwhile, total_vm and hiwater_vm appear too high.
 402         * If this were a serious issue, we'd add a flag to do_munmap().
 403         */
 404        hiwater_vm = mm->hiwater_vm;
 405        vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);
 406
  407        /* Tell the pfn tracking code that the pfnmap has moved from this vma */
 408        if (unlikely(vma->vm_flags & VM_PFNMAP))
 409                untrack_pfn_moved(vma);
 410
 411        if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
 412                /* OOM: unable to split vma, just get accounts right */
 413                vm_unacct_memory(excess >> PAGE_SHIFT);
 414                excess = 0;
 415        }
 416        mm->hiwater_vm = hiwater_vm;
 417
 418        /* Restore VM_ACCOUNT if one or two pieces of vma left */
 419        if (excess) {
 420                vma->vm_flags |= VM_ACCOUNT;
 421                if (split)
 422                        vma->vm_next->vm_flags |= VM_ACCOUNT;
 423        }
 424
 425        if (vm_flags & VM_LOCKED) {
 426                mm->locked_vm += new_len >> PAGE_SHIFT;
 427                *locked = true;
 428        }
 429
 430        return new_addr;
 431}
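/*
 * Worked example for the VM_ACCOUNT handling above (illustrative): if the
 * vma spans [0x100000, 0x110000) and we move old_len = 0x4000 starting at
 * old_addr = 0x104000, then excess = 0x10000 - 0x4000 = 0xc000 and
 * split = 1.  VM_ACCOUNT is cleared on the old vma so that unmapping the
 * moved range does not release the reservation now owned by the new vma,
 * and afterwards both remaining pieces ([0x100000, 0x104000) and
 * [0x108000, 0x110000)) get VM_ACCOUNT restored.
 */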
 432
 433static struct vm_area_struct *vma_to_resize(unsigned long addr,
 434        unsigned long old_len, unsigned long new_len, unsigned long *p)
 435{
 436        struct mm_struct *mm = current->mm;
 437        struct vm_area_struct *vma = find_vma(mm, addr);
 438        unsigned long pgoff;
 439
 440        if (!vma || vma->vm_start > addr)
 441                return ERR_PTR(-EFAULT);
 442
 443        /*
 444         * !old_len is a special case where an attempt is made to 'duplicate'
 445         * a mapping.  This makes no sense for private mappings as it will
 446         * instead create a fresh/new mapping unrelated to the original.  This
 447         * is contrary to the basic idea of mremap which creates new mappings
 448         * based on the original.  There are no known use cases for this
 449         * behavior.  As a result, fail such attempts.
 450         */
 451        if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
 452                pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap.  This is not supported.\n", current->comm, current->pid);
 453                return ERR_PTR(-EINVAL);
 454        }
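        /*
         * Illustrative userspace sketch (not part of the kernel source):
         * the old_len == 0 case still permitted above duplicates a shared
         * mapping, i.e. the new region maps the same pages as the old one.
         * Assumes MAP_ANONYMOUS and _GNU_SOURCE for MREMAP_MAYMOVE:
         *
         *	#define _GNU_SOURCE
         *	#include <sys/mman.h>
         *	#include <string.h>
         *	#include <assert.h>
         *
         *	int main(void)
         *	{
         *		size_t len = 4096;
         *		char *a = mmap(NULL, len, PROT_READ | PROT_WRITE,
         *			       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
         *		char *b = mremap(a, 0, len, MREMAP_MAYMOVE);
         *
         *		strcpy(a, "hello");
         *		assert(strcmp(b, "hello") == 0);	// same pages
         *		return 0;
         *	}
         */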
 455
 456        if (is_vm_hugetlb_page(vma))
 457                return ERR_PTR(-EINVAL);
 458
 459        /* We can't remap across vm area boundaries */
 460        if (old_len > vma->vm_end - addr)
 461                return ERR_PTR(-EFAULT);
 462
 463        if (new_len == old_len)
 464                return vma;
 465
 466        /* Need to be careful about a growing mapping */
 467        pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
 468        pgoff += vma->vm_pgoff;
 469        if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
 470                return ERR_PTR(-EINVAL);
 471
 472        if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
 473                return ERR_PTR(-EFAULT);
 474
 475        if (vma->vm_flags & VM_LOCKED) {
 476                unsigned long locked, lock_limit;
 477                locked = mm->locked_vm << PAGE_SHIFT;
 478                lock_limit = rlimit(RLIMIT_MEMLOCK);
 479                locked += new_len - old_len;
 480                if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 481                        return ERR_PTR(-EAGAIN);
 482        }
 483
 484        if (!may_expand_vm(mm, vma->vm_flags,
 485                                (new_len - old_len) >> PAGE_SHIFT))
 486                return ERR_PTR(-ENOMEM);
 487
 488        if (vma->vm_flags & VM_ACCOUNT) {
 489                unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
 490                if (security_vm_enough_memory_mm(mm, charged))
 491                        return ERR_PTR(-ENOMEM);
 492                *p = charged;
 493        }
 494
 495        return vma;
 496}
 497
 498static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 499                unsigned long new_addr, unsigned long new_len, bool *locked,
 500                struct vm_userfaultfd_ctx *uf,
 501                struct list_head *uf_unmap_early,
 502                struct list_head *uf_unmap)
 503{
 504        struct mm_struct *mm = current->mm;
 505        struct vm_area_struct *vma;
 506        unsigned long ret = -EINVAL;
 507        unsigned long charged = 0;
 508        unsigned long map_flags;
 509
 510        if (offset_in_page(new_addr))
 511                goto out;
 512
 513        if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
 514                goto out;
 515
 516        /* Ensure the old/new locations do not overlap */
 517        if (addr + old_len > new_addr && new_addr + new_len > addr)
 518                goto out;
 519
 520        /*
  521         * move_vma() needs us to stay 4 maps below the threshold, otherwise
  522         * it will bail out at the very beginning.
  523         * That is a problem if we have already unmapped the regions here
  524         * (new_addr and old_addr), because userspace will not know the
  525         * state of the vmas after it gets -ENOMEM.
  526         * So, to avoid such a scenario, we pre-compute whether the whole
  527         * operation has a high chance of succeeding map-wise.
  528         * The worst case is when both vmas (new_addr and old_addr) get
  529         * split in three before being unmapped.
  530         * That means 2 more maps (1 for each) on top of the ones we already hold.
  531         * Check whether the current map count plus 2 still leaves us 4 maps below
  532         * the threshold; otherwise return -ENOMEM here to be safe.
 533         */
 534        if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
 535                return -ENOMEM;
 536
 537        ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
 538        if (ret)
 539                goto out;
 540
 541        if (old_len >= new_len) {
 542                ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
 543                if (ret && old_len != new_len)
 544                        goto out;
 545                old_len = new_len;
 546        }
 547
 548        vma = vma_to_resize(addr, old_len, new_len, &charged);
 549        if (IS_ERR(vma)) {
 550                ret = PTR_ERR(vma);
 551                goto out;
 552        }
 553
 554        map_flags = MAP_FIXED;
 555        if (vma->vm_flags & VM_MAYSHARE)
 556                map_flags |= MAP_SHARED;
 557
 558        ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
 559                                ((addr - vma->vm_start) >> PAGE_SHIFT),
 560                                map_flags);
 561        if (offset_in_page(ret))
 562                goto out1;
 563
 564        ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
 565                       uf_unmap);
 566        if (!(offset_in_page(ret)))
 567                goto out;
 568out1:
 569        vm_unacct_memory(charged);
 570
 571out:
 572        return ret;
 573}
 574
 575static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
 576{
 577        unsigned long end = vma->vm_end + delta;
 578        if (end < vma->vm_end) /* overflow */
 579                return 0;
 580        if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
 581                return 0;
 582        if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
 583                              0, MAP_FIXED) & ~PAGE_MASK)
 584                return 0;
 585        return 1;
 586}
 587
 588/*
 589 * Expand (or shrink) an existing mapping, potentially moving it at the
 590 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 591 *
 592 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 593 * This option implies MREMAP_MAYMOVE.
 594 */
 595SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 596                unsigned long, new_len, unsigned long, flags,
 597                unsigned long, new_addr)
 598{
 599        struct mm_struct *mm = current->mm;
 600        struct vm_area_struct *vma;
 601        unsigned long ret = -EINVAL;
 602        unsigned long charged = 0;
 603        bool locked = false;
 604        bool downgraded = false;
 605        struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
 606        LIST_HEAD(uf_unmap_early);
 607        LIST_HEAD(uf_unmap);
 608
 609        addr = untagged_addr(addr);
 610        new_addr = untagged_addr(new_addr);
 611
 612        if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
 613                return ret;
 614
 615        if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
 616                return ret;
 617
 618        if (offset_in_page(addr))
 619                return ret;
 620
 621        old_len = PAGE_ALIGN(old_len);
 622        new_len = PAGE_ALIGN(new_len);
 623
 624        /*
 625         * We allow a zero old-len as a special case
 626         * for DOS-emu "duplicate shm area" thing. But
 627         * a zero new-len is nonsensical.
 628         */
 629        if (!new_len)
 630                return ret;
 631
 632        if (down_write_killable(&current->mm->mmap_sem))
 633                return -EINTR;
 634
 635        if (flags & MREMAP_FIXED) {
 636                ret = mremap_to(addr, old_len, new_addr, new_len,
 637                                &locked, &uf, &uf_unmap_early, &uf_unmap);
 638                goto out;
 639        }
 640
 641        /*
 642         * Always allow a shrinking remap: that just unmaps
 643         * the unnecessary pages..
 644         * __do_munmap does all the needed commit accounting, and
 645         * downgrades mmap_sem to read if so directed.
 646         */
 647        if (old_len >= new_len) {
 648                int retval;
 649
 650                retval = __do_munmap(mm, addr+new_len, old_len - new_len,
 651                                  &uf_unmap, true);
 652                if (retval < 0 && old_len != new_len) {
 653                        ret = retval;
 654                        goto out;
 655                /* Returning 1 indicates mmap_sem is downgraded to read. */
 656                } else if (retval == 1)
 657                        downgraded = true;
 658                ret = addr;
 659                goto out;
 660        }
 661
 662        /*
 663         * Ok, we need to grow..
 664         */
 665        vma = vma_to_resize(addr, old_len, new_len, &charged);
 666        if (IS_ERR(vma)) {
 667                ret = PTR_ERR(vma);
 668                goto out;
 669        }
 670
  671        /* old_len extends exactly to the end of the area..
 672         */
 673        if (old_len == vma->vm_end - addr) {
 674                /* can we just expand the current mapping? */
 675                if (vma_expandable(vma, new_len - old_len)) {
 676                        int pages = (new_len - old_len) >> PAGE_SHIFT;
 677
 678                        if (vma_adjust(vma, vma->vm_start, addr + new_len,
 679                                       vma->vm_pgoff, NULL)) {
 680                                ret = -ENOMEM;
 681                                goto out;
 682                        }
 683
 684                        vm_stat_account(mm, vma->vm_flags, pages);
 685                        if (vma->vm_flags & VM_LOCKED) {
 686                                mm->locked_vm += pages;
 687                                locked = true;
 688                                new_addr = addr;
 689                        }
 690                        ret = addr;
 691                        goto out;
 692                }
 693        }
 694
 695        /*
 696         * We weren't able to just expand or shrink the area,
 697         * we need to create a new one and move it..
 698         */
 699        ret = -ENOMEM;
 700        if (flags & MREMAP_MAYMOVE) {
 701                unsigned long map_flags = 0;
 702                if (vma->vm_flags & VM_MAYSHARE)
 703                        map_flags |= MAP_SHARED;
 704
 705                new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
 706                                        vma->vm_pgoff +
 707                                        ((addr - vma->vm_start) >> PAGE_SHIFT),
 708                                        map_flags);
 709                if (offset_in_page(new_addr)) {
 710                        ret = new_addr;
 711                        goto out;
 712                }
 713
 714                ret = move_vma(vma, addr, old_len, new_len, new_addr,
 715                               &locked, &uf, &uf_unmap);
 716        }
 717out:
 718        if (offset_in_page(ret)) {
 719                vm_unacct_memory(charged);
 720                locked = 0;
 721        }
 722        if (downgraded)
 723                up_read(&current->mm->mmap_sem);
 724        else
 725                up_write(&current->mm->mmap_sem);
 726        if (locked && new_len > old_len)
 727                mm_populate(new_addr + old_len, new_len - old_len);
 728        userfaultfd_unmap_complete(mm, &uf_unmap_early);
 729        mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
 730        userfaultfd_unmap_complete(mm, &uf_unmap);
 731        return ret;
 732}
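/*
 * Illustrative userspace sketch (not part of the kernel source): the two
 * common paths through the syscall above.  With no flags the mapping may
 * only be shrunk or grown in place; with MREMAP_MAYMOVE the kernel may
 * relocate it, in which case the returned address can differ from the old
 * one.  Assumes _GNU_SOURCE for MREMAP_MAYMOVE:
 *
 *	#define _GNU_SOURCE
 *	#include <sys/mman.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		size_t old_len = 1 << 20, new_len = 2 << 20;
 *		void *p = mmap(NULL, old_len, PROT_READ | PROT_WRITE,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *		void *q = mremap(p, old_len, new_len, 0);
 *
 *		if (q == MAP_FAILED)		// could not grow in place
 *			q = mremap(p, old_len, new_len, MREMAP_MAYMOVE);
 *		printf("old=%p new=%p\n", p, q);
 *		return 0;
 *	}
 */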
 733