/*
 *  mm/userfaultfd.c
 *
 *  Copyright (C) 2015  Red Hat, Inc.
 *
 *  This work is licensed under the terms of the GNU GPL, version 2. See
 *  the COPYING file in the top-level directory.
 */

#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include "internal.h"

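/*
 * Allocate a new anonymous page, copy @src_addr into it while holding
 * mmap_sem, and install it at @dst_addr.  If the copy from userspace
 * faults (the source page may not be resident), the partially filled
 * page is handed back through @pagep and -EFAULT is returned so that
 * the caller can redo the copy after dropping mmap_sem and retry.
 */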
static int mcopy_atomic_pte(struct mm_struct *dst_mm,
			    pmd_t *dst_pmd,
			    struct vm_area_struct *dst_vma,
			    unsigned long dst_addr,
			    unsigned long src_addr,
			    struct page **pagep)
{
	struct mem_cgroup *memcg;
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	void *page_kaddr;
	int ret;
	struct page *page;

	if (!*pagep) {
		ret = -ENOMEM;
		page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
		if (!page)
			goto out;

		page_kaddr = kmap_atomic(page);
		ret = copy_from_user(page_kaddr,
				     (const void __user *) src_addr,
				     PAGE_SIZE);
		kunmap_atomic(page_kaddr);

		/* fall back to copy_from_user outside mmap_sem */
		if (unlikely(ret)) {
			ret = -EFAULT;
			*pagep = page;
			/* don't free the page */
			goto out;
		}
	} else {
		page = *pagep;
		*pagep = NULL;
	}

	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * the preceding stores to the page contents become visible
	 * before the set_pte_at() write.
	 */
	__SetPageUptodate(page);

	ret = -ENOMEM;
	if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
		goto out_release;

	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
	if (dst_vma->vm_flags & VM_WRITE)
		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));

	ret = -EEXIST;
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (!pte_none(*dst_pte))
		goto out_release_uncharge_unlock;

	inc_mm_counter(dst_mm, MM_ANONPAGES);
	page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
	mem_cgroup_commit_charge(page, memcg, false, false);
	lru_cache_add_active_or_unevictable(page, dst_vma);

	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);

	pte_unmap_unlock(dst_pte, ptl);
	ret = 0;
out:
	return ret;
out_release_uncharge_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	mem_cgroup_cancel_charge(page, memcg, false);
out_release:
	put_page(page);
	goto out;
}

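/*
 * UFFDIO_ZEROPAGE backend for anonymous memory: install a special pte
 * pointing at the global zero page at @dst_addr, failing with -EEXIST
 * if a pte is already present there.
 */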
static int mfill_zeropage_pte(struct mm_struct *dst_mm,
			      pmd_t *dst_pmd,
			      struct vm_area_struct *dst_vma,
			      unsigned long dst_addr)
{
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	int ret;

	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
					 dst_vma->vm_page_prot));
	ret = -EEXIST;
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (!pte_none(*dst_pte))
		goto out_unlock;
	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	ret = 0;
out_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	return ret;
}

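/*
 * Walk the destination page tables for @address, allocating the p4d,
 * pud and pmd levels as needed.  Returns the pmd pointer, or NULL on
 * allocation failure.
 */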
static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, address);
	p4d = p4d_alloc(mm, pgd, address);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, address);
	if (!pud)
		return NULL;
	/*
	 * Note that the pmd being returned may already be established,
	 * and may even be a transparent huge pmd; callers must check
	 * for that before installing ptes under it.
	 */
	return pmd_alloc(mm, pud, address);
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine
 * is called with mmap_sem held; it releases mmap_sem before returning.
 */
static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
					      struct vm_area_struct *dst_vma,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      bool zeropage)
{
	int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
	int vm_shared = dst_vma->vm_flags & VM_SHARED;
	ssize_t err;
	pte_t *dst_pte;
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;
	struct hstate *h;
	unsigned long vma_hpagesize;
	pgoff_t idx;
	u32 hash;
	struct address_space *mapping;

	/*
	 * There is no default zero huge page for all huge page sizes
	 * supported by hugetlb.  A PMD_SIZE huge page may exist as used
	 * by THP.  Since we can not reliably insert a zero page, this
	 * feature is not supported.
	 */
	if (zeropage) {
		up_read(&dst_mm->mmap_sem);
		return -EINVAL;
	}

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	page = NULL;
	vma_hpagesize = vma_kernel_pagesize(dst_vma);

	/*
	 * Validate alignment based on huge page size
	 */
	err = -EINVAL;
	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
		goto out_unlock;

retry:
	/*
	 * On routine entry dst_vma is set.  If we had to drop mmap_sem
	 * and retry, dst_vma will be NULL and we must look it up again.
	 */
	if (!dst_vma) {
		err = -ENOENT;
		dst_vma = find_vma(dst_mm, dst_start);
		if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
			goto out_unlock;

		/*
		 * Only allow __mcopy_atomic_hugetlb on userfaultfd
		 * registered ranges.
		 */
		if (!dst_vma->vm_userfaultfd_ctx.ctx)
			goto out_unlock;

		if (dst_start < dst_vma->vm_start ||
		    dst_start + len > dst_vma->vm_end)
			goto out_unlock;

		err = -EINVAL;
		if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
			goto out_unlock;

		vm_shared = dst_vma->vm_flags & VM_SHARED;
	}

	if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
		    (len - copied) & (vma_hpagesize - 1)))
		goto out_unlock;

	/*
	 * If not shared, ensure the dst_vma has an anon_vma.
	 */
	err = -ENOMEM;
	if (!vm_shared) {
		if (unlikely(anon_vma_prepare(dst_vma)))
			goto out_unlock;
	}

	h = hstate_vma(dst_vma);

	while (src_addr < src_start + len) {
		pte_t dst_pteval;

		BUG_ON(dst_addr >= dst_start + len);
		VM_BUG_ON(dst_addr & ~huge_page_mask(h));

		/*
		 * Serialize via hugetlb_fault_mutex
		 */
		idx = linear_page_index(dst_vma, dst_addr);
		mapping = dst_vma->vm_file->f_mapping;
		hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping,
						idx, dst_addr);
		mutex_lock(&hugetlb_fault_mutex_table[hash]);

		err = -ENOMEM;
		dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h));
		if (!dst_pte) {
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			goto out_unlock;
		}

		err = -EEXIST;
		dst_pteval = huge_ptep_get(dst_pte);
		if (!huge_pte_none(dst_pteval)) {
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			goto out_unlock;
		}

		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
						dst_addr, src_addr, &page);

		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
		vm_alloc_shared = vm_shared;

		cond_resched();

		if (unlikely(err == -EFAULT)) {
			up_read(&dst_mm->mmap_sem);
			BUG_ON(!page);

			err = copy_huge_page_from_user(page,
						(const void __user *)src_addr,
						pages_per_huge_page(h), true);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			down_read(&dst_mm->mmap_sem);

			dst_vma = NULL;
			goto retry;
		} else
			BUG_ON(page);

		if (!err) {
			dst_addr += vma_hpagesize;
			src_addr += vma_hpagesize;
			copied += vma_hpagesize;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	up_read(&dst_mm->mmap_sem);
out:
	if (page) {
		/*
		 * We hit an error and are about to free a freshly
		 * allocated huge page.
		 *
		 * Reservation handling is subtle and differs for private
		 * and shared mappings (see restore_reserve_on_error(),
		 * which cannot be called here as it would need mmap_sem).
		 * The PagePrivate flag tells free_huge_page() whether to
		 * give a reservation back to the global reserve count.
		 * Set it for pages allocated for a shared mapping and
		 * clear it for private ones, so the global count stays
		 * consistent with the per-mapping reserve map.
		 */
		if (vm_alloc_shared)
			SetPagePrivate(page);
		else
			ClearPagePrivate(page);
		put_page(page);
	}
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}
#else /* !CONFIG_HUGETLB_PAGE */
/* no definition exists: any caller without CONFIG_HUGETLB_PAGE fails to link */
extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
				      struct vm_area_struct *dst_vma,
				      unsigned long dst_start,
				      unsigned long src_start,
				      unsigned long len,
				      bool zeropage);
#endif /* CONFIG_HUGETLB_PAGE */

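/*
 * Dispatch a single-page fill to the anonymous or shmem backend,
 * depending on the type of the destination vma and on whether a copy
 * (UFFDIO_COPY) or a zeropage fill (UFFDIO_ZEROPAGE) was requested.
 */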
static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
						pmd_t *dst_pmd,
						struct vm_area_struct *dst_vma,
						unsigned long dst_addr,
						unsigned long src_addr,
						struct page **page,
						bool zeropage)
{
	ssize_t err;

	if (vma_is_anonymous(dst_vma)) {
		if (!zeropage)
			err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
					       dst_addr, src_addr, page);
		else
			err = mfill_zeropage_pte(dst_mm, dst_pmd,
						 dst_vma, dst_addr);
	} else {
		if (!zeropage)
			err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
						     dst_vma, dst_addr,
						     src_addr, page);
		else
			err = shmem_mfill_zeropage_pte(dst_mm, dst_pmd,
						       dst_vma, dst_addr);
	}

	return err;
}

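/*
 * Common implementation behind mcopy_atomic() and mfill_zeropage():
 * walk the destination range page by page under mmap_sem (read),
 * allocating page tables and filling each pte.  On a source-side fault
 * the copy is redone without mmap_sem held and the operation retried.
 * Returns the number of bytes filled, or a negative error if nothing
 * was copied.
 */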
static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      bool zeropage,
					      bool *mmap_changing)
{
	struct vm_area_struct *dst_vma;
	ssize_t err;
	pmd_t *dst_pmd;
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;

	/*
	 * Sanitize the command parameters:
	 */
	BUG_ON(dst_start & ~PAGE_MASK);
	BUG_ON(len & ~PAGE_MASK);

	/* Reject a zero-sized or wrapping address range */
	BUG_ON(src_start + len <= src_start);
	BUG_ON(dst_start + len <= dst_start);

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	page = NULL;
retry:
	down_read(&dst_mm->mmap_sem);

	/*
	 * If memory mappings are changing because of a non-cooperative
	 * operation (e.g. mremap) running in parallel, bail out and ask
	 * userspace to retry later.
	 */
	err = -EAGAIN;
	if (mmap_changing && READ_ONCE(*mmap_changing))
		goto out_unlock;

	/*
	 * Make sure the dst range is both valid and fully within a
	 * single existing vma.
	 */
	err = -ENOENT;
	dst_vma = find_vma(dst_mm, dst_start);
	if (!dst_vma)
		goto out_unlock;

	/*
	 * Be strict and only allow __mcopy_atomic on userfaultfd
	 * registered ranges, so that userland errors do not go
	 * unnoticed.
	 */
	if (!dst_vma->vm_userfaultfd_ctx.ctx)
		goto out_unlock;

	if (dst_start < dst_vma->vm_start ||
	    dst_start + len > dst_vma->vm_end)
		goto out_unlock;

	err = -EINVAL;
	/*
	 * MAP_ANONYMOUS|MAP_SHARED mappings go through shmem_zero_setup()
	 * at mmap time, which sets vm_ops, so vma_is_anonymous() must
	 * never be true for a VM_SHARED vma.
	 */
	if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
	    dst_vma->vm_flags & VM_SHARED))
		goto out_unlock;

	/*
	 * If this is a HUGETLB vma, pass off to the appropriate routine
	 */
	if (is_vm_hugetlb_page(dst_vma))
		return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
					      src_start, len, zeropage);

	if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
		goto out_unlock;

	/*
	 * Ensure the dst_vma has an anon_vma so newly allocated pages
	 * can be added to the anonymous rmap.
	 */
	err = -ENOMEM;
	if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma)))
		goto out_unlock;

	while (src_addr < src_start + len) {
		pmd_t dst_pmdval;

		BUG_ON(dst_addr >= dst_start + len);

		dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
		if (unlikely(!dst_pmd)) {
			err = -ENOMEM;
			break;
		}

		dst_pmdval = pmd_read_atomic(dst_pmd);
		/*
		 * If the dst_pmd is already mapped as THP, don't
		 * override it and just be strict.
		 */
		if (unlikely(pmd_trans_huge(dst_pmdval))) {
			err = -EEXIST;
			break;
		}
		if (unlikely(pmd_none(dst_pmdval)) &&
		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
			err = -ENOMEM;
			break;
		}
		/* If a huge pmd materialized from under us, fail */
		if (unlikely(pmd_trans_huge(*dst_pmd))) {
			err = -EFAULT;
			break;
		}

		BUG_ON(pmd_none(*dst_pmd));
		BUG_ON(pmd_trans_huge(*dst_pmd));

		err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				       src_addr, &page, zeropage);
		cond_resched();

		if (unlikely(err == -EFAULT)) {
			void *page_kaddr;

			up_read(&dst_mm->mmap_sem);
			BUG_ON(!page);

			page_kaddr = kmap(page);
			err = copy_from_user(page_kaddr,
					     (const void __user *) src_addr,
					     PAGE_SIZE);
			kunmap(page);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			goto retry;
		} else
			BUG_ON(page);

		if (!err) {
			dst_addr += PAGE_SIZE;
			src_addr += PAGE_SIZE;
			copied += PAGE_SIZE;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	up_read(&dst_mm->mmap_sem);
out:
	if (page)
		put_page(page);
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}

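/*
 * Entry points called from the userfaultfd ioctl handlers: UFFDIO_COPY
 * maps to mcopy_atomic() and UFFDIO_ZEROPAGE to mfill_zeropage().
 */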
ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
		     unsigned long src_start, unsigned long len,
		     bool *mmap_changing)
{
	return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
			      mmap_changing);
}

ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
		       unsigned long len, bool *mmap_changing)
{
	return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing);
}

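/*
 * Illustrative only (not part of this file): a userspace monitor thread
 * typically reaches mcopy_atomic() by resolving a fault reported on a
 * userfaultfd with the UFFDIO_COPY ioctl, roughly as sketched below.
 * The names uffd, fault_addr, src_buf and page_size are placeholders
 * supplied by the caller.
 *
 *	struct uffdio_copy copy = {
 *		.dst = fault_addr & ~(page_size - 1),
 *		.src = (unsigned long) src_buf,
 *		.len = page_size,
 *		.mode = 0,
 *	};
 *	if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
 *		handle_error();
 *	// on return, copy.copy holds the number of bytes copied
 */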