/*
 *  mm/userfaultfd.c
 *
 *  Copyright (C) 2015  Red Hat, Inc.
 *
 *  This work is licensed under the terms of the GNU GPL, version 2. See
 *  the COPYING file in the top-level directory.
 */

#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include "internal.h"

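/*
 * Copy one page of data from the userland address src_addr into a freshly
 * allocated anonymous page and map it at dst_addr.  The copy is attempted
 * under kmap_atomic(); if it cannot complete (e.g. the source page is not
 * resident), the allocated page is handed back via *pagep and -EFAULT is
 * returned so the caller can redo the copy_from_user() outside mmap_sem
 * and call this function again with *pagep set.
 */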
static int mcopy_atomic_pte(struct mm_struct *dst_mm,
			    pmd_t *dst_pmd,
			    struct vm_area_struct *dst_vma,
			    unsigned long dst_addr,
			    unsigned long src_addr,
			    struct page **pagep)
{
	struct mem_cgroup *memcg;
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	void *page_kaddr;
	int ret;
	struct page *page;

	if (!*pagep) {
		ret = -ENOMEM;
		page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
		if (!page)
			goto out;

		page_kaddr = kmap_atomic(page);
		ret = copy_from_user(page_kaddr,
				     (const void __user *) src_addr,
				     PAGE_SIZE);
		kunmap_atomic(page_kaddr);

		/* fallback to copy_from_user outside mmap_sem */
		if (unlikely(ret)) {
			ret = -EFAULT;
			*pagep = page;
			/* don't free the page */
			goto out;
		}
	} else {
		page = *pagep;
		*pagep = NULL;
	}

	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * preceding stores to the page contents become visible before
	 * the set_pte_at() write.
	 */
	__SetPageUptodate(page);

	ret = -ENOMEM;
	if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
		goto out_release;

	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
	if (dst_vma->vm_flags & VM_WRITE)
		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));

	ret = -EEXIST;
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (!pte_none(*dst_pte))
		goto out_release_uncharge_unlock;

	inc_mm_counter(dst_mm, MM_ANONPAGES);
	page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
	mem_cgroup_commit_charge(page, memcg, false, false);
	lru_cache_add_active_or_unevictable(page, dst_vma);

	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);

	pte_unmap_unlock(dst_pte, ptl);
	ret = 0;
out:
	return ret;
out_release_uncharge_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	mem_cgroup_cancel_charge(page, memcg, false);
out_release:
	put_page(page);
	goto out;
}

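/*
 * Handle a zeropage request on an anonymous vma by mapping the shared
 * zero page read-only at dst_addr; fails with -EEXIST if a pte is already
 * present there.
 */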
static int mfill_zeropage_pte(struct mm_struct *dst_mm,
			      pmd_t *dst_pmd,
			      struct vm_area_struct *dst_vma,
			      unsigned long dst_addr)
{
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	int ret;

	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
					 dst_vma->vm_page_prot));
	ret = -EEXIST;
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (!pte_none(*dst_pte))
		goto out_unlock;
	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	ret = 0;
out_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	return ret;
}

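/*
 * Walk, and if necessary allocate, the page table levels down to the pmd
 * covering @address.  Returns NULL if an intermediate level could not be
 * allocated.
 */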
static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, address);
	p4d = p4d_alloc(mm, pgd, address);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, address);
	if (!pud)
		return NULL;
	/*
	 * Note that the pmd returned here may already be established, and
	 * may even be a trans huge pmd: callers must re-check it before
	 * installing ptes under it.
	 */
	return pmd_alloc(mm, pud, address);
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
 * called with mmap_sem held, it will release mmap_sem before returning.
 */
static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
					      struct vm_area_struct *dst_vma,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      bool zeropage)
{
	int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
	int vm_shared = dst_vma->vm_flags & VM_SHARED;
	ssize_t err;
	pte_t *dst_pte;
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;
	struct hstate *h;
	unsigned long vma_hpagesize;
	pgoff_t idx;
	u32 hash;
	struct address_space *mapping;

	/*
	 * There is no default zero huge page for all huge page sizes as
	 * supported by hugetlb.  A PMD_SIZE huge page may exist as used
	 * by THP.  Since we can not reliably insert a zero page, this
	 * feature is not supported.
	 */
	if (zeropage) {
		up_read(&dst_mm->mmap_sem);
		return -EINVAL;
	}

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	page = NULL;
	vma_hpagesize = vma_kernel_pagesize(dst_vma);

	/*
	 * Validate alignment based on huge page size
	 */
	err = -EINVAL;
	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
		goto out_unlock;
retry:
	/*
	 * On routine entry dst_vma is set.  If we had to drop mmap_sem and
	 * retry, dst_vma will be set to NULL and we must lookup again.
	 */
	if (!dst_vma) {
		err = -ENOENT;
		dst_vma = find_vma(dst_mm, dst_start);
		if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
			goto out_unlock;
		/*
		 * Only allow __mcopy_atomic_hugetlb on userfaultfd
		 * registered ranges.
		 */
		if (!dst_vma->vm_userfaultfd_ctx.ctx)
			goto out_unlock;

		if (dst_start < dst_vma->vm_start ||
		    dst_start + len > dst_vma->vm_end)
			goto out_unlock;

		err = -EINVAL;
		if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
			goto out_unlock;

		vm_shared = dst_vma->vm_flags & VM_SHARED;
	}

	if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
		    (len - copied) & (vma_hpagesize - 1)))
		goto out_unlock;

	/*
	 * If not shared, ensure the dst_vma has an anon_vma.
	 */
	err = -ENOMEM;
	if (!vm_shared) {
		if (unlikely(anon_vma_prepare(dst_vma)))
			goto out_unlock;
	}

	h = hstate_vma(dst_vma);

	while (src_addr < src_start + len) {
		pte_t dst_pteval;

		BUG_ON(dst_addr >= dst_start + len);
		VM_BUG_ON(dst_addr & ~huge_page_mask(h));

		/*
		 * Serialize via hugetlb_fault_mutex
		 */
		idx = linear_page_index(dst_vma, dst_addr);
		mapping = dst_vma->vm_file->f_mapping;
		hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping,
						idx, dst_addr);
		mutex_lock(&hugetlb_fault_mutex_table[hash]);

		err = -ENOMEM;
		dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h));
		if (!dst_pte) {
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			goto out_unlock;
		}

		err = -EEXIST;
		dst_pteval = huge_ptep_get(dst_pte);
		if (!huge_pte_none(dst_pteval)) {
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			goto out_unlock;
		}

		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
					       dst_addr, src_addr, &page);

		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
		vm_alloc_shared = vm_shared;

		cond_resched();

		if (unlikely(err == -EFAULT)) {
			up_read(&dst_mm->mmap_sem);
			BUG_ON(!page);

			/* fall back to copying the huge page outside mmap_sem */
			err = copy_huge_page_from_user(page,
						(const void __user *)src_addr,
						pages_per_huge_page(h), true);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			down_read(&dst_mm->mmap_sem);

			dst_vma = NULL;
			goto retry;
		} else
			BUG_ON(page);

		if (!err) {
			dst_addr += vma_hpagesize;
			src_addr += vma_hpagesize;
			copied += vma_hpagesize;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	up_read(&dst_mm->mmap_sem);
out:
	if (page) {
		/*
		 * We encountered an error and are about to free a newly
		 * allocated huge page.
		 *
		 * Reservation handling is very subtle, and differs for
		 * private and shared mappings; see restore_reserve_on_error
		 * for details.  We cannot call it here because that would
		 * require holding mmap_sem.
		 *
		 * For a private mapping the reservation map was already
		 * marked consumed at allocation time, so clear PagePrivate
		 * to keep free_huge_page from incrementing the global
		 * reserve count a second time; leaking a reservation map
		 * entry is preferable to leaking a global reservation.
		 *
		 * For a shared mapping set PagePrivate so the global
		 * reserve count is incremented when the page is freed,
		 * matching the reservation map entry created when the page
		 * was allocated for an address without a reservation.
		 *
		 * Note that vm_alloc_shared is based on the flags of the
		 * vma for which the page was originally allocated; dst_vma
		 * could be different or NULL on error.
		 */
		if (vm_alloc_shared)
			SetPagePrivate(page);
		else
			ClearPagePrivate(page);
		put_page(page);
	}
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}
#else /* !CONFIG_HUGETLB_PAGE */
/* fail at build time if gcc attempts to use this */
extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
				      struct vm_area_struct *dst_vma,
				      unsigned long dst_start,
				      unsigned long src_start,
				      unsigned long len,
				      bool zeropage);
#endif /* CONFIG_HUGETLB_PAGE */

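/*
 * Dispatch a single-page fill to the anonymous or shmem backed
 * implementation, depending on the destination vma type and on whether a
 * copy (UFFDIO_COPY) or a zeropage (UFFDIO_ZEROPAGE) was requested.
 */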
static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
						pmd_t *dst_pmd,
						struct vm_area_struct *dst_vma,
						unsigned long dst_addr,
						unsigned long src_addr,
						struct page **page,
						bool zeropage)
{
	ssize_t err;

	if (vma_is_anonymous(dst_vma)) {
		if (!zeropage)
			err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
					       dst_addr, src_addr, page);
		else
			err = mfill_zeropage_pte(dst_mm, dst_pmd,
						 dst_vma, dst_addr);
	} else {
		if (!zeropage)
			err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
						     dst_vma, dst_addr,
						     src_addr, page);
		else
			err = shmem_mfill_zeropage_pte(dst_mm, dst_pmd,
						       dst_vma, dst_addr);
	}

	return err;
}

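/*
 * Common implementation of UFFDIO_COPY and UFFDIO_ZEROPAGE: validate the
 * destination range and vma, then fill one page at a time, dropping
 * mmap_sem and retrying whenever the source page first has to be faulted
 * in.  Returns the number of bytes filled, or a negative error if nothing
 * was copied at all.
 */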
static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      bool zeropage)
{
	struct vm_area_struct *dst_vma;
	ssize_t err;
	pmd_t *dst_pmd;
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;

	/*
	 * Sanitize the command parameters:
	 */
	BUG_ON(dst_start & ~PAGE_MASK);
	BUG_ON(len & ~PAGE_MASK);

	/* Does the address range wrap, or is the span zero-sized? */
	BUG_ON(src_start + len <= src_start);
	BUG_ON(dst_start + len <= dst_start);

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	page = NULL;
retry:
	down_read(&dst_mm->mmap_sem);

	/*
	 * Make sure the dst range is both valid and fully within a
	 * single existing vma.
	 */
	err = -ENOENT;
	dst_vma = find_vma(dst_mm, dst_start);
	if (!dst_vma)
		goto out_unlock;
	/*
	 * Be strict and only allow __mcopy_atomic on userfaultfd
	 * registered ranges to prevent userland errors going
	 * unnoticed.  As far as VM consistency is concerned it would be
	 * safe to drop this check, but a userland program could create a
	 * userfaultfd mapping and wrongly assume it got registered (a bug
	 * in userland that doesn't register the range), so the check is
	 * not superfluous.
	 */
	if (!dst_vma->vm_userfaultfd_ctx.ctx)
		goto out_unlock;

	if (dst_start < dst_vma->vm_start ||
	    dst_start + len > dst_vma->vm_end)
		goto out_unlock;

	err = -EINVAL;
	/*
	 * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED
	 * but it will overwrite vm_ops, so vma_is_anonymous must return
	 * false.
	 */
	if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
	    dst_vma->vm_flags & VM_SHARED))
		goto out_unlock;

	/*
	 * If this is a HUGETLB vma, pass off to appropriate routine
	 */
	if (is_vm_hugetlb_page(dst_vma))
		return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
					      src_start, len, zeropage);

	if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
		goto out_unlock;

	/*
	 * Ensure the dst_vma has an anon_vma or this page
	 * would get a NULL anon_vma when moved in the
	 * dst_vma.
	 */
	err = -ENOMEM;
	if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma)))
		goto out_unlock;

	while (src_addr < src_start + len) {
		pmd_t dst_pmdval;

		BUG_ON(dst_addr >= dst_start + len);

		dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
		if (unlikely(!dst_pmd)) {
			err = -ENOMEM;
			break;
		}

		dst_pmdval = pmd_read_atomic(dst_pmd);
		/*
		 * If the dst_pmd is mapped as THP don't
		 * override it and just be strict.
		 */
		if (unlikely(pmd_trans_huge(dst_pmdval))) {
			err = -EEXIST;
			break;
		}
		if (unlikely(pmd_none(dst_pmdval)) &&
		    unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
			err = -ENOMEM;
			break;
		}
		/* If a huge pmd materialized from under us fail */
		if (unlikely(pmd_trans_huge(*dst_pmd))) {
			err = -EFAULT;
			break;
		}

		BUG_ON(pmd_none(*dst_pmd));
		BUG_ON(pmd_trans_huge(*dst_pmd));

		err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				       src_addr, &page, zeropage);
		cond_resched();

		if (unlikely(err == -EFAULT)) {
			void *page_kaddr;

			/* fall back to copying the page outside mmap_sem */
			up_read(&dst_mm->mmap_sem);
			BUG_ON(!page);

			page_kaddr = kmap(page);
			err = copy_from_user(page_kaddr,
					     (const void __user *) src_addr,
					     PAGE_SIZE);
			kunmap(page);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			goto retry;
		} else
			BUG_ON(page);

		if (!err) {
			dst_addr += PAGE_SIZE;
			src_addr += PAGE_SIZE;
			copied += PAGE_SIZE;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	up_read(&dst_mm->mmap_sem);
out:
	if (page)
		put_page(page);
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}

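/* Entry point for UFFDIO_COPY: copy len bytes from src_start into dst_mm. */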
ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
		     unsigned long src_start, unsigned long len)
{
	return __mcopy_atomic(dst_mm, dst_start, src_start, len, false);
}

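/* Entry point for UFFDIO_ZEROPAGE: zero-fill len bytes starting at start. */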
ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
		       unsigned long len)
{
	return __mcopy_atomic(dst_mm, start, 0, len, true);
}