// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
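
/*
 * We want to know the real level where an entry is located, ignoring any
 * folding of levels which may be happening. For example, if p4d is folded
 * then a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
 */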
static int real_depth(int depth)
{
	if (depth == 3 && PTRS_PER_PMD == 1)
		depth = 2;
	if (depth == 2 && PTRS_PER_PUD == 1)
		depth = 1;
	if (depth == 1 && PTRS_PER_P4D == 1)
		depth = 0;
	return depth;
}

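/*
 * Invoke ->pte_entry() on each pte in [addr, end). The caller has already
 * mapped (and, unless walking without a vma, locked) the page table page.
 */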
static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	for (;;) {
		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		if (addr >= end - PAGE_SIZE)
			break;
		addr += PAGE_SIZE;
		pte++;
	}
	return err;
}

static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;
	spinlock_t *ptl;

	if (walk->no_vma) {
		pte = pte_offset_map(pmd, addr);
		err = walk_pte_range_inner(pte, addr, end, walk);
		pte_unmap(pte);
	} else {
		pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
		err = walk_pte_range_inner(pte, addr, end, walk);
		pte_unmap_unlock(pte, ptl);
	}

	return err;
}

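/*
 * Walk hugepage-directory (hugepd) entries, as used by architectures such as
 * powerpc that map huge pages through a dedicated directory format. Each huge
 * pte in the range is passed to ->pte_entry() under mm->page_table_lock.
 */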
#ifdef CONFIG_ARCH_HAS_HUGEPD
static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk, int pdshift)
{
	int err = 0;
	const struct mm_walk_ops *ops = walk->ops;
	int shift = hugepd_shift(*phpd);
	int page_size = 1 << shift;

	if (!ops->pte_entry)
		return 0;

	if (addr & (page_size - 1))
		return 0;

	for (;;) {
		pte_t *pte;

		spin_lock(&walk->mm->page_table_lock);
		pte = hugepte_offset(*phpd, addr, pdshift);
		err = ops->pte_entry(pte, addr, addr + page_size, walk);
		spin_unlock(&walk->mm->page_table_lock);

		if (err)
			break;
		if (addr >= end - page_size)
			break;
		addr += page_size;
	}
	return err;
}
#else
static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk, int pdshift)
{
	return 0;
}
#endif

static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(3);

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

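		/*
		 * This implies that each ->pmd_entry() handler needs to know
		 * about pmd_trans_huge() pmds: the entry is handed over
		 * before any THP split is attempted below.
		 */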
		if (ops->pmd_entry)
			err = ops->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

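		/*
		 * Check this here so we only break down trans_huge pages
		 * when we _need_ to: i.e. only when a pte_entry() callback
		 * actually wants to visit the individual ptes.
		 */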
		if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pte_entry))
			continue;

		if (walk->vma) {
			split_huge_pmd(walk->vma, pmd, addr);
			if (pmd_trans_unstable(pmd))
				goto again;
		}

		if (is_hugepd(__hugepd(pmd_val(*pmd))))
			err = walk_hugepd_range((hugepd_t *)pmd, addr, next, walk, PMD_SHIFT);
		else
			err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;
	} while (pmd++, addr = next, addr != end);

	return err;
}

static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(2);

	pud = pud_offset(p4d, addr);
	do {
 again:
		next = pud_addr_end(addr, end);
		if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		if (ops->pud_entry)
			err = ops->pud_entry(pud, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pmd_entry || ops->pte_entry))
			continue;

		if (walk->vma)
			split_huge_pud(walk->vma, pud, addr);
		if (pud_none(*pud))
			goto again;

		if (is_hugepd(__hugepd(pud_val(*pud))))
			err = walk_hugepd_range((hugepd_t *)pud, addr, next, walk, PUD_SHIFT);
		else
			err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}

static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	p4d_t *p4d;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(1);

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}
		if (ops->p4d_entry) {
			err = ops->p4d_entry(p4d, addr, next, walk);
			if (err)
				break;
		}
		if (is_hugepd(__hugepd(p4d_val(*p4d))))
			err = walk_hugepd_range((hugepd_t *)p4d, addr, next, walk, P4D_SHIFT);
		else if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_pud_range(p4d, addr, next, walk);
		if (err)
			break;
	} while (p4d++, addr = next, addr != end);

	return err;
}

static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	if (walk->pgd)
		pgd = walk->pgd + pgd_index(addr);
	else
		pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, 0, walk);
			if (err)
				break;
			continue;
		}
		if (ops->pgd_entry) {
			err = ops->pgd_entry(pgd, addr, next, walk);
			if (err)
				break;
		}
		if (is_hugepd(__hugepd(pgd_val(*pgd))))
			err = walk_hugepd_range((hugepd_t *)pgd, addr, next, walk, PGDIR_SHIFT);
		else if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_p4d_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}

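/*
 * hugetlb vmas are walked through ->hugetlb_entry() only: huge_pte_offset()
 * is used to look up each huge pte, and missing entries are reported to
 * ->pte_hole() with depth -1.
 */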
#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);

	return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	unsigned long sz = huge_page_size(h);
	pte_t *pte;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = huge_pte_offset(walk->mm, addr & hmask, sz);

		if (pte)
			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (ops->pte_hole)
			err = ops->pte_hole(addr, next, -1, walk);

		if (err)
			break;
	} while (addr = next, addr != end);

	return err;
}

#else
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif

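/*
 * Decide whether we really walk over the current vma on [@start, @end)
 * or skip it via the returned value. Return 0 if we do walk over the
 * current vma, and return 1 if we skip the vma. Negative values mean
 * error, where we abort the current walk.
 */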
static int walk_page_test(unsigned long start, unsigned long end,
			  struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->test_walk)
		return ops->test_walk(start, end, walk);

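	/*
	 * Do not walk into VM_PFNMAP vmas: they map raw page frames without
	 * struct page backing, so the entry callbacks have nothing useful to
	 * operate on. Report the range as a hole (depth -1) if a pte_hole()
	 * callback is installed, then skip the vma by returning 1.
	 */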
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;

		if (ops->pte_hole)
			err = ops->pte_hole(start, end, -1, walk);
		return err ? err : 1;
	}
	return 0;
}

static int __walk_page_range(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	int err = 0;
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (vma && ops->pre_vma) {
		err = ops->pre_vma(start, end, walk);
		if (err)
			return err;
	}

	if (vma && is_vm_hugetlb_page(vma)) {
		if (ops->hugetlb_entry)
			err = walk_hugetlb_range(start, end, walk);
	} else
		err = walk_pgd_range(start, end, walk);

	if (vma && ops->post_vma)
		ops->post_vma(walk);

	return err;
}

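/**
 * walk_page_range - walk page table with caller specific callbacks
 * @mm:		mm_struct representing the target process of page table walk
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * Recursively walk the page table tree of the process represented by @mm
 * within the virtual address range [@start, @end). During walking, we can do
 * some caller-specific work for each entry, by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up some of these
 * callbacks, the associated entries/pages are just ignored.
 * The return values of these callbacks are commonly defined like below:
 *
 *  - 0  : succeeded to handle the current entry, and if you don't reach the
 *         end address yet, continue to walk.
 *  - >0 : succeeded to handle the current entry, and return to the caller
 *         with caller specific value.
 *  - <0 : failed to handle the current entry, and return to the caller
 *         with error code.
 *
 * Before starting to walk page tables, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
 * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
 * purpose.
 *
 * If operations need to be staged before and committed after a vma is walked,
 * there are two callbacks, pre_vma() and post_vma(). Note that post_vma()
 * is only called if pre_vma() succeeded.
 *
 * struct mm_walk keeps current values of some common data like vma and pmd,
 * which are useful for the access from callbacks. If you want to pass some
 * caller-specific data to callbacks, @private should be helpful.
 *
 * Locking:
 *   Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock,
 *   because these functions traverse the vma list and access the vma's data.
 */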
int walk_page_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, const struct mm_walk_ops *ops,
		void *private)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma;
	struct mm_walk walk = {
		.ops = ops,
		.mm = mm,
		.private = private,
	};

	if (start >= end)
		return -EINVAL;

	if (!walk.mm)
		return -EINVAL;

	mmap_assert_locked(walk.mm);

	vma = find_vma(walk.mm, start);
	do {
		if (!vma) {
			walk.vma = NULL;
			next = end;
		} else if (start < vma->vm_start) {
			walk.vma = NULL;
			next = min(end, vma->vm_start);
		} else {
			walk.vma = vma;
			next = min(end, vma->vm_end);
			vma = vma->vm_next;

			err = walk_page_test(start, next, &walk);
			if (err > 0) {
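				/*
				 * Positive return values are purely for
				 * controlling the pagewalk, so should never
				 * be passed to the callers.
				 */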
				err = 0;
				continue;
			}
			if (err < 0)
				break;
		}
		if (walk.vma || walk.ops->pte_hole)
			err = __walk_page_range(start, next, &walk);
		if (err)
			break;
	} while (start = next, start < end);
	return err;
}

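/*
 * Illustrative usage sketch: a caller that wants to count the present ptes
 * in a range could, for example, supply a pte_entry() callback and pass its
 * counter through @private. The names count_pte and count_ops below are
 * hypothetical and are not part of any kernel API.
 *
 *	static int count_pte(pte_t *pte, unsigned long addr,
 *			     unsigned long next, struct mm_walk *walk)
 *	{
 *		unsigned long *nr = walk->private;
 *
 *		if (pte_present(*pte))
 *			(*nr)++;
 *		return 0;
 *	}
 *
 *	static const struct mm_walk_ops count_ops = {
 *		.pte_entry	= count_pte,
 *	};
 *
 * The caller must hold the mmap_lock across the walk, e.g.:
 *
 *	unsigned long nr = 0;
 *
 *	mmap_read_lock(mm);
 *	walk_page_range(mm, start, end, &count_ops, &nr);
 *	mmap_read_unlock(mm);
 */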
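/*
 * Similar to walk_page_range() but can walk any page tables even if they are
 * not backed by VMAs. Because 'unusual' entries may be walked, this function
 * will also not lock the PTEs for the pte_entry() callback. This is useful
 * for walking the kernel page tables or page tables for firmware.
 */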
int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
			  unsigned long end, const struct mm_walk_ops *ops,
			  pgd_t *pgd,
			  void *private)
{
	struct mm_walk walk = {
		.ops = ops,
		.mm = mm,
		.pgd = pgd,
		.private = private,
		.no_vma = true
	};

	if (start >= end || !walk.mm)
		return -EINVAL;

	mmap_assert_locked(walk.mm);

	return __walk_page_range(start, end, &walk);
}

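/*
 * Walk the page tables covered by a single vma, invoking the same callbacks
 * as walk_page_range(). The caller must hold the mmap_lock of vma->vm_mm.
 */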
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
		void *private)
{
	struct mm_walk walk = {
		.ops = ops,
		.mm = vma->vm_mm,
		.vma = vma,
		.private = private,
	};
	int err;

	if (!walk.mm)
		return -EINVAL;

	mmap_assert_locked(walk.mm);

	err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
	if (err > 0)
		return 0;
	if (err < 0)
		return err;
	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}

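/**
 * walk_page_mapping - walk all memory areas mapped into a struct address_space.
 * @mapping:	Pointer to the struct address_space
 * @first_index: First page offset in the address_space
 * @nr:		Number of incremental page offsets to cover
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * This function walks all memory areas mapped into a struct address_space.
 * The walk is limited to only the given page-size index range, but if
 * the index boundaries cross a huge page-table entry, that entry will be
 * included.
 *
 * Also see walk_page_range() for additional information.
 *
 * Locking:
 *   This function can't require that the struct mm_struct::mmap_lock is held,
 *   since @mapping may be mapped by multiple processes. Instead
 *   @mapping->i_mmap_rwsem must be held. This might have implications in the
 *   callbacks, and they need to be aware of this.
 */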
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
		      pgoff_t nr, const struct mm_walk_ops *ops,
		      void *private)
{
	struct mm_walk walk = {
		.ops = ops,
		.private = private,
	};
	struct vm_area_struct *vma;
	pgoff_t vba, vea, cba, cea;
	unsigned long start_addr, end_addr;
	int err = 0;

	lockdep_assert_held(&mapping->i_mmap_rwsem);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
				  first_index + nr - 1) {
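		/* Clip the walk to the part of the vma inside the index range */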
		vba = vma->vm_pgoff;
		vea = vba + vma_pages(vma);
		cba = first_index;
		cba = max(cba, vba);
		cea = first_index + nr;
		cea = min(cea, vea);

		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
		if (start_addr >= end_addr)
			continue;

		walk.vma = vma;
		walk.mm = vma->vm_mm;

		err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
		if (err > 0) {
			err = 0;
			break;
		} else if (err < 0)
			break;

		err = __walk_page_range(start_addr, end_addr, &walk);
		if (err)
			break;
	}

	return err;
}