1
2#include <linux/pagewalk.h>
3#include <linux/highmem.h>
4#include <linux/sched.h>
5#include <linux/hugetlb.h>
6
7
8
9
10
11
12static int real_depth(int depth)
13{
14 if (depth == 3 && PTRS_PER_PMD == 1)
15 depth = 2;
16 if (depth == 2 && PTRS_PER_PUD == 1)
17 depth = 1;
18 if (depth == 1 && PTRS_PER_P4D == 1)
19 depth = 0;
20 return depth;
21}
22
23static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
24 unsigned long end, struct mm_walk *walk)
25{
26 const struct mm_walk_ops *ops = walk->ops;
27 int err = 0;
28
29 for (;;) {
30 err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
31 if (err)
32 break;
33 if (addr >= end - PAGE_SIZE)
34 break;
35 addr += PAGE_SIZE;
36 pte++;
37 }
38 return err;
39}
40
41static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
42 struct mm_walk *walk)
43{
44 pte_t *pte;
45 int err = 0;
46 spinlock_t *ptl;
47
48 if (walk->no_vma) {
49 pte = pte_offset_map(pmd, addr);
50 err = walk_pte_range_inner(pte, addr, end, walk);
51 pte_unmap(pte);
52 } else {
53 pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
54 err = walk_pte_range_inner(pte, addr, end, walk);
55 pte_unmap_unlock(pte, ptl);
56 }
57
58 return err;
59}
60
61static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
62 struct mm_walk *walk)
63{
64 pmd_t *pmd;
65 unsigned long next;
66 const struct mm_walk_ops *ops = walk->ops;
67 int err = 0;
68 int depth = real_depth(3);
69
70 pmd = pmd_offset(pud, addr);
71 do {
72again:
73 next = pmd_addr_end(addr, end);
74 if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
75 if (ops->pte_hole)
76 err = ops->pte_hole(addr, next, depth, walk);
77 if (err)
78 break;
79 continue;
80 }
81
82 walk->action = ACTION_SUBTREE;
83
84
85
86
87
88 if (ops->pmd_entry)
89 err = ops->pmd_entry(pmd, addr, next, walk);
90 if (err)
91 break;
92
93 if (walk->action == ACTION_AGAIN)
94 goto again;
95
96
97
98
99
100 if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
101 walk->action == ACTION_CONTINUE ||
102 !(ops->pte_entry))
103 continue;
104
105 if (walk->vma) {
106 split_huge_pmd(walk->vma, pmd, addr);
107 if (pmd_trans_unstable(pmd))
108 goto again;
109 }
110
111 err = walk_pte_range(pmd, addr, next, walk);
112 if (err)
113 break;
114 } while (pmd++, addr = next, addr != end);
115
116 return err;
117}
118
119static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
120 struct mm_walk *walk)
121{
122 pud_t *pud;
123 unsigned long next;
124 const struct mm_walk_ops *ops = walk->ops;
125 int err = 0;
126 int depth = real_depth(2);
127
128 pud = pud_offset(p4d, addr);
129 do {
130 again:
131 next = pud_addr_end(addr, end);
132 if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
133 if (ops->pte_hole)
134 err = ops->pte_hole(addr, next, depth, walk);
135 if (err)
136 break;
137 continue;
138 }
139
140 walk->action = ACTION_SUBTREE;
141
142 if (ops->pud_entry)
143 err = ops->pud_entry(pud, addr, next, walk);
144 if (err)
145 break;
146
147 if (walk->action == ACTION_AGAIN)
148 goto again;
149
150 if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
151 walk->action == ACTION_CONTINUE ||
152 !(ops->pmd_entry || ops->pte_entry))
153 continue;
154
155 if (walk->vma)
156 split_huge_pud(walk->vma, pud, addr);
157 if (pud_none(*pud))
158 goto again;
159
160 err = walk_pmd_range(pud, addr, next, walk);
161 if (err)
162 break;
163 } while (pud++, addr = next, addr != end);
164
165 return err;
166}
167
168static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
169 struct mm_walk *walk)
170{
171 p4d_t *p4d;
172 unsigned long next;
173 const struct mm_walk_ops *ops = walk->ops;
174 int err = 0;
175 int depth = real_depth(1);
176
177 p4d = p4d_offset(pgd, addr);
178 do {
179 next = p4d_addr_end(addr, end);
180 if (p4d_none_or_clear_bad(p4d)) {
181 if (ops->pte_hole)
182 err = ops->pte_hole(addr, next, depth, walk);
183 if (err)
184 break;
185 continue;
186 }
187 if (ops->p4d_entry) {
188 err = ops->p4d_entry(p4d, addr, next, walk);
189 if (err)
190 break;
191 }
192 if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
193 err = walk_pud_range(p4d, addr, next, walk);
194 if (err)
195 break;
196 } while (p4d++, addr = next, addr != end);
197
198 return err;
199}
200
201static int walk_pgd_range(unsigned long addr, unsigned long end,
202 struct mm_walk *walk)
203{
204 pgd_t *pgd;
205 unsigned long next;
206 const struct mm_walk_ops *ops = walk->ops;
207 int err = 0;
208
209 if (walk->pgd)
210 pgd = walk->pgd + pgd_index(addr);
211 else
212 pgd = pgd_offset(walk->mm, addr);
213 do {
214 next = pgd_addr_end(addr, end);
215 if (pgd_none_or_clear_bad(pgd)) {
216 if (ops->pte_hole)
217 err = ops->pte_hole(addr, next, 0, walk);
218 if (err)
219 break;
220 continue;
221 }
222 if (ops->pgd_entry) {
223 err = ops->pgd_entry(pgd, addr, next, walk);
224 if (err)
225 break;
226 }
227 if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
228 ops->pte_entry)
229 err = walk_p4d_range(pgd, addr, next, walk);
230 if (err)
231 break;
232 } while (pgd++, addr = next, addr != end);
233
234 return err;
235}
236
237#ifdef CONFIG_HUGETLB_PAGE
/*
 * Return where the huge-page entry containing @addr stops being relevant:
 * the next huge-page boundary after @addr for hstate @h, clamped to @end.
 */
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary;

	/* Round addr down to its huge page, then step one huge page forward. */
	boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	if (boundary < end)
		return boundary;

	return end;
}
244
245static int walk_hugetlb_range(unsigned long addr, unsigned long end,
246 struct mm_walk *walk)
247{
248 struct vm_area_struct *vma = walk->vma;
249 struct hstate *h = hstate_vma(vma);
250 unsigned long next;
251 unsigned long hmask = huge_page_mask(h);
252 unsigned long sz = huge_page_size(h);
253 pte_t *pte;
254 const struct mm_walk_ops *ops = walk->ops;
255 int err = 0;
256
257 do {
258 next = hugetlb_entry_end(h, addr, end);
259 pte = huge_pte_offset(walk->mm, addr & hmask, sz);
260
261 if (pte)
262 err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
263 else if (ops->pte_hole)
264 err = ops->pte_hole(addr, next, -1, walk);
265
266 if (err)
267 break;
268 } while (addr = next, addr != end);
269
270 return err;
271}
272
273#else
/*
 * Variant built when CONFIG_HUGETLB_PAGE is not defined: it ignores its
 * arguments, walks nothing and reports success.
 */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}
279
280#endif
281
282
283
284
285
286
287
288static int walk_page_test(unsigned long start, unsigned long end,
289 struct mm_walk *walk)
290{
291 struct vm_area_struct *vma = walk->vma;
292 const struct mm_walk_ops *ops = walk->ops;
293
294 if (ops->test_walk)
295 return ops->test_walk(start, end, walk);
296
297
298
299
300
301
302
303
304
305 if (vma->vm_flags & VM_PFNMAP) {
306 int err = 1;
307 if (ops->pte_hole)
308 err = ops->pte_hole(start, end, -1, walk);
309 return err ? err : 1;
310 }
311 return 0;
312}
313
314static int __walk_page_range(unsigned long start, unsigned long end,
315 struct mm_walk *walk)
316{
317 int err = 0;
318 struct vm_area_struct *vma = walk->vma;
319 const struct mm_walk_ops *ops = walk->ops;
320
321 if (vma && ops->pre_vma) {
322 err = ops->pre_vma(start, end, walk);
323 if (err)
324 return err;
325 }
326
327 if (vma && is_vm_hugetlb_page(vma)) {
328 if (ops->hugetlb_entry)
329 err = walk_hugetlb_range(start, end, walk);
330 } else
331 err = walk_pgd_range(start, end, walk);
332
333 if (vma && ops->post_vma)
334 ops->post_vma(walk);
335
336 return err;
337}
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379int walk_page_range(struct mm_struct *mm, unsigned long start,
380 unsigned long end, const struct mm_walk_ops *ops,
381 void *private)
382{
383 int err = 0;
384 unsigned long next;
385 struct vm_area_struct *vma;
386 struct mm_walk walk = {
387 .ops = ops,
388 .mm = mm,
389 .private = private,
390 };
391
392 if (start >= end)
393 return -EINVAL;
394
395 if (!walk.mm)
396 return -EINVAL;
397
398 mmap_assert_locked(walk.mm);
399
400 vma = find_vma(walk.mm, start);
401 do {
402 if (!vma) {
403 walk.vma = NULL;
404 next = end;
405 } else if (start < vma->vm_start) {
406 walk.vma = NULL;
407 next = min(end, vma->vm_start);
408 } else {
409 walk.vma = vma;
410 next = min(end, vma->vm_end);
411 vma = vma->vm_next;
412
413 err = walk_page_test(start, next, &walk);
414 if (err > 0) {
415
416
417
418
419
420 err = 0;
421 continue;
422 }
423 if (err < 0)
424 break;
425 }
426 if (walk.vma || walk.ops->pte_hole)
427 err = __walk_page_range(start, next, &walk);
428 if (err)
429 break;
430 } while (start = next, start < end);
431 return err;
432}
433
434
435
436
437
438
439
440int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
441 unsigned long end, const struct mm_walk_ops *ops,
442 pgd_t *pgd,
443 void *private)
444{
445 struct mm_walk walk = {
446 .ops = ops,
447 .mm = mm,
448 .pgd = pgd,
449 .private = private,
450 .no_vma = true
451 };
452
453 if (start >= end || !walk.mm)
454 return -EINVAL;
455
456 mmap_assert_locked(walk.mm);
457
458 return __walk_page_range(start, end, &walk);
459}
460
461int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
462 void *private)
463{
464 struct mm_walk walk = {
465 .ops = ops,
466 .mm = vma->vm_mm,
467 .vma = vma,
468 .private = private,
469 };
470 int err;
471
472 if (!walk.mm)
473 return -EINVAL;
474
475 mmap_assert_locked(walk.mm);
476
477 err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
478 if (err > 0)
479 return 0;
480 if (err < 0)
481 return err;
482 return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
483}
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
516 pgoff_t nr, const struct mm_walk_ops *ops,
517 void *private)
518{
519 struct mm_walk walk = {
520 .ops = ops,
521 .private = private,
522 };
523 struct vm_area_struct *vma;
524 pgoff_t vba, vea, cba, cea;
525 unsigned long start_addr, end_addr;
526 int err = 0;
527
528 lockdep_assert_held(&mapping->i_mmap_rwsem);
529 vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
530 first_index + nr - 1) {
531
532 vba = vma->vm_pgoff;
533 vea = vba + vma_pages(vma);
534 cba = first_index;
535 cba = max(cba, vba);
536 cea = first_index + nr;
537 cea = min(cea, vea);
538
539 start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
540 end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
541 if (start_addr >= end_addr)
542 continue;
543
544 walk.vma = vma;
545 walk.mm = vma->vm_mm;
546
547 err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
548 if (err > 0) {
549 err = 0;
550 break;
551 } else if (err < 0)
552 break;
553
554 err = __walk_page_range(start_addr, end_addr, &walk);
555 if (err)
556 break;
557 }
558
559 return err;
560}
561