// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/vmacache.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>

#include <asm/elf.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include "internal.h"

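/*
 * Print a counter kept in pages as a kB value ((val) << (PAGE_SHIFT - 10)),
 * right-aligned to 8 digits. task_mem() below uses it to emit the Vm* and
 * Rss* lines of /proc/<pid>/status.
 */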
#define SEQ_PUT_DEC(str, val) \
		seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
	unsigned long text, lib, swap, anon, file, shmem;
	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

	anon = get_mm_counter(mm, MM_ANONPAGES);
	file = get_mm_counter(mm, MM_FILEPAGES);
	shmem = get_mm_counter(mm, MM_SHMEMPAGES);

	/*
	 * Note: to minimize their overhead, mm maintains hiwater_vm and
	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
	 * collector of these hiwater stats must therefore get total_vm
	 * and rss too, which will usually be the higher.  Barriers? not
	 * quite worth the effort here, maybe it would be worth it if we
	 * had CONFIG_MEMORY_HOTPLUG... but it's probably not.
	 */
	hiwater_vm = total_vm = mm->total_vm;
	if (hiwater_vm < mm->hiwater_vm)
		hiwater_vm = mm->hiwater_vm;
	hiwater_rss = total_rss = anon + file + shmem;
	if (hiwater_rss < mm->hiwater_rss)
		hiwater_rss = mm->hiwater_rss;

	/* split executable areas between text and lib */
	text = PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK);
	text = min(text, mm->exec_vm << PAGE_SHIFT);
	lib = (mm->exec_vm << PAGE_SHIFT) - text;

	swap = get_mm_counter(mm, MM_SWAPENTS);
	SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
	SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
	SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
	SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
	SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
	SEQ_PUT_DEC(" kB\nRssFile:\t", file);
	SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
	SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
	SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
	seq_put_decimal_ull_width(m,
		    " kB\nVmExe:\t", text >> 10, 8);
	seq_put_decimal_ull_width(m,
		    " kB\nVmLib:\t", lib >> 10, 8);
	seq_put_decimal_ull_width(m,
		    " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
	seq_puts(m, " kB\n");
	hugetlb_report_usage(m, mm);
}
#undef SEQ_PUT_DEC

unsigned long task_vsize(struct mm_struct *mm)
{
	return PAGE_SIZE * mm->total_vm;
}

unsigned long task_statm(struct mm_struct *mm,
			 unsigned long *shared, unsigned long *text,
			 unsigned long *data, unsigned long *resident)
{
	*shared = get_mm_counter(mm, MM_FILEPAGES) +
			get_mm_counter(mm, MM_SHMEMPAGES);
	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
								>> PAGE_SHIFT;
	*data = mm->data_vm + mm->stack_vm;
	*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
	return mm->total_vm;
}

#ifdef CONFIG_NUMA
/*
 * Save get_task_policy() for show_numa_map().
 */
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
	struct task_struct *task = priv->task;

	task_lock(task);
	priv->task_mempolicy = get_task_policy(task);
	mpol_get(priv->task_mempolicy);
	task_unlock(task);
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
	mpol_put(priv->task_mempolicy);
}
#else
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
}
#endif

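/*
 * seq_file iterator for the per-VMA files: *ppos holds the virtual address
 * to resume the walk from, and -1UL (set by m_next()) marks the end.
 */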
static void *m_start(struct seq_file *m, loff_t *ppos)
{
	struct proc_maps_private *priv = m->private;
	unsigned long last_addr = *ppos;
	struct mm_struct *mm;
	struct vm_area_struct *vma;

	/* See m_next(). Zero at the start or after lseek. */
	if (last_addr == -1UL)
		return NULL;

	priv->task = get_proc_task(priv->inode);
	if (!priv->task)
		return ERR_PTR(-ESRCH);

	mm = priv->mm;
	if (!mm || !mmget_not_zero(mm)) {
		put_task_struct(priv->task);
		priv->task = NULL;
		return NULL;
	}

	if (mmap_read_lock_killable(mm)) {
		mmput(mm);
		put_task_struct(priv->task);
		priv->task = NULL;
		return ERR_PTR(-EINTR);
	}

	hold_task_mempolicy(priv);
	priv->tail_vma = get_gate_vma(mm);

	vma = find_vma(mm, last_addr);
	if (vma)
		return vma;

	return priv->tail_vma;
}

static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *next, *vma = v;

	if (vma == priv->tail_vma)
		next = NULL;
	else if (vma->vm_next)
		next = vma->vm_next;
	else
		next = priv->tail_vma;

	*ppos = next ? next->vm_start : -1UL;

	return next;
}

static void m_stop(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct mm_struct *mm = priv->mm;

	if (!priv->task)
		return;

	release_task_mempolicy(priv);
	mmap_read_unlock(mm);
	mmput(mm);
	put_task_struct(priv->task);
	priv->task = NULL;
}

static int proc_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops, int psize)
{
	struct proc_maps_private *priv = __seq_open_private(file, ops, psize);

	if (!priv)
		return -ENOMEM;

	priv->inode = inode;
	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(priv->mm)) {
		int err = PTR_ERR(priv->mm);

		seq_release_private(inode, file);
		return err;
	}

	return 0;
}

static int proc_map_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct proc_maps_private *priv = seq->private;

	if (priv->mm)
		mmdrop(priv->mm);

	return seq_release_private(inode, file);
}

static int do_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops)
{
	return proc_maps_open(inode, file, ops,
				sizeof(struct proc_maps_private));
}

/*
 * Indicate if the VMA is a stack for the given task; for
 * /proc/PID/maps that is the stack of the main task.
 */
static int is_stack(struct vm_area_struct *vma)
{
	/*
	 * We make no effort to guess what a given thread considers to be
	 * its "stack".  It's not even well-defined for programs written
	 * multithreaded.
	 */
	return vma->vm_start <= vma->vm_mm->start_stack &&
		vma->vm_end >= vma->vm_mm->start_stack;
}

static void show_vma_header_prefix(struct seq_file *m,
				   unsigned long start, unsigned long end,
				   vm_flags_t flags, unsigned long long pgoff,
				   dev_t dev, unsigned long ino)
{
	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
	seq_put_hex_ll(m, NULL, start, 8);
	seq_put_hex_ll(m, "-", end, 8);
	seq_putc(m, ' ');
	seq_putc(m, flags & VM_READ ? 'r' : '-');
	seq_putc(m, flags & VM_WRITE ? 'w' : '-');
	seq_putc(m, flags & VM_EXEC ? 'x' : '-');
	seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
	seq_put_hex_ll(m, " ", pgoff, 8);
	seq_put_hex_ll(m, " ", MAJOR(dev), 2);
	seq_put_hex_ll(m, ":", MINOR(dev), 2);
	seq_put_decimal_ull(m, " ", ino);
	seq_putc(m, ' ');
}

static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	vm_flags_t flags = vma->vm_flags;
	unsigned long ino = 0;
	unsigned long long pgoff = 0;
	unsigned long start, end;
	dev_t dev = 0;
	const char *name = NULL;

	if (file) {
		struct inode *inode = file_inode(vma->vm_file);
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
	}

	start = vma->vm_start;
	end = vma->vm_end;
	show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);

	/*
	 * Print the dentry name for named mappings, and a
	 * special [heap] marker for the heap:
	 */
	if (file) {
		seq_pad(m, ' ');
		seq_file_path(m, file, "\n");
		goto done;
	}

	if (vma->vm_ops && vma->vm_ops->name) {
		name = vma->vm_ops->name(vma);
		if (name)
			goto done;
	}

	name = arch_vma_name(vma);
	if (!name) {
		if (!mm) {
			name = "[vdso]";
			goto done;
		}

		if (vma->vm_start <= mm->brk &&
		    vma->vm_end >= mm->start_brk) {
			name = "[heap]";
			goto done;
		}

		if (is_stack(vma))
			name = "[stack]";
	}

done:
	if (name) {
		seq_pad(m, ' ');
		seq_puts(m, name);
	}
	seq_putc(m, '\n');
}

static int show_map(struct seq_file *m, void *v)
{
	show_map_vma(m, v);
	return 0;
}

static const struct seq_operations proc_pid_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_map
};

static int pid_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_maps_op);
}

const struct file_operations proc_pid_maps_operations = {
	.open		= pid_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

/*
 * Proportional Set Size(PSS): my share of RSS.
 *
 * PSS of a process is the count of pages it has in memory, where each
 * page is divided by the number of processes sharing it.  So if a
 * process has 1000 pages all to itself, and 1000 shared with one other
 * process, its PSS will be 1500.
 *
 * To keep (accumulated) division errors low, we adopt a 64bit
 * fixed-point pss counter to minimize division errors. So (pss >>
 * PSS_SHIFT) would be the real byte count.
 *
 * A shift of 12 before division means (assuming 4K page size):
 * 	- 1M 3-user-pages add up to 8KB errors;
 * 	- supports mapcount up to 2^24, or 16M;
 * 	- supports PSS up to 2^52 bytes, or 4PB.
 */
#define PSS_SHIFT 12
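/*
 * Worked example: a 4 KiB page shared by three processes contributes
 * (4096 << PSS_SHIFT) / 3 = 5592405 fixed-point units to each PSS;
 * the three shares sum to 16777215, i.e. 4095 of the page's 4096
 * bytes once shifted back, so rounding loses under one byte per page.
 */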

#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
	unsigned long resident;
	unsigned long shared_clean;
	unsigned long shared_dirty;
	unsigned long private_clean;
	unsigned long private_dirty;
	unsigned long referenced;
	unsigned long anonymous;
	unsigned long lazyfree;
	unsigned long anonymous_thp;
	unsigned long shmem_thp;
	unsigned long file_thp;
	unsigned long swap;
	unsigned long shared_hugetlb;
	unsigned long private_hugetlb;
	u64 pss;
	u64 pss_anon;
	u64 pss_file;
	u64 pss_shmem;
	u64 pss_locked;
	u64 swap_pss;
	bool check_shmem_swap;
};

static void smaps_page_accumulate(struct mem_size_stats *mss,
		struct page *page, unsigned long size, unsigned long pss,
		bool dirty, bool locked, bool private)
{
	mss->pss += pss;

	if (PageAnon(page))
		mss->pss_anon += pss;
	else if (PageSwapBacked(page))
		mss->pss_shmem += pss;
	else
		mss->pss_file += pss;

	if (locked)
		mss->pss_locked += pss;

	if (dirty || PageDirty(page)) {
		if (private)
			mss->private_dirty += size;
		else
			mss->shared_dirty += size;
	} else {
		if (private)
			mss->private_clean += size;
		else
			mss->shared_clean += size;
	}
}

static void smaps_account(struct mem_size_stats *mss, struct page *page,
		bool compound, bool young, bool dirty, bool locked)
{
	int i, nr = compound ? compound_nr(page) : 1;
	unsigned long size = nr * PAGE_SIZE;

	/*
	 * First accumulate quantities that depend only on |size| and the type
	 * of the compound page.
	 */
	if (PageAnon(page)) {
		mss->anonymous += size;
		if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
			mss->lazyfree += size;
	}

	mss->resident += size;
	/* Accumulate the size in pages that have been accessed. */
	if (young || page_is_young(page) || PageReferenced(page))
		mss->referenced += size;

	/*
	 * Then accumulate quantities that may depend on sharing, or that may
	 * differ page-by-page.
	 *
	 * page_count(page) == 1 guarantees the page is mapped exactly once.
	 * If any subpage of the compound page mapped with PTE it would elevate
	 * page_count().
	 */
	if (page_count(page) == 1) {
		smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
			locked, true);
		return;
	}
	for (i = 0; i < nr; i++, page++) {
		int mapcount = page_mapcount(page);
		unsigned long pss = PAGE_SIZE << PSS_SHIFT;
		if (mapcount >= 2)
			pss /= mapcount;
		smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked,
				      mapcount < 2);
	}
}

#ifdef CONFIG_SHMEM
static int smaps_pte_hole(unsigned long addr, unsigned long end,
			  __always_unused int depth, struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;

	mss->swap += shmem_partial_swap_usage(
			walk->vma->vm_file->f_mapping, addr, end);

	return 0;
}
#else
#define smaps_pte_hole		NULL
#endif /* CONFIG_SHMEM */

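/*
 * Account a single pte: a present page, a swap entry (with swap PSS
 * pro-rated by swap count), or - for private writable shmem, when
 * check_shmem_swap is set - a pte_none slot that may hide a swapped-out
 * shmem page found via the mapping's xarray.
 */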
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	bool locked = !!(vma->vm_flags & VM_LOCKED);
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (!non_swap_entry(swpent)) {
			int mapcount;

			mss->swap += PAGE_SIZE;
			mapcount = swp_swapcount(swpent);
			if (mapcount >= 2) {
				u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;

				do_div(pss_delta, mapcount);
				mss->swap_pss += pss_delta;
			} else {
				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
			}
		} else if (is_pfn_swap_entry(swpent))
			page = pfn_swap_entry_to_page(swpent);
	} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
							&& pte_none(*pte))) {
		page = xa_load(&vma->vm_file->f_mapping->i_pages,
						linear_page_index(vma, addr));
		if (xa_is_value(page))
			mss->swap += PAGE_SIZE;
		return;
	}

	if (!page)
		return;

	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	bool locked = !!(vma->vm_flags & VM_LOCKED);
	struct page *page = NULL;

	if (pmd_present(*pmd)) {
		/* FOLL_DUMP will return -EFAULT on huge zero page */
		page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
	} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
		swp_entry_t entry = pmd_to_swp_entry(*pmd);

		if (is_migration_entry(entry))
			page = pfn_swap_entry_to_page(entry);
	}
	if (IS_ERR_OR_NULL(page))
		return;
	if (PageAnon(page))
		mss->anonymous_thp += HPAGE_PMD_SIZE;
	else if (PageSwapBacked(page))
		mss->shmem_thp += HPAGE_PMD_SIZE;
	else if (is_zone_device_page(page))
		/* pass */;
	else
		mss->file_thp += HPAGE_PMD_SIZE;
	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
}
#endif

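/*
 * pmd_entry callback: account a trans-huge pmd in one shot under its
 * lock, otherwise walk the pte table one entry at a time.
 */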
static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			   struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte;
	spinlock_t *ptl;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		smaps_pmd_entry(pmd, addr, walk);
		spin_unlock(ptl);
		goto out;
	}

	if (pmd_trans_unstable(pmd))
		goto out;
	/*
	 * The mmap_lock held all the way back in m_start() is what
	 * keeps khugepaged out of here and from collapsing things
	 * in here.
	 */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE)
		smaps_pte_entry(pte, addr, walk);
	pte_unmap_unlock(pte - 1, ptl);
out:
	cond_resched();
	return 0;
}

static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
{
	/*
	 * Don't forget to update Documentation/ on changes.
	 */
	static const char mnemonics[BITS_PER_LONG][2] = {
		/*
		 * In case if we meet a flag we don't know about.
		 */
		[0 ... (BITS_PER_LONG-1)] = "??",

		[ilog2(VM_READ)]	= "rd",
		[ilog2(VM_WRITE)]	= "wr",
		[ilog2(VM_EXEC)]	= "ex",
		[ilog2(VM_SHARED)]	= "sh",
		[ilog2(VM_MAYREAD)]	= "mr",
		[ilog2(VM_MAYWRITE)]	= "mw",
		[ilog2(VM_MAYEXEC)]	= "me",
		[ilog2(VM_MAYSHARE)]	= "ms",
		[ilog2(VM_GROWSDOWN)]	= "gd",
		[ilog2(VM_PFNMAP)]	= "pf",
		[ilog2(VM_LOCKED)]	= "lo",
		[ilog2(VM_IO)]		= "io",
		[ilog2(VM_SEQ_READ)]	= "sr",
		[ilog2(VM_RAND_READ)]	= "rr",
		[ilog2(VM_DONTCOPY)]	= "dc",
		[ilog2(VM_DONTEXPAND)]	= "de",
		[ilog2(VM_ACCOUNT)]	= "ac",
		[ilog2(VM_NORESERVE)]	= "nr",
		[ilog2(VM_HUGETLB)]	= "ht",
		[ilog2(VM_SYNC)]	= "sf",
		[ilog2(VM_ARCH_1)]	= "ar",
		[ilog2(VM_WIPEONFORK)]	= "wf",
		[ilog2(VM_DONTDUMP)]	= "dd",
#ifdef CONFIG_ARM64_BTI
		[ilog2(VM_ARM64_BTI)]	= "bt",
#endif
#ifdef CONFIG_MEM_SOFT_DIRTY
		[ilog2(VM_SOFTDIRTY)]	= "sd",
#endif
		[ilog2(VM_MIXEDMAP)]	= "mm",
		[ilog2(VM_HUGEPAGE)]	= "hg",
		[ilog2(VM_NOHUGEPAGE)]	= "nh",
		[ilog2(VM_MERGEABLE)]	= "mg",
		[ilog2(VM_UFFD_MISSING)]= "um",
		[ilog2(VM_UFFD_WP)]	= "uw",
#ifdef CONFIG_ARM64_MTE
		[ilog2(VM_MTE)]		= "mt",
		[ilog2(VM_MTE_ALLOWED)]	= "",
#endif
#ifdef CONFIG_ARCH_HAS_PKEYS
		/* These come out via ProtectionKey: */
		[ilog2(VM_PKEY_BIT0)]	= "",
		[ilog2(VM_PKEY_BIT1)]	= "",
		[ilog2(VM_PKEY_BIT2)]	= "",
		[ilog2(VM_PKEY_BIT3)]	= "",
#if VM_PKEY_BIT4
		[ilog2(VM_PKEY_BIT4)]	= "",
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
		[ilog2(VM_UFFD_MINOR)]	= "ui",
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
	};
	size_t i;

	seq_puts(m, "VmFlags: ");
	for (i = 0; i < BITS_PER_LONG; i++) {
		if (!mnemonics[i][0])
			continue;
		if (vma->vm_flags & (1UL << i)) {
			seq_putc(m, mnemonics[i][0]);
			seq_putc(m, mnemonics[i][1]);
			seq_putc(m, ' ');
		}
	}
	seq_putc(m, '\n');
}

#ifdef CONFIG_HUGETLB_PAGE
static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (is_pfn_swap_entry(swpent))
			page = pfn_swap_entry_to_page(swpent);
	}
	if (page) {
		int mapcount = page_mapcount(page);

		if (mapcount >= 2)
			mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
		else
			mss->private_hugetlb += huge_page_size(hstate_vma(vma));
	}
	return 0;
}
#else
#define smaps_hugetlb_range	NULL
#endif /* HUGETLB_PAGE */

static const struct mm_walk_ops smaps_walk_ops = {
	.pmd_entry		= smaps_pte_range,
	.hugetlb_entry		= smaps_hugetlb_range,
};

static const struct mm_walk_ops smaps_shmem_walk_ops = {
	.pmd_entry		= smaps_pte_range,
	.hugetlb_entry		= smaps_hugetlb_range,
	.pte_hole		= smaps_pte_hole,
};

/*
 * Gather mem stats from @vma with the indicated beginning
 * address @start, and keep them in @mss.
 *
 * Use vm_start of @vma as the beginning address if @start is 0.
 */
static void smap_gather_stats(struct vm_area_struct *vma,
		struct mem_size_stats *mss, unsigned long start)
{
	const struct mm_walk_ops *ops = &smaps_walk_ops;

	/* Invalid start */
	if (start >= vma->vm_end)
		return;

#ifdef CONFIG_SHMEM
	/* In case of smaps_rollup, reset the value from previous vma */
	mss->check_shmem_swap = false;
	if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
		/*
		 * For shared or readonly shmem mappings we know that all
		 * swapped out pages belong to the shmem object, and we can
		 * obtain the swap value much more efficiently. For private
		 * writable mappings, we might have COW pages that are
		 * not affected by the parent swapped out pages of the shmem
		 * object, so we have to distinguish them during the page walk.
		 * Unless we know that the shmem object (or the part mapped by
		 * our VMA) has no swapped out pages at all.
		 */
		unsigned long shmem_swapped = shmem_swap_usage(vma);

		if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
					!(vma->vm_flags & VM_WRITE))) {
			mss->swap += shmem_swapped;
		} else {
			mss->check_shmem_swap = true;
			ops = &smaps_shmem_walk_ops;
		}
	}
#endif
	/* mmap_lock is held in m_start */
	if (!start)
		walk_page_vma(vma, ops, mss);
	else
		walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss);
}

#define SEQ_PUT_DEC(str, val) \
		seq_put_decimal_ull_width(m, str, (val) >> 10, 8)

/* Show the contents common for smaps and smaps_rollup */
static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
	bool rollup_mode)
{
	SEQ_PUT_DEC("Rss:            ", mss->resident);
	SEQ_PUT_DEC(" kB\nPss:            ", mss->pss >> PSS_SHIFT);
	if (rollup_mode) {
		/*
		 * These are meaningful only for smaps_rollup, otherwise two of
		 * them are zero, and the other one is the same as Pss.
		 */
		SEQ_PUT_DEC(" kB\nPss_Anon:       ",
			mss->pss_anon >> PSS_SHIFT);
		SEQ_PUT_DEC(" kB\nPss_File:       ",
			mss->pss_file >> PSS_SHIFT);
		SEQ_PUT_DEC(" kB\nPss_Shmem:      ",
			mss->pss_shmem >> PSS_SHIFT);
	}
	SEQ_PUT_DEC(" kB\nShared_Clean:   ", mss->shared_clean);
	SEQ_PUT_DEC(" kB\nShared_Dirty:   ", mss->shared_dirty);
	SEQ_PUT_DEC(" kB\nPrivate_Clean:  ", mss->private_clean);
	SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
	SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
	SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
	SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
	SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
	SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
	SEQ_PUT_DEC(" kB\nFilePmdMapped:  ", mss->file_thp);
	SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
	seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
				  mss->private_hugetlb >> 10, 7);
	SEQ_PUT_DEC(" kB\nSwap:           ", mss->swap);
	SEQ_PUT_DEC(" kB\nSwapPss:        ",
					mss->swap_pss >> PSS_SHIFT);
	SEQ_PUT_DEC(" kB\nLocked:         ",
					mss->pss_locked >> PSS_SHIFT);
	seq_puts(m, " kB\n");
}

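/* Per-VMA output for /proc/<pid>/smaps; smaps_rollup sums over all VMAs. */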
static int show_smap(struct seq_file *m, void *v)
{
	struct vm_area_struct *vma = v;
	struct mem_size_stats mss;

	memset(&mss, 0, sizeof(mss));

	smap_gather_stats(vma, &mss, 0);

	show_map_vma(m, vma);

	SEQ_PUT_DEC("Size:           ", vma->vm_end - vma->vm_start);
	SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
	SEQ_PUT_DEC(" kB\nMMUPageSize:    ", vma_mmu_pagesize(vma));
	seq_puts(m, " kB\n");

	__show_smap(m, &mss, false);

	seq_printf(m, "THPeligible:    %d\n",
		   transparent_hugepage_active(vma));

	if (arch_pkeys_enabled())
		seq_printf(m, "ProtectionKey:  %8u\n", vma_pkey(vma));
	show_smap_vma_flags(m, vma);

	return 0;
}

static int show_smaps_rollup(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct mem_size_stats mss;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long last_vma_end = 0;
	int ret = 0;

	priv->task = get_proc_task(priv->inode);
	if (!priv->task)
		return -ESRCH;

	mm = priv->mm;
	if (!mm || !mmget_not_zero(mm)) {
		ret = -ESRCH;
		goto out_put_task;
	}

	memset(&mss, 0, sizeof(mss));

	ret = mmap_read_lock_killable(mm);
	if (ret)
		goto out_put_mm;

	hold_task_mempolicy(priv);

	for (vma = priv->mm->mmap; vma;) {
		smap_gather_stats(vma, &mss, 0);
		last_vma_end = vma->vm_end;

		/*
		 * Release mmap_lock temporarily if it's contended to
		 * unblock write waiters.
		 */
		if (mmap_lock_is_contended(mm)) {
			mmap_read_unlock(mm);
			ret = mmap_read_lock_killable(mm);
			if (ret) {
				release_task_mempolicy(priv);
				goto out_put_mm;
			}

			/*
			 * After dropping the lock, there are four cases to
			 * consider. See the following example for explanation.
			 *
			 *   +------+------+-----------+
			 *   | VMA1 | VMA2 | VMA3      |
			 *   +------+------+-----------+
			 *   |      |      |           |
			 *  4k     8k     16k         400k
			 *
			 * Suppose we drop the lock after reading VMA2 due to
			 * contention, then we get:
			 *
			 *	last_vma_end = 16k
			 *
			 * 1) VMA2 is freed, but VMA3 exists:
			 *
			 *    find_vma(mm, 16k - 1) will return VMA3.
			 *    In this case, just continue from VMA3.
			 *
			 * 2) VMA2 still exists:
			 *
			 *    find_vma(mm, 16k - 1) will return VMA2.
			 *    Iterate the loop like the original one.
			 *
			 * 3) No more VMAs can be found:
			 *
			 *    find_vma(mm, 16k - 1) will return NULL.
			 *    No more things to do, just break.
			 *
			 * 4) (last_vma_end - 1) is the middle of a vma (VMA'):
			 *
			 *    find_vma(mm, 16k - 1) will return VMA' whose range
			 *    contains last_vma_end.
			 *    Iterate VMA' from last_vma_end.
			 */
			vma = find_vma(mm, last_vma_end - 1);
			/* Case 3 above */
			if (!vma)
				break;

			/* Case 1 above */
			if (vma->vm_start >= last_vma_end)
				continue;

			/* Case 4 above */
			if (vma->vm_end > last_vma_end)
				smap_gather_stats(vma, &mss, last_vma_end);
		}
		/* Case 2 above */
		vma = vma->vm_next;
	}

	show_vma_header_prefix(m, priv->mm->mmap->vm_start,
			       last_vma_end, 0, 0, 0, 0);
	seq_pad(m, ' ');
	seq_puts(m, "[rollup]\n");

	__show_smap(m, &mss, true);

	release_task_mempolicy(priv);
	mmap_read_unlock(mm);

out_put_mm:
	mmput(mm);
out_put_task:
	put_task_struct(priv->task);
	priv->task = NULL;

	return ret;
}
#undef SEQ_PUT_DEC

static const struct seq_operations proc_pid_smaps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_smap
};

static int pid_smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_smaps_op);
}

static int smaps_rollup_open(struct inode *inode, struct file *file)
{
	int ret;
	struct proc_maps_private *priv;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL_ACCOUNT);
	if (!priv)
		return -ENOMEM;

	ret = single_open(file, show_smaps_rollup, priv);
	if (ret)
		goto out_free;

	priv->inode = inode;
	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(priv->mm)) {
		ret = PTR_ERR(priv->mm);

		single_release(inode, file);
		goto out_free;
	}

	return 0;

out_free:
	kfree(priv);
	return ret;
}

static int smaps_rollup_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct proc_maps_private *priv = seq->private;

	if (priv->mm)
		mmdrop(priv->mm);

	kfree(priv);
	return single_release(inode, file);
}

const struct file_operations proc_pid_smaps_operations = {
	.open		= pid_smaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

const struct file_operations proc_pid_smaps_rollup_operations = {
	.open		= smaps_rollup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= smaps_rollup_release,
};

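/* Values accepted by writes to /proc/<pid>/clear_refs. */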
enum clear_refs_types {
	CLEAR_REFS_ALL = 1,
	CLEAR_REFS_ANON,
	CLEAR_REFS_MAPPED,
	CLEAR_REFS_SOFT_DIRTY,
	CLEAR_REFS_MM_HIWATER_RSS,
	CLEAR_REFS_LAST,
};

struct clear_refs_private {
	enum clear_refs_types type;
};

#ifdef CONFIG_MEM_SOFT_DIRTY

static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
	struct page *page;

	if (!pte_write(pte))
		return false;
	if (!is_cow_mapping(vma->vm_flags))
		return false;
	if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
		return false;
	page = vm_normal_page(vma, addr, pte);
	if (!page)
		return false;
	return page_maybe_dma_pinned(page);
}

static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
	/*
	 * The soft-dirty tracker uses #PF-s to catch writes
	 * to pages, so write-protect the pte as well. See the
	 * Documentation/admin-guide/mm/soft-dirty.rst for full description
	 * of how soft-dirty works.
	 */
	pte_t ptent = *pte;

	if (pte_present(ptent)) {
		pte_t old_pte;

		if (pte_is_pinned(vma, addr, ptent))
			return;
		old_pte = ptep_modify_prot_start(vma, addr, pte);
		ptent = pte_wrprotect(old_pte);
		ptent = pte_clear_soft_dirty(ptent);
		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_clear_soft_dirty(ptent);
		set_pte_at(vma->vm_mm, addr, pte, ptent);
	}
}
#else
static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
}
#endif

#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
	pmd_t old, pmd = *pmdp;

	if (pmd_present(pmd)) {
		/* See comment in change_huge_pmd() */
		old = pmdp_invalidate(vma, addr, pmdp);
		if (pmd_dirty(old))
			pmd = pmd_mkdirty(pmd);
		if (pmd_young(old))
			pmd = pmd_mkyoung(pmd);

		pmd = pmd_wrprotect(pmd);
		pmd = pmd_clear_soft_dirty(pmd);

		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
	} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
		pmd = pmd_swp_clear_soft_dirty(pmd);
		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
	}
}
#else
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
}
#endif

static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte, ptent;
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty_pmd(vma, addr, pmd);
			goto out;
		}

		if (!pmd_present(*pmd))
			goto out;

		page = pmd_page(*pmd);

		/* Clear accessed and referenced bits. */
		pmdp_test_and_clear_young(vma, addr, pmd);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
out:
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;

		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty(vma, addr, pte);
			continue;
		}

		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/* Clear accessed and referenced bits. */
		ptep_test_and_clear_young(vma, addr, pte);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
	}
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	return 0;
}

static int clear_refs_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;

	if (vma->vm_flags & VM_PFNMAP)
		return 1;

	/*
	 * Writing 1 to /proc/pid/clear_refs affects all pages.
	 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
	 * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
	 * Writing 4 to /proc/pid/clear_refs affects all pages.
	 */
	if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
		return 1;
	if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
		return 1;
	return 0;
}

static const struct mm_walk_ops clear_refs_walk_ops = {
	.pmd_entry		= clear_refs_pte_range,
	.test_walk		= clear_refs_test_walk,
};

static ssize_t clear_refs_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF];
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	enum clear_refs_types type;
	int itype;
	int rv;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	rv = kstrtoint(strstrip(buffer), 10, &itype);
	if (rv < 0)
		return rv;
	type = (enum clear_refs_types)itype;
	if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
		return -EINVAL;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;
	mm = get_task_mm(task);
	if (mm) {
		struct mmu_notifier_range range;
		struct clear_refs_private cp = {
			.type = type,
		};

		if (mmap_write_lock_killable(mm)) {
			count = -EINTR;
			goto out_mm;
		}
		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
			/*
			 * Writing 5 to /proc/pid/clear_refs resets the peak
			 * resident set size of this mm.
			 */
			reset_mm_hiwater_rss(mm);
			goto out_unlock;
		}

		if (type == CLEAR_REFS_SOFT_DIRTY) {
			for (vma = mm->mmap; vma; vma = vma->vm_next) {
				if (!(vma->vm_flags & VM_SOFTDIRTY))
					continue;
				vma->vm_flags &= ~VM_SOFTDIRTY;
				vma_set_page_prot(vma);
			}

			inc_tlb_flush_pending(mm);
			mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
						0, NULL, mm, 0, -1UL);
			mmu_notifier_invalidate_range_start(&range);
		}
		walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
				&cp);
		if (type == CLEAR_REFS_SOFT_DIRTY) {
			mmu_notifier_invalidate_range_end(&range);
			flush_tlb_mm(mm);
			dec_tlb_flush_pending(mm);
		}
out_unlock:
		mmap_write_unlock(mm);
out_mm:
		mmput(mm);
	}
	put_task_struct(task);

	return count;
}

const struct file_operations proc_clear_refs_operations = {
	.write		= clear_refs_write,
	.llseek		= noop_llseek,
};

typedef struct {
	u64 pme;
} pagemap_entry_t;

struct pagemapread {
	int pos, len;		/* units: PM_ENTRY_BYTES, not bytes */
	pagemap_entry_t *buffer;
	bool show_pfn;
};

#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
#define PAGEMAP_WALK_MASK	(PMD_MASK)

#define PM_ENTRY_BYTES		sizeof(pagemap_entry_t)
#define PM_PFRAME_BITS		55
#define PM_PFRAME_MASK		GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY		BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE	BIT_ULL(56)
#define PM_UFFD_WP		BIT_ULL(57)
#define PM_FILE			BIT_ULL(61)
#define PM_SWAP			BIT_ULL(62)
#define PM_PRESENT		BIT_ULL(63)

#define PM_END_OF_BUFFER    1

static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
{
	return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
}

static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
			  struct pagemapread *pm)
{
	pm->buffer[pm->pos++] = *pme;
	if (pm->pos >= pm->len)
		return PM_END_OF_BUFFER;
	return 0;
}

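/*
 * Holes (unmapped ranges) report zero entries, except that PM_SOFT_DIRTY
 * is set for the portion covered by a VMA with VM_SOFTDIRTY.
 */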
static int pagemap_pte_hole(unsigned long start, unsigned long end,
			    __always_unused int depth, struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	unsigned long addr = start;
	int err = 0;

	while (addr < end) {
		struct vm_area_struct *vma = find_vma(walk->mm, addr);
		pagemap_entry_t pme = make_pme(0, 0);
		/* End of address space hole, which we mark as non-present. */
		unsigned long hole_end;

		if (vma)
			hole_end = min(end, vma->vm_start);
		else
			hole_end = end;

		for (; addr < hole_end; addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}

		if (!vma)
			break;

		/* Addresses in the VMA. */
		if (vma->vm_flags & VM_SOFTDIRTY)
			pme = make_pme(0, PM_SOFT_DIRTY);
		for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}
	}
out:
	return err;
}

static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
		struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
	u64 frame = 0, flags = 0;
	struct page *page = NULL;

	if (pte_present(pte)) {
		if (pm->show_pfn)
			frame = pte_pfn(pte);
		flags |= PM_PRESENT;
		page = vm_normal_page(vma, addr, pte);
		if (pte_soft_dirty(pte))
			flags |= PM_SOFT_DIRTY;
		if (pte_uffd_wp(pte))
			flags |= PM_UFFD_WP;
	} else if (is_swap_pte(pte)) {
		swp_entry_t entry;
		if (pte_swp_soft_dirty(pte))
			flags |= PM_SOFT_DIRTY;
		if (pte_swp_uffd_wp(pte))
			flags |= PM_UFFD_WP;
		entry = pte_to_swp_entry(pte);
		if (pm->show_pfn)
			frame = swp_type(entry) |
				(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
		flags |= PM_SWAP;
		if (is_pfn_swap_entry(entry))
			page = pfn_swap_entry_to_page(entry);
	}

	if (page && !PageAnon(page))
		flags |= PM_FILE;
	if (page && page_mapcount(page) == 1)
		flags |= PM_MMAP_EXCLUSIVE;
	if (vma->vm_flags & VM_SOFTDIRTY)
		flags |= PM_SOFT_DIRTY;

	return make_pme(frame, flags);
}

static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
			     struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct pagemapread *pm = walk->private;
	spinlock_t *ptl;
	pte_t *pte, *orig_pte;
	int err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmdp, vma);
	if (ptl) {
		u64 flags = 0, frame = 0;
		pmd_t pmd = *pmdp;
		struct page *page = NULL;

		if (vma->vm_flags & VM_SOFTDIRTY)
			flags |= PM_SOFT_DIRTY;

		if (pmd_present(pmd)) {
			page = pmd_page(pmd);

			flags |= PM_PRESENT;
			if (pmd_soft_dirty(pmd))
				flags |= PM_SOFT_DIRTY;
			if (pmd_uffd_wp(pmd))
				flags |= PM_UFFD_WP;
			if (pm->show_pfn)
				frame = pmd_pfn(pmd) +
					((addr & ~PMD_MASK) >> PAGE_SHIFT);
		}
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		else if (is_swap_pmd(pmd)) {
			swp_entry_t entry = pmd_to_swp_entry(pmd);
			unsigned long offset;

			if (pm->show_pfn) {
				offset = swp_offset(entry) +
					((addr & ~PMD_MASK) >> PAGE_SHIFT);
				frame = swp_type(entry) |
					(offset << MAX_SWAPFILES_SHIFT);
			}
			flags |= PM_SWAP;
			if (pmd_swp_soft_dirty(pmd))
				flags |= PM_SOFT_DIRTY;
			if (pmd_swp_uffd_wp(pmd))
				flags |= PM_UFFD_WP;
			VM_BUG_ON(!is_pmd_migration_entry(pmd));
			page = pfn_swap_entry_to_page(entry);
		}
#endif

		if (page && page_mapcount(page) == 1)
			flags |= PM_MMAP_EXCLUSIVE;

		for (; addr != end; addr += PAGE_SIZE) {
			pagemap_entry_t pme = make_pme(frame, flags);

			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				break;
			if (pm->show_pfn) {
				if (flags & PM_PRESENT)
					frame++;
				else if (flags & PM_SWAP)
					frame += (1 << MAX_SWAPFILES_SHIFT);
			}
		}
		spin_unlock(ptl);
		return err;
	}

	if (pmd_trans_unstable(pmdp))
		return 0;
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

	/*
	 * We can assume that @vma always points to a valid one and @end never
	 * goes beyond vma->vm_end.
	 */
	orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
	for (; addr < end; pte++, addr += PAGE_SIZE) {
		pagemap_entry_t pme;

		pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			break;
	}
	pte_unmap_unlock(orig_pte, ptl);

	cond_resched();

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
/* This function walks within one hugetlb entry in the single call */
static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	struct vm_area_struct *vma = walk->vma;
	u64 flags = 0, frame = 0;
	int err = 0;
	pte_t pte;

	if (vma->vm_flags & VM_SOFTDIRTY)
		flags |= PM_SOFT_DIRTY;

	pte = huge_ptep_get(ptep);
	if (pte_present(pte)) {
		struct page *page = pte_page(pte);

		if (!PageAnon(page))
			flags |= PM_FILE;

		if (page_mapcount(page) == 1)
			flags |= PM_MMAP_EXCLUSIVE;

		flags |= PM_PRESENT;
		if (pm->show_pfn)
			frame = pte_pfn(pte) +
				((addr & ~hmask) >> PAGE_SHIFT);
	}

	for (; addr != end; addr += PAGE_SIZE) {
		pagemap_entry_t pme = make_pme(frame, flags);

		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			return err;
		if (pm->show_pfn && (flags & PM_PRESENT))
			frame++;
	}

	cond_resched();

	return err;
}
#else
#define pagemap_hugetlb_range	NULL
#endif /* HUGETLB_PAGE */

static const struct mm_walk_ops pagemap_ops = {
	.pmd_entry	= pagemap_pmd_range,
	.pte_hole	= pagemap_pte_hole,
	.hugetlb_entry	= pagemap_hugetlb_range,
};

/*
 * /proc/pid/pagemap - an array mapping virtual pages to pfns
 *
 * For each page in the address space, this file contains one 64-bit entry
 * consisting of the following:
 *
 * Bits 0-54  page frame number (PFN) if present
 * Bits 0-4   swap type if swapped
 * Bits 5-54  swap offset if swapped
 * Bit  55    pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
 * Bit  56    page exclusively mapped
 * Bit  57    pte is uffd-wp write-protected
 * Bits 58-60 zero
 * Bit  61    page is file-page or shared-anon
 * Bit  62    page swapped
 * Bit  63    page present
 *
 * If the page is not present but in swap, then the PFN contains an
 * encoding of the swap file number and the page's offset into the
 * swap. Unswapped pages are returned in zeros.
 *
 * The PFN field (including the swap encoding) is only filled in for
 * readers with CAP_SYS_ADMIN; see pm->show_pfn below. Reads must be a
 * multiple of PM_ENTRY_BYTES, and the file offset selects the first
 * virtual page: vaddr = (*ppos / PM_ENTRY_BYTES) << PAGE_SHIFT.
 */
static ssize_t pagemap_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct mm_struct *mm = file->private_data;
	struct pagemapread pm;
	unsigned long src;
	unsigned long svpfn;
	unsigned long start_vaddr;
	unsigned long end_vaddr;
	int ret = 0, copied = 0;

	if (!mm || !mmget_not_zero(mm))
		goto out;

	ret = -EINVAL;
	/* file position must be aligned */
	if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
		goto out_mm;

	ret = 0;
	if (!count)
		goto out_mm;

	/* do not disclose physical addresses: attack vector */
	pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);

	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
	pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
	ret = -ENOMEM;
	if (!pm.buffer)
		goto out_mm;

	src = *ppos;
	svpfn = src / PM_ENTRY_BYTES;
	end_vaddr = mm->task_size;

	/* watch out for wraparound */
	start_vaddr = end_vaddr;
	if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
		start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);

	/* Ensure the address is inside the task */
	if (start_vaddr > mm->task_size)
		start_vaddr = end_vaddr;

	/*
	 * The odds are that this will stop walking way
	 * before end_vaddr, because the length of the
	 * user buffer is tracked in "pm", and the walk
	 * stops when we hit the end of the buffer.
	 */
	ret = 0;
	while (count && (start_vaddr < end_vaddr)) {
		int len;
		unsigned long end;

		pm.pos = 0;
		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
		/* overflow ? */
		if (end < start_vaddr || end > end_vaddr)
			end = end_vaddr;
		ret = mmap_read_lock_killable(mm);
		if (ret)
			goto out_free;
		ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
		mmap_read_unlock(mm);
		start_vaddr = end;

		len = min(count, PM_ENTRY_BYTES * pm.pos);
		if (copy_to_user(buf, pm.buffer, len)) {
			ret = -EFAULT;
			goto out_free;
		}
		copied += len;
		buf += len;
		count -= len;
	}
	*ppos += copied;
	if (!ret || ret == PM_END_OF_BUFFER)
		ret = copied;

out_free:
	kfree(pm.buffer);
out_mm:
	mmput(mm);
out:
	return ret;
}

static int pagemap_open(struct inode *inode, struct file *file)
{
	struct mm_struct *mm;

	mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(mm))
		return PTR_ERR(mm);
	file->private_data = mm;
	return 0;
}

static int pagemap_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;

	if (mm)
		mmdrop(mm);
	return 0;
}

const struct file_operations proc_pagemap_operations = {
	.llseek		= mem_lseek, /* borrow this */
	.read		= pagemap_read,
	.open		= pagemap_open,
	.release	= pagemap_release,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */

#ifdef CONFIG_NUMA

struct numa_maps {
	unsigned long pages;
	unsigned long anon;
	unsigned long active;
	unsigned long writeback;
	unsigned long mapcount_max;
	unsigned long dirty;
	unsigned long swapcache;
	unsigned long node[MAX_NUMNODES];
};

struct numa_maps_private {
	struct proc_maps_private proc_maps;
	struct numa_maps md;
};

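/* Fold one page's state (dirty, swapcache, active, ...) into the per-node tallies. */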
static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
			unsigned long nr_pages)
{
	int count = page_mapcount(page);

	md->pages += nr_pages;
	if (pte_dirty || PageDirty(page))
		md->dirty += nr_pages;

	if (PageSwapCache(page))
		md->swapcache += nr_pages;

	if (PageActive(page) || PageUnevictable(page))
		md->active += nr_pages;

	if (PageWriteback(page))
		md->writeback += nr_pages;

	if (PageAnon(page))
		md->anon += nr_pages;

	if (count > md->mapcount_max)
		md->mapcount_max = count;

	md->node[page_to_nid(page)] += nr_pages;
}

static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
		unsigned long addr)
{
	struct page *page;
	int nid;

	if (!pte_present(pte))
		return NULL;

	page = vm_normal_page(vma, addr, pte);
	if (!page)
		return NULL;

	if (PageReserved(page))
		return NULL;

	nid = page_to_nid(page);
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
					      struct vm_area_struct *vma,
					      unsigned long addr)
{
	struct page *page;
	int nid;

	if (!pmd_present(pmd))
		return NULL;

	page = vm_normal_page_pmd(vma, addr, pmd);
	if (!page)
		return NULL;

	if (PageReserved(page))
		return NULL;

	nid = page_to_nid(page);
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}
#endif

static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
		unsigned long end, struct mm_walk *walk)
{
	struct numa_maps *md = walk->private;
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *orig_pte;
	pte_t *pte;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		struct page *page;

		page = can_gather_numa_stats_pmd(*pmd, vma, addr);
		if (page)
			gather_stats(page, md, pmd_dirty(*pmd),
				     HPAGE_PMD_SIZE/PAGE_SIZE);
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;
#endif
	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	do {
		struct page *page = can_gather_numa_stats(*pte, vma, addr);
		if (!page)
			continue;
		gather_stats(page, md, pte_dirty(*pte), 1);

	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(orig_pte, ptl);
	cond_resched();
	return 0;
}
#ifdef CONFIG_HUGETLB_PAGE
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	pte_t huge_pte = huge_ptep_get(pte);
	struct numa_maps *md;
	struct page *page;

	if (!pte_present(huge_pte))
		return 0;

	page = pte_page(huge_pte);
	if (!page)
		return 0;

	md = walk->private;
	gather_stats(page, md, pte_dirty(huge_pte), 1);
	return 0;
}

#else
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	return 0;
}
#endif

static const struct mm_walk_ops show_numa_ops = {
	.hugetlb_entry = gather_hugetlb_stats,
	.pmd_entry = gather_pte_stats,
};

/*
 * Display pages allocated per node and memory policy via /proc.
 */
static int show_numa_map(struct seq_file *m, void *v)
{
	struct numa_maps_private *numa_priv = m->private;
	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
	struct vm_area_struct *vma = v;
	struct numa_maps *md = &numa_priv->md;
	struct file *file = vma->vm_file;
	struct mm_struct *mm = vma->vm_mm;
	struct mempolicy *pol;
	char buffer[64];
	int nid;

	if (!mm)
		return 0;

	/* Ensure we start with an empty set of numa_maps statistics. */
	memset(md, 0, sizeof(*md));

	pol = __get_vma_policy(vma, vma->vm_start);
	if (pol) {
		mpol_to_str(buffer, sizeof(buffer), pol);
		mpol_cond_put(pol);
	} else {
		mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy);
	}

	seq_printf(m, "%08lx %s", vma->vm_start, buffer);

	if (file) {
		seq_puts(m, " file=");
		seq_file_path(m, file, "\n\t= ");
	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
		seq_puts(m, " heap");
	} else if (is_stack(vma)) {
		seq_puts(m, " stack");
	}

	if (is_vm_hugetlb_page(vma))
		seq_puts(m, " huge");

	/* mmap_lock is held by m_start */
	walk_page_vma(vma, &show_numa_ops, md);

	if (!md->pages)
		goto out;

	if (md->anon)
		seq_printf(m, " anon=%lu", md->anon);

	if (md->dirty)
		seq_printf(m, " dirty=%lu", md->dirty);

	if (md->pages != md->anon && md->pages != md->dirty)
		seq_printf(m, " mapped=%lu", md->pages);

	if (md->mapcount_max > 1)
		seq_printf(m, " mapmax=%lu", md->mapcount_max);

	if (md->swapcache)
		seq_printf(m, " swapcache=%lu", md->swapcache);

	if (md->active < md->pages && !is_vm_hugetlb_page(vma))
		seq_printf(m, " active=%lu", md->active);

	if (md->writeback)
		seq_printf(m, " writeback=%lu", md->writeback);

	for_each_node_state(nid, N_MEMORY)
		if (md->node[nid])
			seq_printf(m, " N%d=%lu", nid, md->node[nid]);

	seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
out:
	seq_putc(m, '\n');
	return 0;
}

static const struct seq_operations proc_pid_numa_maps_op = {
	.start  = m_start,
	.next   = m_next,
	.stop   = m_stop,
	.show   = show_numa_map,
};

static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
	return proc_maps_open(inode, file, &proc_pid_numa_maps_op,
				sizeof(struct numa_maps_private));
}

const struct file_operations proc_pid_numa_maps_operations = {
	.open		= pid_numa_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

#endif /* CONFIG_NUMA */