/*
 * Memory merging support.
 *
 * This code enables dynamic sharing of identical pages found in different
 * memory areas, even if they are not shared by fork().
 *
 * Copyright (C) 2008-2009 Red Hat, Inc.
 * Authors:
 *	Izik Eidus
 *	Andrea Arcangeli
 *	Chris Wright
 *	Hugh Dickins
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/spinlock.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/memory.h>
#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <linux/ksm.h>
#include <linux/hash.h>
#include <linux/freezer.h>
#include <linux/oom.h>

#include <asm/tlbflush.h>
#include "internal.h"

/*
 * A few notes about the KSM scanning process, to make it easier to understand
 * the data structures below:
 *
 * In order to reduce excessive scanning, KSM sorts the memory pages by their
 * contents into a data structure that holds pointers to the pages' locations.
 *
 * Since the contents of the pages may change at any moment, KSM cannot just
 * insert the pages into a normal sorted tree and expect it to find anything.
 * Therefore KSM uses two data structures - the stable and the unstable tree.
 *
 * The stable tree holds pointers to all the merged pages (ksm pages), sorted
 * by their contents.  Because each such page is write-protected, searching on
 * this tree is fully assured to be working (except when pages are unmapped),
 * and therefore this tree is called the stable tree.
 *
 * In addition to the stable tree, KSM uses a second data structure called the
 * unstable tree: this tree holds pointers to pages which have been found to
 * be "unchanged for a period of time".  The unstable tree sorts these pages
 * by their contents, but since they are not write-protected, KSM cannot rely
 * upon the unstable tree to work correctly - the unstable tree is liable to
 * be corrupted as its contents are modified, and so it is called unstable.
 *
 * KSM solves this problem by several techniques:
 *
 * 1) The unstable tree is flushed every time KSM completes scanning all
 *    memory areas, and then the tree is rebuilt again from the beginning.
 * 2) KSM will only insert into the unstable tree, pages whose hash value
 *    has not changed since the previous scan of all memory areas.
 * 3) The unstable tree is a RedBlack Tree - so its balancing is based on the
 *    colors of the nodes and not on their contents, assuring that even when
 *    the tree gets "corrupted" it won't get out of balance, so scanning time
 *    remains the same (also, searching and inserting nodes in an rbtree uses
 *    the same algorithm, so we have no overhead when we flush and rebuild).
 * 4) KSM never flushes the stable tree, which means that even if it were to
 *    take 10 attempts to find a page in the unstable tree, once it is found,
 *    it is secured in the stable tree.  (When we scan a new page, we first
 *    compare it against the stable tree, and then against the unstable tree.)
 */

/**
 * struct mm_slot - ksm information per mm that is being scanned
 * @link: link to the mm_slots hash list
 * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
 * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
 * @mm: the mm that this information is valid for
 */
struct mm_slot {
	struct hlist_node link;
	struct list_head mm_list;
	struct rmap_item *rmap_list;
	struct mm_struct *mm;
};

/**
 * struct ksm_scan - cursor for scanning
 * @mm_slot: the current mm_slot we are scanning
 * @address: the next address inside that to be scanned
 * @rmap_list: link to the next rmap to be scanned in the rmap_list
 * @seqnr: count of completed full scans (needed when removing unstable node)
 *
 * There is only the one ksm_scan instance of this cursor structure.
 */
struct ksm_scan {
	struct mm_slot *mm_slot;
	unsigned long address;
	struct rmap_item **rmap_list;
	unsigned long seqnr;
};

/**
 * struct stable_node - node of the stable rbtree
 * @node: rb node of this ksm page in the stable tree
 * @hlist: hlist head of rmap_items using this ksm page
 * @kpfn: page frame number of this ksm page
 */
struct stable_node {
	struct rb_node node;
	struct hlist_head hlist;
	unsigned long kpfn;
};

/**
 * struct rmap_item - reverse mapping item for virtual addresses
 * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
 * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
 * @mm: the memory structure this rmap_item is pointing into
 * @address: the virtual address this rmap_item tracks (+ flags in low bits)
 * @oldchecksum: previous checksum of the page at that virtual address
 * @node: rb node of this rmap_item in the unstable tree
 * @head: pointer to stable_node heading this list in the stable tree
 * @hlist: link into hlist of rmap_items hanging off that stable_node
 */
struct rmap_item {
	struct rmap_item *rmap_list;
	struct anon_vma *anon_vma;	/* when stable */
	struct mm_struct *mm;
	unsigned long address;		/* + low bits used for flags below */
	unsigned int oldchecksum;	/* when unstable */
	union {
		struct rb_node node;	/* when node of unstable tree */
		struct {		/* when listed from stable tree */
			struct stable_node *head;
			struct hlist_node hlist;
		};
	};
};

#define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
#define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
#define STABLE_FLAG	0x200	/* is listed from the stable tree */
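
/*
 * Worked example (added for illustration, not in the original source): an
 * rmap_item for userspace address 0x7f1200aa000, inserted into the unstable
 * tree during full scan number 5, has
 *	address == 0x7f1200aa000 | UNSTABLE_FLAG | (5 & SEQNR_MASK)
 * and the virtual address is recovered as address & PAGE_MASK, since all
 * three flag/seqnr fields live below PAGE_SIZE.
 */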

/* The stable and unstable tree heads */
static struct rb_root root_stable_tree = RB_ROOT;
static struct rb_root root_unstable_tree = RB_ROOT;

#define MM_SLOTS_HASH_SHIFT 10
#define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
static struct hlist_head mm_slots_hash[MM_SLOTS_HASH_HEADS];

static struct mm_slot ksm_mm_head = {
	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
};
static struct ksm_scan ksm_scan = {
	.mm_slot = &ksm_mm_head,
};

static struct kmem_cache *rmap_item_cache;
static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;

/* The number of nodes in the stable tree */
static unsigned long ksm_pages_shared;

/* The number of page slots additionally sharing those nodes */
static unsigned long ksm_pages_sharing;

/* The number of nodes in the unstable tree */
static unsigned long ksm_pages_unshared;

/* The number of rmap_items in use: to calculate pages_volatile */
static unsigned long ksm_rmap_items;

/* Number of pages ksmd should scan in one batch */
static unsigned int ksm_thread_pages_to_scan = 100;

/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;

#define KSM_RUN_STOP	0
#define KSM_RUN_MERGE	1
#define KSM_RUN_UNMERGE	2
static unsigned int ksm_run = KSM_RUN_STOP;

static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
static DEFINE_SPINLOCK(ksm_mmlist_lock);

#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
		sizeof(struct __struct), __alignof__(struct __struct),\
		(__flags), NULL)
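
/*
 * For illustration (added comment): KSM_KMEM_CACHE(rmap_item, 0) expands to
 *	kmem_cache_create("ksm_rmap_item", sizeof(struct rmap_item),
 *			  __alignof__(struct rmap_item), 0, NULL);
 */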

static int __init ksm_slab_init(void)
{
	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
	if (!rmap_item_cache)
		goto out;

	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
	if (!stable_node_cache)
		goto out_free1;

	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
	if (!mm_slot_cache)
		goto out_free2;

	return 0;

out_free2:
	kmem_cache_destroy(stable_node_cache);
out_free1:
	kmem_cache_destroy(rmap_item_cache);
out:
	return -ENOMEM;
}

static void __init ksm_slab_free(void)
{
	kmem_cache_destroy(mm_slot_cache);
	kmem_cache_destroy(stable_node_cache);
	kmem_cache_destroy(rmap_item_cache);
	mm_slot_cache = NULL;
}

static inline struct rmap_item *alloc_rmap_item(void)
{
	struct rmap_item *rmap_item;

	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
	if (rmap_item)
		ksm_rmap_items++;
	return rmap_item;
}

static inline void free_rmap_item(struct rmap_item *rmap_item)
{
	ksm_rmap_items--;
	rmap_item->mm = NULL;	/* debug safety */
	kmem_cache_free(rmap_item_cache, rmap_item);
}

static inline struct stable_node *alloc_stable_node(void)
{
	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
}

static inline void free_stable_node(struct stable_node *stable_node)
{
	kmem_cache_free(stable_node_cache, stable_node);
}

static inline struct mm_slot *alloc_mm_slot(void)
{
	if (!mm_slot_cache)	/* initialization failed */
		return NULL;
	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
}

static inline void free_mm_slot(struct mm_slot *mm_slot)
{
	kmem_cache_free(mm_slot_cache, mm_slot);
}

static struct mm_slot *get_mm_slot(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	struct hlist_head *bucket;
	struct hlist_node *node;

	bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
	hlist_for_each_entry(mm_slot, node, bucket, link) {
		if (mm == mm_slot->mm)
			return mm_slot;
	}
	return NULL;
}

static void insert_to_mm_slots_hash(struct mm_struct *mm,
				    struct mm_slot *mm_slot)
{
	struct hlist_head *bucket;

	bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
	mm_slot->mm = mm;
	hlist_add_head(&mm_slot->link, bucket);
}

static inline int in_stable_tree(struct rmap_item *rmap_item)
{
	return rmap_item->address & STABLE_FLAG;
}

/*
 * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
 * page tables after it has passed through ksm_exit(): which, if necessary,
 * takes mmap_sem briefly to serialize against them.  ksm_exit() does not set
 * a special flag: they can just back out as soon as mm_users goes to zero.
 * ksm_test_exit() is used throughout to make this test for exit: in some
 * places for correctness, in some places merely to avoid unnecessary work.
 */
static inline bool ksm_test_exit(struct mm_struct *mm)
{
	return atomic_read(&mm->mm_users) == 0;
}

/*
 * We use break_ksm to break COW on a ksm page: it's a stripped down
 *
 *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
 *		put_page(page);
 *
 * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
 * in case the application has unmapped and remapped mm,addr meanwhile.
 * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
 * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
 */
static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	int ret = 0;

	do {
		cond_resched();
		page = follow_page(vma, addr, FOLL_GET);
		if (IS_ERR_OR_NULL(page))
			break;
		if (PageKsm(page))
			ret = handle_mm_fault(vma->vm_mm, vma, addr,
							FAULT_FLAG_WRITE);
		else
			ret = VM_FAULT_WRITE;
		put_page(page);
	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));
	/*
	 * We must loop until VM_FAULT_WRITE, because handle_mm_fault() may
	 * back out if there's any difficulty, e.g. if the pte accessed bit
	 * gets updated concurrently.
	 *
	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
	 * COW has been broken, even if the vma does not permit VM_WRITE;
	 * but note that a concurrent fault might break PageKsm for us.
	 *
	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
	 * backing file, which also invalidates anonymous pages: that's okay,
	 * the truncation will have unmapped the PageKsm for us.
	 *
	 * VM_FAULT_OOM can occur if the mm is in a limited mem_cgroup and
	 * the fault fails; but ksm_do_scan will retry break_cow on each
	 * pass, so should recover the page in the end.  Report it to the
	 * caller as -ENOMEM here.
	 */
	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
}

static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
		unsigned long addr)
{
	struct vm_area_struct *vma;
	if (ksm_test_exit(mm))
		return NULL;
	vma = find_vma(mm, addr);
	if (!vma || vma->vm_start > addr)
		return NULL;
	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
		return NULL;
	return vma;
}

static void break_cow(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;
	unsigned long addr = rmap_item->address;
	struct vm_area_struct *vma;

	/*
	 * It is not an accident that whenever we want to break COW
	 * to undo, we also need to drop a reference to the anon_vma.
	 */
	put_anon_vma(rmap_item->anon_vma);

	down_read(&mm->mmap_sem);
	vma = find_mergeable_vma(mm, addr);
	if (vma)
		break_ksm(vma, addr);
	up_read(&mm->mmap_sem);
}

static struct page *page_trans_compound_anon(struct page *page)
{
	if (PageTransCompound(page)) {
		struct page *head = compound_trans_head(page);
		/*
		 * head may actually be split and freed from under us,
		 * but that's ok here: PageAnon is checked again by the
		 * callers once they hold a reference on head.
		 */
		if (PageAnon(head))
			return head;
	}
	return NULL;
}

static struct page *get_mergeable_page(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;
	unsigned long addr = rmap_item->address;
	struct vm_area_struct *vma;
	struct page *page;

	down_read(&mm->mmap_sem);
	vma = find_mergeable_vma(mm, addr);
	if (!vma)
		goto out;

	page = follow_page(vma, addr, FOLL_GET);
	if (IS_ERR_OR_NULL(page))
		goto out;
	if (PageAnon(page) || page_trans_compound_anon(page)) {
		flush_anon_page(vma, page, addr);
		flush_dcache_page(page);
	} else {
		put_page(page);
out:		page = NULL;
	}
	up_read(&mm->mmap_sem);
	return page;
}

static void remove_node_from_stable_tree(struct stable_node *stable_node)
{
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;

	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		if (rmap_item->hlist.next)
			ksm_pages_sharing--;
		else
			ksm_pages_shared--;
		put_anon_vma(rmap_item->anon_vma);
		rmap_item->address &= PAGE_MASK;
		cond_resched();
	}

	rb_erase(&stable_node->node, &root_stable_tree);
	free_stable_node(stable_node);
}

/*
 * get_ksm_page: checks if the page indicated by the stable node
 * is still its ksm page, despite having held no reference to it.
 * In which case we can trust the content of the page, and it
 * returns the gotten page; but if the page has now been zapped,
 * remove the stale node from the stable tree and return NULL.
 *
 * You would expect the stable_node to hold a reference to the ksm page.
 * But if it increments the page's count, swapping out has to wait for
 * ksmd to come around again before it can free the page, which may take
 * seconds or even minutes: much too unpleasant.
 *
 * So instead we hold no reference: the stable_node records the ksm page's
 * pfn, and the ksm page records its stable_node in page->mapping (tagged
 * with PAGE_MAPPING_ANON | PAGE_MAPPING_KSM).  To look the page up safely,
 * check that page->mapping still points back to the stable_node, take a
 * reference with get_page_unless_zero(), then re-check page->mapping in
 * case the page was freed and reused in between.
 */
static struct page *get_ksm_page(struct stable_node *stable_node)
{
	struct page *page;
	void *expected_mapping;

	page = pfn_to_page(stable_node->kpfn);
	expected_mapping = (void *)stable_node +
				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
	rcu_read_lock();
	if (page->mapping != expected_mapping)
		goto stale;
	if (!get_page_unless_zero(page))
		goto stale;
	if (page->mapping != expected_mapping) {
		put_page(page);
		goto stale;
	}
	rcu_read_unlock();
	return page;
stale:
	rcu_read_unlock();
	remove_node_from_stable_tree(stable_node);
	return NULL;
}

/*
 * Removing rmap_item from stable or unstable tree.
 * This function will clean the information from the stable/unstable tree.
 */
static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
{
	if (rmap_item->address & STABLE_FLAG) {
		struct stable_node *stable_node;
		struct page *page;

		stable_node = rmap_item->head;
		page = get_ksm_page(stable_node);
		if (!page)
			goto out;

		lock_page(page);
		hlist_del(&rmap_item->hlist);
		unlock_page(page);
		put_page(page);

		if (stable_node->hlist.first)
			ksm_pages_sharing--;
		else
			ksm_pages_shared--;

		put_anon_vma(rmap_item->anon_vma);
		rmap_item->address &= PAGE_MASK;

	} else if (rmap_item->address & UNSTABLE_FLAG) {
		unsigned char age;
		/*
		 * Usually ksmd can and must skip the rb_erase, because
		 * root_unstable_tree was already reset to RB_ROOT.
		 * But be careful when an mm is exiting: do the rb_erase
		 * if this rmap_item was inserted by this scan, rather
		 * than left over from before.
		 */
		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
		BUG_ON(age > 1);
		if (!age)
			rb_erase(&rmap_item->node, &root_unstable_tree);

		ksm_pages_unshared--;
		rmap_item->address &= PAGE_MASK;
	}
out:
	cond_resched();		/* we're called from many long loops */
}

static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
				       struct rmap_item **rmap_list)
{
	while (*rmap_list) {
		struct rmap_item *rmap_item = *rmap_list;
		*rmap_list = rmap_item->rmap_list;
		remove_rmap_item_from_tree(rmap_item);
		free_rmap_item(rmap_item);
	}
}

/*
 * Though it's very tempting to unmerge rmap_items from stable tree rather
 * than check every pte of a given vma, the locking doesn't quite work for
 * that - an rmap_item is assigned to the stable tree after inserting ksm
 * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
 * rmap_items from parent to child at fork time (so as not to waste time
 * if exit comes before the next scan reaches it).
 *
 * Similarly, although we'd like to remove rmap_items (so updating counts
 * and freeing memory) when unmerging an area, it's easier to leave that
 * to the next pass of ksmd - consider, for example, how ksmd might be
 * in cmp_and_merge_page on one of the rmap_items we would be removing.
 */
static int unmerge_ksm_pages(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end)
{
	unsigned long addr;
	int err = 0;

	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
		if (ksm_test_exit(vma->vm_mm))
			break;
		if (signal_pending(current))
			err = -ERESTARTSYS;
		else
			err = break_ksm(vma, addr);
	}
	return err;
}

#ifdef CONFIG_SYSFS
/*
 * Only called through the sysfs control interface:
 */
static int unmerge_and_remove_all_rmap_items(void)
{
	struct mm_slot *mm_slot;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int err = 0;

	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
						struct mm_slot, mm_list);
	spin_unlock(&ksm_mmlist_lock);

	for (mm_slot = ksm_scan.mm_slot;
			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
		mm = mm_slot->mm;
		down_read(&mm->mmap_sem);
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			if (ksm_test_exit(mm))
				break;
			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
				continue;
			err = unmerge_ksm_pages(vma,
						vma->vm_start, vma->vm_end);
			if (err)
				goto error;
		}

		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);

		spin_lock(&ksm_mmlist_lock);
		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
						struct mm_slot, mm_list);
		if (ksm_test_exit(mm)) {
			hlist_del(&mm_slot->link);
			list_del(&mm_slot->mm_list);
			spin_unlock(&ksm_mmlist_lock);

			free_mm_slot(mm_slot);
			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
			up_read(&mm->mmap_sem);
			mmdrop(mm);
		} else {
			spin_unlock(&ksm_mmlist_lock);
			up_read(&mm->mmap_sem);
		}
	}

	ksm_scan.seqnr = 0;
	return 0;

error:
	up_read(&mm->mmap_sem);
	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = &ksm_mm_head;
	spin_unlock(&ksm_mmlist_lock);
	return err;
}
#endif /* CONFIG_SYSFS */

static u32 calc_checksum(struct page *page)
{
	u32 checksum;
	void *addr = kmap_atomic(page);
	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
	kunmap_atomic(addr);
	return checksum;
}
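
/*
 * Note (added comment): jhash2() above hashes the page as PAGE_SIZE / 4
 * 32-bit words with initial value 17.  cmp_and_merge_page() compares this
 * checksum against the one from the previous scan, and keeps a page out of
 * the unstable tree while its checksum is still changing.
 */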

static int memcmp_pages(struct page *page1, struct page *page2)
{
	char *addr1, *addr2;
	int ret;

	addr1 = kmap_atomic(page1);
	addr2 = kmap_atomic(page2);
	ret = memcmp(addr1, addr2, PAGE_SIZE);
	kunmap_atomic(addr2);
	kunmap_atomic(addr1);
	return ret;
}

static inline int pages_identical(struct page *page1, struct page *page2)
{
	return !memcmp_pages(page1, page2);
}

static int write_protect_page(struct vm_area_struct *vma, struct page *page,
			      pte_t *orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr;
	pte_t *ptep;
	spinlock_t *ptl;
	int swapped;
	int err = -EFAULT;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	BUG_ON(PageTransCompound(page));

	mmun_start = addr;
	mmun_end   = addr + PAGE_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	ptep = page_check_address(page, mm, addr, &ptl, 0);
	if (!ptep)
		goto out_mn;

	if (pte_write(*ptep) || pte_dirty(*ptep)) {
		pte_t entry;

		swapped = PageSwapCache(page);
		flush_cache_page(vma, addr, page_to_pfn(page));
		/*
		 * Ok this is tricky: when get_user_pages_fast() runs it
		 * doesn't take any lock, therefore the check we are going
		 * to make with the pagecount against the mapcount is racy,
		 * and O_DIRECT can happen right after the check.
		 * So we clear the pte and flush the tlb before the check:
		 * this assures us that no O_DIRECT can happen after the
		 * check or in the middle of the check.
		 */
		entry = ptep_clear_flush(vma, addr, ptep);
		/*
		 * Check that no O_DIRECT or similar I/O is in progress on
		 * the page.
		 */
		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
			set_pte_at(mm, addr, ptep, entry);
			goto out_unlock;
		}
		if (pte_dirty(entry))
			set_page_dirty(page);
		entry = pte_mkclean(pte_wrprotect(entry));
		set_pte_at_notify(mm, addr, ptep, entry);
	}
	*orig_pte = *ptep;
	err = 0;

out_unlock:
	pte_unmap_unlock(ptep, ptl);
out_mn:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
	return err;
}

/**
 * replace_page - replace page in vma by new ksm page
 * @vma:      vma that holds the pte pointing to page
 * @page:     the page we are replacing by kpage
 * @kpage:    the ksm page we replace page by
 * @orig_pte: the original value of the pte
 *
 * Returns 0 on success, -EFAULT on failure.
 */
static int replace_page(struct vm_area_struct *vma, struct page *page,
			struct page *kpage, pte_t orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	pmd_t *pmd;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long addr;
	int err = -EFAULT;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	pmd = mm_find_pmd(mm, addr);
	if (!pmd)
		goto out;
	BUG_ON(pmd_trans_huge(*pmd));

	mmun_start = addr;
	mmun_end   = addr + PAGE_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte_same(*ptep, orig_pte)) {
		pte_unmap_unlock(ptep, ptl);
		goto out_mn;
	}

	get_page(kpage);
	page_add_anon_rmap(kpage, vma, addr);

	flush_cache_page(vma, addr, pte_pfn(*ptep));
	ptep_clear_flush(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

	page_remove_rmap(page);
	if (!page_mapped(page))
		try_to_free_swap(page);
	put_page(page);

	pte_unmap_unlock(ptep, ptl);
	err = 0;
out_mn:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
	return err;
}

static int page_trans_compound_anon_split(struct page *page)
{
	int ret = 0;
	struct page *transhuge_head = page_trans_compound_anon(page);
	if (transhuge_head) {
		/* Get the reference on the head to split it. */
		if (get_page_unless_zero(transhuge_head)) {
			/*
			 * Recheck we got the reference while the head
			 * was still anonymous.
			 */
			if (PageAnon(transhuge_head))
				ret = split_huge_page(transhuge_head);
			else
				/*
				 * Retry later if split_huge_page run
				 * from under us.
				 */
				ret = 1;
			put_page(transhuge_head);
		} else
			/* Retry later if split_huge_page run from under us. */
			ret = 1;
	}
	return ret;
}

/*
 * try_to_merge_one_page - take two pages and merge them into one
 * @vma: the vma that holds the pte pointing to page
 * @page: the PageAnon page that we want to replace with kpage
 * @kpage: the PageKsm page that we want to map instead of page,
 *         or NULL the first time when we want to use page as kpage.
 *
 * This function returns 0 if the pages were merged, -EFAULT otherwise.
 */
static int try_to_merge_one_page(struct vm_area_struct *vma,
				 struct page *page, struct page *kpage)
{
	pte_t orig_pte = __pte(0);
	int err = -EFAULT;

	if (page == kpage)			/* ksm page forked */
		return 0;

	if (!(vma->vm_flags & VM_MERGEABLE))
		goto out;
	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
		goto out;
	BUG_ON(PageTransCompound(page));
	if (!PageAnon(page))
		goto out;

	/*
	 * We need the page lock to read a stable PageSwapCache in
	 * write_protect_page().  We use trylock_page() instead of
	 * lock_page() because we don't want to wait here - we
	 * prefer to continue scanning and merging different pages,
	 * then come back to this page when it is unlocked.
	 */
	if (!trylock_page(page))
		goto out;
	/*
	 * If this anonymous page is mapped only here, its pte may need
	 * to be write-protected.  If it's mapped elsewhere, all of its
	 * ptes are necessarily already write-protected.  But in either
	 * case, we need to lock and check page_count is not raised.
	 */
	if (write_protect_page(vma, page, &orig_pte) == 0) {
		if (!kpage) {
			/*
			 * While we hold page lock, upgrade page from
			 * PageAnon+anon_vma to PageKsm+NULL stable_node:
			 * stable_tree_insert() will update stable_node.
			 */
			set_page_stable_node(page, NULL);
			mark_page_accessed(page);
			err = 0;
		} else if (pages_identical(page, kpage))
			err = replace_page(vma, page, kpage, orig_pte);
	}

	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
		munlock_vma_page(page);
		if (!PageMlocked(kpage)) {
			unlock_page(page);
			lock_page(kpage);
			mlock_vma_page(kpage);
			page = kpage;		/* for final unlock */
		}
	}

	unlock_page(page);
out:
	return err;
}

/*
 * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
 * but no new kernel page is allocated: kpage must already be a ksm page.
 *
 * This function returns 0 if the pages were merged, -EFAULT otherwise.
 */
static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
				      struct page *page, struct page *kpage)
{
	struct mm_struct *mm = rmap_item->mm;
	struct vm_area_struct *vma;
	int err = -EFAULT;

	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		goto out;
	vma = find_vma(mm, rmap_item->address);
	if (!vma || vma->vm_start > rmap_item->address)
		goto out;

	err = try_to_merge_one_page(vma, page, kpage);
	if (err)
		goto out;

	/* Must get reference to anon_vma while still holding mmap_sem */
	rmap_item->anon_vma = vma->anon_vma;
	get_anon_vma(vma->anon_vma);
out:
	up_read(&mm->mmap_sem);
	return err;
}

/*
 * try_to_merge_two_pages - take two identical pages and prepare them
 * to be merged into one page.
 *
 * This function returns the kpage if we successfully merged two identical
 * pages into one ksm page, NULL otherwise.
 *
 * Note that this function upgrades page to ksm page: if one of the pages
 * is already a ksm page, try_to_merge_with_ksm_page should be used.
 */
static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
					   struct page *page,
					   struct rmap_item *tree_rmap_item,
					   struct page *tree_page)
{
	int err;

	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
	if (!err) {
		err = try_to_merge_with_ksm_page(tree_rmap_item,
							tree_page, page);
		/*
		 * If that fails, we have a ksm page with only one pte
		 * pointing to it: so break it.
		 */
		if (err)
			break_cow(rmap_item);
	}
	return err ? NULL : page;
}

/*
 * stable_tree_search - search for page inside the stable tree
 *
 * This function checks if there is a page inside the stable tree
 * with identical content to the page that we are scanning right now.
 *
 * This function returns the stable tree node of identical content if found,
 * NULL otherwise.
 */
static struct page *stable_tree_search(struct page *page)
{
	struct rb_node *node = root_stable_tree.rb_node;
	struct stable_node *stable_node;

	stable_node = page_stable_node(page);
	if (stable_node) {			/* ksm page forked */
		get_page(page);
		return page;
	}

	while (node) {
		struct page *tree_page;
		int ret;

		cond_resched();
		stable_node = rb_entry(node, struct stable_node, node);
		tree_page = get_ksm_page(stable_node);
		if (!tree_page)
			return NULL;

		ret = memcmp_pages(page, tree_page);

		if (ret < 0) {
			put_page(tree_page);
			node = node->rb_left;
		} else if (ret > 0) {
			put_page(tree_page);
			node = node->rb_right;
		} else
			return tree_page;
	}

	return NULL;
}

/*
 * stable_tree_insert - insert stable tree node pointing to new ksm page
 * into the stable tree.
 *
 * This function returns the stable tree node just allocated on success,
 * NULL otherwise.
 */
static struct stable_node *stable_tree_insert(struct page *kpage)
{
	struct rb_node **new = &root_stable_tree.rb_node;
	struct rb_node *parent = NULL;
	struct stable_node *stable_node;

	while (*new) {
		struct page *tree_page;
		int ret;

		cond_resched();
		stable_node = rb_entry(*new, struct stable_node, node);
		tree_page = get_ksm_page(stable_node);
		if (!tree_page)
			return NULL;

		ret = memcmp_pages(kpage, tree_page);
		put_page(tree_page);

		parent = *new;
		if (ret < 0)
			new = &parent->rb_left;
		else if (ret > 0)
			new = &parent->rb_right;
		else {
			/*
			 * Identical content already in the stable tree?
			 * Don't insert a duplicate node: back out, and
			 * let a later pass merge with the existing page.
			 */
			return NULL;
		}
	}

	stable_node = alloc_stable_node();
	if (!stable_node)
		return NULL;

	rb_link_node(&stable_node->node, parent, new);
	rb_insert_color(&stable_node->node, &root_stable_tree);

	INIT_HLIST_HEAD(&stable_node->hlist);

	stable_node->kpfn = page_to_pfn(kpage);
	set_page_stable_node(kpage, stable_node);

	return stable_node;
}

/*
 * unstable_tree_search_insert - search for identical page,
 * else insert rmap_item into the unstable tree.
 *
 * This function searches for a page in the unstable tree identical to the
 * page currently being scanned; and if no identical page is found in the
 * tree, we insert rmap_item as a new object into the unstable tree.
 *
 * This function returns a pointer to the rmap_item found to be identical
 * to the currently scanned page, NULL otherwise.
 *
 * This function does both searching and inserting, because they share
 * the same walking algorithm in an rbtree.
 */
static
struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
					      struct page *page,
					      struct page **tree_pagep)
{
	struct rb_node **new = &root_unstable_tree.rb_node;
	struct rb_node *parent = NULL;

	while (*new) {
		struct rmap_item *tree_rmap_item;
		struct page *tree_page;
		int ret;

		cond_resched();
		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
		tree_page = get_mergeable_page(tree_rmap_item);
		if (IS_ERR_OR_NULL(tree_page))
			return NULL;

		/*
		 * Don't substitute a ksm page for a forked page.
		 */
		if (page == tree_page) {
			put_page(tree_page);
			return NULL;
		}

		ret = memcmp_pages(page, tree_page);

		parent = *new;
		if (ret < 0) {
			put_page(tree_page);
			new = &parent->rb_left;
		} else if (ret > 0) {
			put_page(tree_page);
			new = &parent->rb_right;
		} else {
			*tree_pagep = tree_page;
			return tree_rmap_item;
		}
	}

	rmap_item->address |= UNSTABLE_FLAG;
	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
	rb_link_node(&rmap_item->node, parent, new);
	rb_insert_color(&rmap_item->node, &root_unstable_tree);

	ksm_pages_unshared++;
	return NULL;
}

/*
 * stable_tree_append - add another rmap_item to the linked list of
 * rmap_items hanging off a given node of the stable tree, all sharing
 * the same ksm page.
 */
static void stable_tree_append(struct rmap_item *rmap_item,
			       struct stable_node *stable_node)
{
	rmap_item->head = stable_node;
	rmap_item->address |= STABLE_FLAG;
	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);

	if (rmap_item->hlist.next)
		ksm_pages_sharing++;
	else
		ksm_pages_shared++;
}

/*
 * cmp_and_merge_page - first see if page can be merged into the stable tree;
 * if not, compare checksum to previous and if it's the same, see if page can
 * be inserted into the unstable tree, or merged with a page already there and
 * both transferred to the stable tree.
 *
 * @page: the page that we are searching identical page to.
 * @rmap_item: the reverse mapping into the virtual address of this page
 */
static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
{
	struct rmap_item *tree_rmap_item;
	struct page *tree_page = NULL;
	struct stable_node *stable_node;
	struct page *kpage;
	unsigned int checksum;
	int err;

	remove_rmap_item_from_tree(rmap_item);

	/* We first start by searching for the page inside the stable tree */
	kpage = stable_tree_search(page);
	if (kpage) {
		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
		if (!err) {
			/*
			 * The page was successfully merged:
			 * add its rmap_item to the stable tree.
			 */
			lock_page(kpage);
			stable_tree_append(rmap_item, page_stable_node(kpage));
			unlock_page(kpage);
		}
		put_page(kpage);
		return;
	}

	/*
	 * If the hash value of the page has changed from the last time
	 * we calculated it, this page is changing frequently: therefore we
	 * don't want to insert it in the unstable tree, and we don't want
	 * to waste our time searching for something identical to it there.
	 */
	checksum = calc_checksum(page);
	if (rmap_item->oldchecksum != checksum) {
		rmap_item->oldchecksum = checksum;
		return;
	}

	tree_rmap_item =
		unstable_tree_search_insert(rmap_item, page, &tree_page);
	if (tree_rmap_item) {
		kpage = try_to_merge_two_pages(rmap_item, page,
						tree_rmap_item, tree_page);
		put_page(tree_page);
		/*
		 * As soon as we merge this page, we want to remove the
		 * rmap_item of the page we have merged with from the unstable
		 * tree, and insert it instead as new node in the stable tree.
		 */
		if (kpage) {
			remove_rmap_item_from_tree(tree_rmap_item);

			lock_page(kpage);
			stable_node = stable_tree_insert(kpage);
			if (stable_node) {
				stable_tree_append(tree_rmap_item, stable_node);
				stable_tree_append(rmap_item, stable_node);
			}
			unlock_page(kpage);

			/*
			 * If we fail to insert the page into the stable tree,
			 * we will have 2 virtual addresses that are pointing
			 * to a ksm page left outside the stable tree,
			 * in which case we need to break_cow on both.
			 */
			if (!stable_node) {
				break_cow(tree_rmap_item);
				break_cow(rmap_item);
			}
		}
	}
}

static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
					    struct rmap_item **rmap_list,
					    unsigned long addr)
{
	struct rmap_item *rmap_item;

	while (*rmap_list) {
		rmap_item = *rmap_list;
		if ((rmap_item->address & PAGE_MASK) == addr)
			return rmap_item;
		if (rmap_item->address > addr)
			break;
		*rmap_list = rmap_item->rmap_list;
		remove_rmap_item_from_tree(rmap_item);
		free_rmap_item(rmap_item);
	}

	rmap_item = alloc_rmap_item();
	if (rmap_item) {
		/* It has already been zeroed */
		rmap_item->mm = mm_slot->mm;
		rmap_item->address = addr;
		rmap_item->rmap_list = *rmap_list;
		*rmap_list = rmap_item;
	}
	return rmap_item;
}

static struct rmap_item *scan_get_next_rmap_item(struct page **page)
{
	struct mm_struct *mm;
	struct mm_slot *slot;
	struct vm_area_struct *vma;
	struct rmap_item *rmap_item;

	if (list_empty(&ksm_mm_head.mm_list))
		return NULL;

	slot = ksm_scan.mm_slot;
	if (slot == &ksm_mm_head) {
		/*
		 * A number of pages can hang around indefinitely on per-cpu
		 * pagevecs, raised page count preventing write_protect_page
		 * from merging them.  Though it doesn't really matter much,
		 * it is puzzling to see some stuck in pages_volatile until
		 * other activity jostles them out; so drain them here,
		 * before starting each full-scan cycle.
		 */
		lru_add_drain_all();

		root_unstable_tree = RB_ROOT;

		spin_lock(&ksm_mmlist_lock);
		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
		ksm_scan.mm_slot = slot;
		spin_unlock(&ksm_mmlist_lock);

		/*
		 * Although we tested list_empty() above, a racing __ksm_exit
		 * of the last mm on the list may have removed it since then.
		 */
		if (slot == &ksm_mm_head)
			return NULL;
next_mm:
		ksm_scan.address = 0;
		ksm_scan.rmap_list = &slot->rmap_list;
	}

	mm = slot->mm;
	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		vma = NULL;
	else
		vma = find_vma(mm, ksm_scan.address);

	for (; vma; vma = vma->vm_next) {
		if (!(vma->vm_flags & VM_MERGEABLE))
			continue;
		if (ksm_scan.address < vma->vm_start)
			ksm_scan.address = vma->vm_start;
		if (!vma->anon_vma)
			ksm_scan.address = vma->vm_end;

		while (ksm_scan.address < vma->vm_end) {
			if (ksm_test_exit(mm))
				break;
			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
			if (IS_ERR_OR_NULL(*page)) {
				ksm_scan.address += PAGE_SIZE;
				cond_resched();
				continue;
			}
			if (PageAnon(*page) ||
			    page_trans_compound_anon(*page)) {
				flush_anon_page(vma, *page, ksm_scan.address);
				flush_dcache_page(*page);
				rmap_item = get_next_rmap_item(slot,
					ksm_scan.rmap_list, ksm_scan.address);
				if (rmap_item) {
					ksm_scan.rmap_list =
							&rmap_item->rmap_list;
					ksm_scan.address += PAGE_SIZE;
				} else
					put_page(*page);
				up_read(&mm->mmap_sem);
				return rmap_item;
			}
			put_page(*page);
			ksm_scan.address += PAGE_SIZE;
			cond_resched();
		}
	}

	if (ksm_test_exit(mm)) {
		ksm_scan.address = 0;
		ksm_scan.rmap_list = &slot->rmap_list;
	}
	/*
	 * Nuke all the rmap_items that are above this current rmap:
	 * because there were no VM_MERGEABLE vmas with such addresses.
	 */
	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);

	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
						struct mm_slot, mm_list);
	if (ksm_scan.address == 0) {
		/*
		 * We've completed a full scan of all vmas, holding mmap_sem
		 * throughout, and found no VM_MERGEABLE: so do the same as
		 * __ksm_exit does to remove this mm from all our lists now.
		 * This applies either when cleaning up after __ksm_exit
		 * (but beware: we can reach here even before __ksm_exit),
		 * or when all VM_MERGEABLE areas have been unmapped (and
		 * mmap_sem then protects against race with MADV_MERGEABLE).
		 */
		hlist_del(&slot->link);
		list_del(&slot->mm_list);
		spin_unlock(&ksm_mmlist_lock);

		free_mm_slot(slot);
		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
		up_read(&mm->mmap_sem);
		mmdrop(mm);
	} else {
		spin_unlock(&ksm_mmlist_lock);
		up_read(&mm->mmap_sem);
	}

	/* Repeat until we've completed scanning the whole list */
	slot = ksm_scan.mm_slot;
	if (slot != &ksm_mm_head)
		goto next_mm;

	ksm_scan.seqnr++;
	return NULL;
}

/**
 * ksm_do_scan  - the ksm scanner main worker function.
 * @scan_npages - number of pages we want to scan before we return.
 */
static void ksm_do_scan(unsigned int scan_npages)
{
	struct rmap_item *rmap_item;
	struct page *uninitialized_var(page);

	while (scan_npages-- && likely(!freezing(current))) {
		cond_resched();
		rmap_item = scan_get_next_rmap_item(&page);
		if (!rmap_item)
			return;
		if (!PageKsm(page) || !in_stable_tree(rmap_item))
			cmp_and_merge_page(page, rmap_item);
		put_page(page);
	}
}

static int ksmd_should_run(void)
{
	return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
}

static int ksm_scan_thread(void *nothing)
{
	set_freezable();
	set_user_nice(current, 5);

	while (!kthread_should_stop()) {
		mutex_lock(&ksm_thread_mutex);
		if (ksmd_should_run())
			ksm_do_scan(ksm_thread_pages_to_scan);
		mutex_unlock(&ksm_thread_mutex);

		try_to_freeze();

		if (ksmd_should_run()) {
			schedule_timeout_interruptible(
				msecs_to_jiffies(ksm_thread_sleep_millisecs));
		} else {
			wait_event_freezable(ksm_thread_wait,
				ksmd_should_run() || kthread_should_stop());
		}
	}
	return 0;
}

int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
		unsigned long end, int advice, unsigned long *vm_flags)
{
	struct mm_struct *mm = vma->vm_mm;
	int err;

	switch (advice) {
	case MADV_MERGEABLE:
		/*
		 * Be somewhat over-protective for now!
		 */
		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
				 VM_HUGETLB   | VM_NONLINEAR | VM_MIXEDMAP))
			return 0;		/* just ignore the advice */

#ifdef VM_SAO
		if (*vm_flags & VM_SAO)
			return 0;
#endif

		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
			err = __ksm_enter(mm);
			if (err)
				return err;
		}

		*vm_flags |= VM_MERGEABLE;
		break;

	case MADV_UNMERGEABLE:
		if (!(*vm_flags & VM_MERGEABLE))
			return 0;		/* just ignore the advice */

		if (vma->anon_vma) {
			err = unmerge_ksm_pages(vma, start, end);
			if (err)
				return err;
		}

		*vm_flags &= ~VM_MERGEABLE;
		break;
	}

	return 0;
}
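
/*
 * Illustrative userspace usage (added comment, not part of the original
 * file): an application opts an anonymous range into KSM scanning with
 * madvise(2), which lands in ksm_madvise() above:
 *
 *	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	if (madvise(buf, len, MADV_MERGEABLE))
 *		perror("madvise");
 *
 * MADV_UNMERGEABLE on the same range undoes it, breaking COW on any pages
 * that were merged in the meantime.
 */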

int __ksm_enter(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	int needs_wakeup;

	mm_slot = alloc_mm_slot();
	if (!mm_slot)
		return -ENOMEM;

	/* Check ksm_run too?  Would need tighter locking */
	needs_wakeup = list_empty(&ksm_mm_head.mm_list);

	spin_lock(&ksm_mmlist_lock);
	insert_to_mm_slots_hash(mm, mm_slot);
	/*
	 * Insert just behind the scanning cursor, to let the area settle
	 * down a little; when fork is followed by immediate exec, we don't
	 * want ksmd to waste time setting up and tearing down an rmap_list.
	 */
	list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
	spin_unlock(&ksm_mmlist_lock);

	set_bit(MMF_VM_MERGEABLE, &mm->flags);
	atomic_inc(&mm->mm_count);

	if (needs_wakeup)
		wake_up_interruptible(&ksm_thread_wait);

	return 0;
}

void __ksm_exit(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	int easy_to_free = 0;

	/*
	 * This process is exiting: if it's straightforward (as is the
	 * case when ksmd was never running), free mm_slot immediately.
	 * But if it's at the cursor or has rmap_items linked to it, use
	 * mmap_sem to synchronize with any break_cows before pagetables
	 * are freed, and leave the mm_slot on the list for ksmd to free.
	 * Beware: ksm may already have noticed it exiting and freed the slot.
	 */

	spin_lock(&ksm_mmlist_lock);
	mm_slot = get_mm_slot(mm);
	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
		if (!mm_slot->rmap_list) {
			hlist_del(&mm_slot->link);
			list_del(&mm_slot->mm_list);
			easy_to_free = 1;
		} else {
			list_move(&mm_slot->mm_list,
				  &ksm_scan.mm_slot->mm_list);
		}
	}
	spin_unlock(&ksm_mmlist_lock);

	if (easy_to_free) {
		free_mm_slot(mm_slot);
		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
		mmdrop(mm);
	} else if (mm_slot) {
		down_write(&mm->mmap_sem);
		up_write(&mm->mmap_sem);
	}
}

struct page *ksm_does_need_to_copy(struct page *page,
			struct vm_area_struct *vma, unsigned long address)
{
	struct page *new_page;

	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
	if (new_page) {
		copy_user_highpage(new_page, page, address, vma);

		SetPageDirty(new_page);
		__SetPageUptodate(new_page);
		SetPageSwapBacked(new_page);
		__set_page_locked(new_page);

		if (!mlocked_vma_newpage(vma, new_page))
			lru_cache_add_lru(new_page, LRU_ACTIVE_ANON);
		else
			add_page_to_unevictable_list(new_page);
	}

	return new_page;
}

int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
			unsigned long *vm_flags)
{
	struct stable_node *stable_node;
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;
	unsigned int mapcount = page_mapcount(page);
	int referenced = 0;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return 0;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock_read(anon_vma);
		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
					       0, ULONG_MAX) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * Initially we examine only the vma which covers this
			 * rmap_item; but later, if there is still work to do,
			 * we examine covering vmas in other mms: in case they
			 * were forked from the original since ksmd passed.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
				continue;

			referenced += page_referenced_one(page, vma,
				rmap_item->address, &mapcount, vm_flags);
			if (!search_new_forks || !mapcount)
				break;
		}
		anon_vma_unlock_read(anon_vma);
		if (!mapcount)
			goto out;
	}
	if (!search_new_forks++)
		goto again;
out:
	return referenced;
}

int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
{
	struct stable_node *stable_node;
	struct hlist_node *hlist;
	struct rmap_item *rmap_item;
	int ret = SWAP_AGAIN;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return SWAP_FAIL;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock_read(anon_vma);
		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
					       0, ULONG_MAX) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * Initially we examine only the vma which covers this
			 * rmap_item; but later, if there is still work to do,
			 * we examine covering vmas in other mms: in case they
			 * were forked from the original since ksmd passed.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			ret = try_to_unmap_one(page, vma,
					rmap_item->address, flags);
			if (ret != SWAP_AGAIN || !page_mapped(page)) {
				anon_vma_unlock_read(anon_vma);
				goto out;
			}
		}
		anon_vma_unlock_read(anon_vma);
	}
	if (!search_new_forks++)
		goto again;
out:
	return ret;
}

#ifdef CONFIG_MIGRATION
int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
		  struct vm_area_struct *, unsigned long, void *), void *arg)
{
	struct stable_node *stable_node;
	struct hlist_node *hlist;
	struct rmap_item *rmap_item;
	int ret = SWAP_AGAIN;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return ret;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock_read(anon_vma);
		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
					       0, ULONG_MAX) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * Initially we examine only the vma which covers this
			 * rmap_item; but later, if there is still work to do,
			 * we examine covering vmas in other mms: in case they
			 * were forked from the original since ksmd passed.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			ret = rmap_one(page, vma, rmap_item->address, arg);
			if (ret != SWAP_AGAIN) {
				anon_vma_unlock_read(anon_vma);
				goto out;
			}
		}
		anon_vma_unlock_read(anon_vma);
	}
	if (!search_new_forks++)
		goto again;
out:
	return ret;
}

void ksm_migrate_page(struct page *newpage, struct page *oldpage)
{
	struct stable_node *stable_node;

	VM_BUG_ON(!PageLocked(oldpage));
	VM_BUG_ON(!PageLocked(newpage));
	VM_BUG_ON(newpage->mapping != oldpage->mapping);

	stable_node = page_stable_node(newpage);
	if (stable_node) {
		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
		stable_node->kpfn = page_to_pfn(newpage);
	}
}
#endif /* CONFIG_MIGRATION */

#ifdef CONFIG_MEMORY_HOTREMOVE
static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
						 unsigned long end_pfn)
{
	struct rb_node *node;

	for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
		struct stable_node *stable_node;

		stable_node = rb_entry(node, struct stable_node, node);
		if (stable_node->kpfn >= start_pfn &&
		    stable_node->kpfn < end_pfn)
			return stable_node;
	}
	return NULL;
}

static int ksm_memory_callback(struct notifier_block *self,
			       unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;
	struct stable_node *stable_node;

	switch (action) {
	case MEM_GOING_OFFLINE:
		/*
		 * Keep it very simple for now: just lock out ksmd and
		 * MADV_UNMERGEABLE while any memory is going offline.
		 * mutex_lock_nested() is necessary because lockdep was
		 * alarmed that here we take ksm_thread_mutex inside notifier
		 * chain mutex, and later take notifier chain mutex inside
		 * ksm_thread_mutex to unlock it.  But that's safe because
		 * both are inside mem_hotplug_mutex.
		 */
		mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
		break;

	case MEM_OFFLINE:
		/*
		 * Most of the work is done by page migration; but there might
		 * be a few stable_nodes left over, still pointing to struct
		 * pages which have been offlined: prune those from the tree.
		 */
		while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
					mn->start_pfn + mn->nr_pages)) != NULL)
			remove_node_from_stable_tree(stable_node);
		/* fallthrough */

	case MEM_CANCEL_OFFLINE:
		mutex_unlock(&ksm_thread_mutex);
		break;
	}
	return NOTIFY_OK;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

#ifdef CONFIG_SYSFS
/*
 * This all compiles without CONFIG_SYSFS, but is a waste of space.
 */

#define KSM_ATTR_RO(_name) \
	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
#define KSM_ATTR(_name) \
	static struct kobj_attribute _name##_attr = \
		__ATTR(_name, 0644, _name##_show, _name##_store)

static ssize_t sleep_millisecs_show(struct kobject *kobj,
				    struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
}

static ssize_t sleep_millisecs_store(struct kobject *kobj,
				     struct kobj_attribute *attr,
				     const char *buf, size_t count)
{
	unsigned long msecs;
	int err;

	err = strict_strtoul(buf, 10, &msecs);
	if (err || msecs > UINT_MAX)
		return -EINVAL;

	ksm_thread_sleep_millisecs = msecs;

	return count;
}
KSM_ATTR(sleep_millisecs);

static ssize_t pages_to_scan_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
}

static ssize_t pages_to_scan_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t count)
{
	int err;
	unsigned long nr_pages;

	err = strict_strtoul(buf, 10, &nr_pages);
	if (err || nr_pages > UINT_MAX)
		return -EINVAL;

	ksm_thread_pages_to_scan = nr_pages;

	return count;
}
KSM_ATTR(pages_to_scan);

static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
			char *buf)
{
	return sprintf(buf, "%u\n", ksm_run);
}

static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
			 const char *buf, size_t count)
{
	int err;
	unsigned long flags;

	err = strict_strtoul(buf, 10, &flags);
	if (err || flags > UINT_MAX)
		return -EINVAL;
	if (flags > KSM_RUN_UNMERGE)
		return -EINVAL;

	/*
	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
	 * breaking COW to free the pages_shared (but leaves mm_slots
	 * on the list for when ksmd may be set running again).
	 */

	mutex_lock(&ksm_thread_mutex);
	if (ksm_run != flags) {
		ksm_run = flags;
		if (flags & KSM_RUN_UNMERGE) {
			set_current_oom_origin();
			err = unmerge_and_remove_all_rmap_items();
			clear_current_oom_origin();
			if (err) {
				ksm_run = KSM_RUN_STOP;
				count = err;
			}
		}
	}
	mutex_unlock(&ksm_thread_mutex);

	if (flags & KSM_RUN_MERGE)
		wake_up_interruptible(&ksm_thread_wait);

	return count;
}
KSM_ATTR(run);

static ssize_t pages_shared_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_shared);
}
KSM_ATTR_RO(pages_shared);

static ssize_t pages_sharing_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_sharing);
}
KSM_ATTR_RO(pages_sharing);

static ssize_t pages_unshared_show(struct kobject *kobj,
				   struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_unshared);
}
KSM_ATTR_RO(pages_unshared);

static ssize_t pages_volatile_show(struct kobject *kobj,
				   struct kobj_attribute *attr, char *buf)
{
	long ksm_pages_volatile;

	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
				- ksm_pages_sharing - ksm_pages_unshared;
	/*
	 * It was not worth any locking to calculate that statistic,
	 * but it might therefore sometimes be negative.
	 */
	if (ksm_pages_volatile < 0)
		ksm_pages_volatile = 0;
	return sprintf(buf, "%ld\n", ksm_pages_volatile);
}
KSM_ATTR_RO(pages_volatile);

static ssize_t full_scans_show(struct kobject *kobj,
			       struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_scan.seqnr);
}
KSM_ATTR_RO(full_scans);

static struct attribute *ksm_attrs[] = {
	&sleep_millisecs_attr.attr,
	&pages_to_scan_attr.attr,
	&run_attr.attr,
	&pages_shared_attr.attr,
	&pages_sharing_attr.attr,
	&pages_unshared_attr.attr,
	&pages_volatile_attr.attr,
	&full_scans_attr.attr,
	NULL,
};

static struct attribute_group ksm_attr_group = {
	.attrs = ksm_attrs,
	.name = "ksm",
};
#endif /* CONFIG_SYSFS */
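
/*
 * Illustrative sysfs usage (added comment, not part of the original file):
 * the attribute group above is registered on mm_kobj with .name = "ksm",
 * so it appears under /sys/kernel/mm/ksm/, e.g.
 *
 *	echo 100 > /sys/kernel/mm/ksm/pages_to_scan
 *	echo 1 > /sys/kernel/mm/ksm/run		(start merging)
 *	echo 2 > /sys/kernel/mm/ksm/run		(unmerge everything)
 *	cat /sys/kernel/mm/ksm/pages_sharing
 */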

static int __init ksm_init(void)
{
	struct task_struct *ksm_thread;
	int err;

	err = ksm_slab_init();
	if (err)
		goto out;

	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
	if (IS_ERR(ksm_thread)) {
		printk(KERN_ERR "ksm: creating kthread failed\n");
		err = PTR_ERR(ksm_thread);
		goto out_free;
	}

#ifdef CONFIG_SYSFS
	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
	if (err) {
		printk(KERN_ERR "ksm: register sysfs failed\n");
		kthread_stop(ksm_thread);
		goto out_free;
	}
#else
	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */

#endif /* CONFIG_SYSFS */

#ifdef CONFIG_MEMORY_HOTREMOVE
	/*
	 * Choose a high priority since the callback takes ksm_thread_mutex:
	 * later callbacks could only be taking locks which nest within that.
	 */
	hotplug_memory_notifier(ksm_memory_callback, 100);
#endif
	return 0;

out_free:
	ksm_slab_free();
out:
	return err;
}
module_init(ksm_init)