#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/spinlock.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/memory.h>
#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <linux/ksm.h>
#include <linux/hash.h>
#include <linux/freezer.h>

#include <asm/tlbflush.h>
#include "internal.h"

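/*
 * Overview of the data structures below: KSM keeps two red-black trees
 * of pages, both ordered by page contents.  The stable tree holds pages
 * that are already write-protected and shared; the unstable tree holds
 * candidate pages seen unchanged between scans and is rebuilt from
 * scratch on every full pass.  Each mm registered through
 * madvise(MADV_MERGEABLE) is tracked by an mm_slot, kept on a hash
 * table and on the scan list rooted at ksm_mm_head.
 */

/*
 * struct mm_slot - ksm information per mm that is being scanned
 * @link: link to the mm_slots hash list
 * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
 * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
 * @mm: the mm that this information is valid for
 */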
struct mm_slot {
	struct hlist_node link;
	struct list_head mm_list;
	struct rmap_item *rmap_list;
	struct mm_struct *mm;
};
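
/*
 * struct ksm_scan - cursor for scanning
 * @mm_slot: the current mm_slot we are scanning
 * @address: the next address inside that to be scanned
 * @rmap_list: link to the next rmap to be scanned in the rmap_list
 * @seqnr: count of completed full scans (low bits tag unstable tree entries)
 */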
struct ksm_scan {
	struct mm_slot *mm_slot;
	unsigned long address;
	struct rmap_item **rmap_list;
	unsigned long seqnr;
};
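
/*
 * struct stable_node - node of the stable rbtree
 * @node: rb node of this ksm page in the stable tree
 * @hlist: hlist head of rmap_items using this ksm page
 * @kpfn: page frame number of this ksm page
 */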
struct stable_node {
	struct rb_node node;
	struct hlist_head hlist;
	unsigned long kpfn;
};
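
/*
 * struct rmap_item - reverse mapping item for virtual addresses
 * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
 * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
 * @mm: the memory structure this rmap_item is pointing into
 * @address: the virtual address this rmap_item tracks (+ flags in low bits)
 * @oldchecksum: previous checksum of the page at that virtual address
 * @node: rb node of this rmap_item in the unstable tree
 * @head: pointer to stable_node heading the hlist in the stable tree
 * @hlist: link into hlist of rmap_items hanging off that stable_node
 */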
struct rmap_item {
	struct rmap_item *rmap_list;
	struct anon_vma *anon_vma;	/* when stable */
	struct mm_struct *mm;
	unsigned long address;		/* + low bits used for flags below */
	unsigned int oldchecksum;	/* when unstable */
	union {
		struct rb_node node;	/* when node of unstable tree */
		struct {		/* when listed from stable tree */
			struct stable_node *head;
			struct hlist_node hlist;
		};
	};
};

#define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
#define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
#define STABLE_FLAG	0x200	/* is listed from the stable tree */

/* The stable and unstable tree heads */
static struct rb_root root_stable_tree = RB_ROOT;
static struct rb_root root_unstable_tree = RB_ROOT;

#define MM_SLOTS_HASH_SHIFT 10
#define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
static struct hlist_head mm_slots_hash[MM_SLOTS_HASH_HEADS];

static struct mm_slot ksm_mm_head = {
	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
};
static struct ksm_scan ksm_scan = {
	.mm_slot = &ksm_mm_head,
};

static struct kmem_cache *rmap_item_cache;
static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;

/* The number of nodes in the stable tree */
static unsigned long ksm_pages_shared;

/* The number of page slots additionally sharing those nodes */
static unsigned long ksm_pages_sharing;

/* The number of nodes in the unstable tree */
static unsigned long ksm_pages_unshared;

/* The number of rmap_items in use: to calculate pages_volatile */
static unsigned long ksm_rmap_items;

/* Number of pages ksmd should scan in one batch */
static unsigned int ksm_thread_pages_to_scan = 100;

/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;

#define KSM_RUN_STOP	0
#define KSM_RUN_MERGE	1
#define KSM_RUN_UNMERGE	2
static unsigned int ksm_run = KSM_RUN_STOP;

static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
static DEFINE_SPINLOCK(ksm_mmlist_lock);

#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
		sizeof(struct __struct), __alignof__(struct __struct),\
		(__flags), NULL)

static int __init ksm_slab_init(void)
{
	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
	if (!rmap_item_cache)
		goto out;

	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
	if (!stable_node_cache)
		goto out_free1;

	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
	if (!mm_slot_cache)
		goto out_free2;

	return 0;

out_free2:
	kmem_cache_destroy(stable_node_cache);
out_free1:
	kmem_cache_destroy(rmap_item_cache);
out:
	return -ENOMEM;
}

static void __init ksm_slab_free(void)
{
	kmem_cache_destroy(mm_slot_cache);
	kmem_cache_destroy(stable_node_cache);
	kmem_cache_destroy(rmap_item_cache);
	mm_slot_cache = NULL;
}

static inline struct rmap_item *alloc_rmap_item(void)
{
	struct rmap_item *rmap_item;

	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
	if (rmap_item)
		ksm_rmap_items++;
	return rmap_item;
}

static inline void free_rmap_item(struct rmap_item *rmap_item)
{
	ksm_rmap_items--;
	rmap_item->mm = NULL;
	kmem_cache_free(rmap_item_cache, rmap_item);
}

static inline struct stable_node *alloc_stable_node(void)
{
	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
}

static inline void free_stable_node(struct stable_node *stable_node)
{
	kmem_cache_free(stable_node_cache, stable_node);
}

static inline struct mm_slot *alloc_mm_slot(void)
{
	if (!mm_slot_cache)
		return NULL;
	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
}

static inline void free_mm_slot(struct mm_slot *mm_slot)
{
	kmem_cache_free(mm_slot_cache, mm_slot);
}

static struct mm_slot *get_mm_slot(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	struct hlist_head *bucket;
	struct hlist_node *node;

	bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
	hlist_for_each_entry(mm_slot, node, bucket, link) {
		if (mm == mm_slot->mm)
			return mm_slot;
	}
	return NULL;
}

static void insert_to_mm_slots_hash(struct mm_struct *mm,
				    struct mm_slot *mm_slot)
{
	struct hlist_head *bucket;

	bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
	mm_slot->mm = mm;
	hlist_add_head(&mm_slot->link, bucket);
}

static inline int in_stable_tree(struct rmap_item *rmap_item)
{
	return rmap_item->address & STABLE_FLAG;
}

static void hold_anon_vma(struct rmap_item *rmap_item,
			  struct anon_vma *anon_vma)
{
	rmap_item->anon_vma = anon_vma;
	get_anon_vma(anon_vma);
}

static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
{
	struct anon_vma *anon_vma = rmap_item->anon_vma;

	drop_anon_vma(anon_vma);
}

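/*
 * ksm_test_exit() - has this mm's address space started to be torn down?
 *
 * ksmd and the unmerge paths pin an mm only with mm_count (see
 * __ksm_enter), not mm_users, so before touching vmas under
 * down_read(&mm->mmap_sem) they must check that exit_mmap() has not
 * already begun: once mm_users has reached zero, the vmas and page
 * tables may already be on their way out.
 */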
static inline bool ksm_test_exit(struct mm_struct *mm)
{
	return atomic_read(&mm->mm_users) == 0;
}

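/*
 * break_ksm() - break COW on a KSM page mapped at @addr in @vma.
 *
 * Fault in a write access at the address, repeatedly if need be, until
 * the page mapped there is no longer a KSM page (the write fault gives
 * this mm its own anonymous copy) or the fault fails.  Returns 0 on
 * success or if there was nothing to break, -ENOMEM only if the fault
 * ran out of memory.
 */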
static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	int ret = 0;

	do {
		cond_resched();
		page = follow_page(vma, addr, FOLL_GET);
		if (IS_ERR_OR_NULL(page))
			break;
		if (PageKsm(page))
			ret = handle_mm_fault(vma->vm_mm, vma, addr,
							FAULT_FLAG_WRITE);
		else
			ret = VM_FAULT_WRITE;
		put_page(page);
	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));

	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
}

static void break_cow(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;
	unsigned long addr = rmap_item->address;
	struct vm_area_struct *vma;

	/*
	 * This rmap_item is no longer merged: drop its hold on the
	 * anon_vma before faulting in a private copy of the page.
	 */
	ksm_drop_anon_vma(rmap_item);

	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		goto out;
	vma = find_vma(mm, addr);
	if (!vma || vma->vm_start > addr)
		goto out;
	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
		goto out;
	break_ksm(vma, addr);
out:
	up_read(&mm->mmap_sem);
}

static struct page *page_trans_compound_anon(struct page *page)
{
	if (PageTransCompound(page)) {
		struct page *head = compound_trans_head(page);
		/*
		 * Only interested in anonymous transparent hugepages:
		 * return the head page, which the callers work against.
		 */
		if (PageAnon(head))
			return head;
	}
	return NULL;
}

static struct page *get_mergeable_page(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;
	unsigned long addr = rmap_item->address;
	struct vm_area_struct *vma;
	struct page *page;

	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		goto out;
	vma = find_vma(mm, addr);
	if (!vma || vma->vm_start > addr)
		goto out;
	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
		goto out;

	page = follow_page(vma, addr, FOLL_GET);
	if (IS_ERR_OR_NULL(page))
		goto out;
	if (PageAnon(page) || page_trans_compound_anon(page)) {
		flush_anon_page(vma, page, addr);
		flush_dcache_page(page);
	} else {
		put_page(page);
out:
		page = NULL;
	}
	up_read(&mm->mmap_sem);
	return page;
}

static void remove_node_from_stable_tree(struct stable_node *stable_node)
{
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;

	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		if (rmap_item->hlist.next)
			ksm_pages_sharing--;
		else
			ksm_pages_shared--;
		ksm_drop_anon_vma(rmap_item);
		rmap_item->address &= PAGE_MASK;
		cond_resched();
	}

	rb_erase(&stable_node->node, &root_stable_tree);
	free_stable_node(stable_node);
}

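/*
 * get_ksm_page() - get a reference on the stable node's KSM page.
 *
 * Returns the page with its count raised if it is still this stable
 * node's page: page->mapping is checked against the expected value
 * under rcu_read_lock, both before and after taking the reference.
 * If the node turns out to be stale, it is removed from the stable
 * tree and NULL is returned.
 */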
static struct page *get_ksm_page(struct stable_node *stable_node)
{
	struct page *page;
	void *expected_mapping;

	page = pfn_to_page(stable_node->kpfn);
	expected_mapping = (void *)stable_node +
				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
	rcu_read_lock();
	if (page->mapping != expected_mapping)
		goto stale;
	if (!get_page_unless_zero(page))
		goto stale;
	if (page->mapping != expected_mapping) {
		put_page(page);
		goto stale;
	}
	rcu_read_unlock();
	return page;
stale:
	rcu_read_unlock();
	remove_node_from_stable_tree(stable_node);
	return NULL;
}

/*
 * Removing rmap_item from stable or unstable tree.
 * This function will clean the information from the stable/unstable tree.
 */
static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
{
	if (rmap_item->address & STABLE_FLAG) {
		struct stable_node *stable_node;
		struct page *page;

		stable_node = rmap_item->head;
		page = get_ksm_page(stable_node);
		if (!page)
			goto out;

		lock_page(page);
		hlist_del(&rmap_item->hlist);
		unlock_page(page);
		put_page(page);

		if (stable_node->hlist.first)
			ksm_pages_sharing--;
		else
			ksm_pages_shared--;

		ksm_drop_anon_vma(rmap_item);
		rmap_item->address &= PAGE_MASK;

	} else if (rmap_item->address & UNSTABLE_FLAG) {
		unsigned char age;
		/*
		 * The low bits of the address record which scan inserted
		 * this rmap_item into the unstable tree.  If a full scan
		 * has completed since then, the whole unstable tree has
		 * already been rebuilt, so there is nothing to erase.
		 */
		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
		BUG_ON(age > 1);
		if (!age)
			rb_erase(&rmap_item->node, &root_unstable_tree);

		ksm_pages_unshared--;
		rmap_item->address &= PAGE_MASK;
	}
out:
	cond_resched();		/* we're called from many long loops */
}

static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
				       struct rmap_item **rmap_list)
{
	while (*rmap_list) {
		struct rmap_item *rmap_item = *rmap_list;
		*rmap_list = rmap_item->rmap_list;
		remove_rmap_item_from_tree(rmap_item);
		free_rmap_item(rmap_item);
	}
}

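/*
 * unmerge_ksm_pages() - break COW on every page in [start, end) of @vma.
 *
 * Walks the range a page at a time, calling break_ksm() on each address,
 * stopping early on a pending signal (-ERESTARTSYS), on an error from
 * break_ksm(), or if the mm is already exiting.
 */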
static int unmerge_ksm_pages(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end)
{
	unsigned long addr;
	int err = 0;

	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
		if (ksm_test_exit(vma->vm_mm))
			break;
		if (signal_pending(current))
			err = -ERESTARTSYS;
		else
			err = break_ksm(vma, addr);
	}
	return err;
}

#ifdef CONFIG_SYSFS
/*
 * Only called through the sysfs control interface:
 */
static int unmerge_and_remove_all_rmap_items(void)
{
	struct mm_slot *mm_slot;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int err = 0;

	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
						struct mm_slot, mm_list);
	spin_unlock(&ksm_mmlist_lock);

	for (mm_slot = ksm_scan.mm_slot;
			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
		mm = mm_slot->mm;
		down_read(&mm->mmap_sem);
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			if (ksm_test_exit(mm))
				break;
			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
				continue;
			err = unmerge_ksm_pages(vma,
						vma->vm_start, vma->vm_end);
			if (err)
				goto error;
		}

		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);

		spin_lock(&ksm_mmlist_lock);
		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
						struct mm_slot, mm_list);
		if (ksm_test_exit(mm)) {
			hlist_del(&mm_slot->link);
			list_del(&mm_slot->mm_list);
			spin_unlock(&ksm_mmlist_lock);

			free_mm_slot(mm_slot);
			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
			up_read(&mm->mmap_sem);
			mmdrop(mm);
		} else {
			spin_unlock(&ksm_mmlist_lock);
			up_read(&mm->mmap_sem);
		}
	}

	ksm_scan.seqnr = 0;
	return 0;

error:
	up_read(&mm->mmap_sem);
	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = &ksm_mm_head;
	spin_unlock(&ksm_mmlist_lock);
	return err;
}
#endif /* CONFIG_SYSFS */

static u32 calc_checksum(struct page *page)
{
	u32 checksum;
	void *addr = kmap_atomic(page, KM_USER0);
	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
	kunmap_atomic(addr, KM_USER0);
	return checksum;
}

static int memcmp_pages(struct page *page1, struct page *page2)
{
	char *addr1, *addr2;
	int ret;

	addr1 = kmap_atomic(page1, KM_USER0);
	addr2 = kmap_atomic(page2, KM_USER1);
	ret = memcmp(addr1, addr2, PAGE_SIZE);
	kunmap_atomic(addr2, KM_USER1);
	kunmap_atomic(addr1, KM_USER0);
	return ret;
}

static inline int pages_identical(struct page *page1, struct page *page2)
{
	return !memcmp_pages(page1, page2);
}

static int write_protect_page(struct vm_area_struct *vma, struct page *page,
			      pte_t *orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr;
	pte_t *ptep;
	spinlock_t *ptl;
	int swapped;
	int err = -EFAULT;

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	BUG_ON(PageTransCompound(page));
	ptep = page_check_address(page, mm, addr, &ptl, 0);
	if (!ptep)
		goto out;

	if (pte_write(*ptep) || pte_dirty(*ptep)) {
		pte_t entry;

		swapped = PageSwapCache(page);
		flush_cache_page(vma, addr, page_to_pfn(page));
		/*
		 * Clear the pte and flush the TLB before checking the page
		 * count below: otherwise a lockless get_user_pages_fast()
		 * user (O_DIRECT, for instance) could take a new reference
		 * right after the check, or a racing write through a stale
		 * TLB entry could still dirty the page.
		 */
		entry = ptep_clear_flush(vma, addr, ptep);
		/*
		 * Any reference beyond the mapping and swap cache means
		 * some I/O may be in progress on the page: don't touch it.
		 */
		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
			set_pte_at(mm, addr, ptep, entry);
			goto out_unlock;
		}
		if (pte_dirty(entry))
			set_page_dirty(page);
		entry = pte_mkclean(pte_wrprotect(entry));
		set_pte_at_notify(mm, addr, ptep, entry);
	}
	*orig_pte = *ptep;
	err = 0;

out_unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return err;
}

/*
 * replace_page - replace page in vma by new ksm page
 * @vma:      vma that holds the pte pointing to page
 * @page:     the page we are replacing by kpage
 * @kpage:    the ksm page we replace page by
 * @orig_pte: the original value of the pte
 *
 * Returns 0 on success, -EFAULT on failure.
 */
static int replace_page(struct vm_area_struct *vma, struct page *page,
			struct page *kpage, pte_t orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long addr;
	int err = -EFAULT;

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		goto out;

	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		goto out;

	pmd = pmd_offset(pud, addr);
	BUG_ON(pmd_trans_huge(*pmd));
	if (!pmd_present(*pmd))
		goto out;

	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte_same(*ptep, orig_pte)) {
		pte_unmap_unlock(ptep, ptl);
		goto out;
	}

	get_page(kpage);
	page_add_anon_rmap(kpage, vma, addr);

	flush_cache_page(vma, addr, pte_pfn(*ptep));
	ptep_clear_flush(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

	page_remove_rmap(page);
	if (!page_mapped(page))
		try_to_free_swap(page);
	put_page(page);

	pte_unmap_unlock(ptep, ptl);
	err = 0;
out:
	return err;
}

static int page_trans_compound_anon_split(struct page *page)
{
	int ret = 0;
	struct page *transhuge_head = page_trans_compound_anon(page);
	if (transhuge_head) {
		/* Get the reference on the head to split it. */
		if (get_page_unless_zero(transhuge_head)) {
			/*
			 * Recheck we got the reference while the head
			 * was still anonymous.
			 */
			if (PageAnon(transhuge_head))
				ret = split_huge_page(transhuge_head);
			else
				/*
				 * Retry later if split_huge_page ran
				 * from under us.
				 */
				ret = 1;
			put_page(transhuge_head);
		} else
			/* Retry later if split_huge_page ran from under us. */
			ret = 1;
	}
	return ret;
}

/*
 * try_to_merge_one_page - take two pages and merge them into one
 * @vma: the vma that holds the pte pointing to page
 * @page: the PageAnon page that we want to replace with kpage
 * @kpage: the PageKsm page that we want to map instead of page,
 *         or NULL the first time when we want to use page as kpage.
 *
 * This function returns 0 if the pages were merged, -EFAULT otherwise.
 */
static int try_to_merge_one_page(struct vm_area_struct *vma,
				 struct page *page, struct page *kpage)
{
	pte_t orig_pte = __pte(0);
	int err = -EFAULT;

	if (page == kpage)			/* ksm page forked */
		return 0;

	if (!(vma->vm_flags & VM_MERGEABLE))
		goto out;
	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
		goto out;
	BUG_ON(PageTransCompound(page));
	if (!PageAnon(page))
		goto out;

	/*
	 * We need the page lock to read a stable PageSwapCache in
	 * write_protect_page().  Use trylock to avoid waiting here:
	 * ksmd prefers to move on and revisit this page later.
	 */
	if (!trylock_page(page))
		goto out;
	/*
	 * Write-protect the pte (if it isn't already) and recheck that
	 * nobody else holds an unexpected reference to the page.
	 */
	if (write_protect_page(vma, page, &orig_pte) == 0) {
		if (!kpage) {
			/*
			 * While we hold page lock, upgrade page from
			 * PageAnon+anon_vma to PageKsm+NULL stable_node:
			 * stable_tree_insert() will update stable_node.
			 */
			set_page_stable_node(page, NULL);
			mark_page_accessed(page);
			err = 0;
		} else if (pages_identical(page, kpage))
			err = replace_page(vma, page, kpage, orig_pte);
	}

	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
		munlock_vma_page(page);
		if (!PageMlocked(kpage)) {
			unlock_page(page);
			lock_page(kpage);
			mlock_vma_page(kpage);
			page = kpage;		/* for final unlock */
		}
	}

	unlock_page(page);
out:
	return err;
}

/*
 * try_to_merge_with_ksm_page - merge @page in @rmap_item's vma into the
 * existing KSM page @kpage (or just write-protect @page, keeping it as
 * the prospective kpage, when @kpage is NULL).
 *
 * This function returns 0 if the pages were merged, -EFAULT otherwise.
 */
static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
				      struct page *page, struct page *kpage)
{
	struct mm_struct *mm = rmap_item->mm;
	struct vm_area_struct *vma;
	int err = -EFAULT;

	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		goto out;
	vma = find_vma(mm, rmap_item->address);
	if (!vma || vma->vm_start > rmap_item->address)
		goto out;

	err = try_to_merge_one_page(vma, page, kpage);
	if (err)
		goto out;

	/* Must get reference to anon_vma while still holding mmap_sem */
	hold_anon_vma(rmap_item, vma->anon_vma);
out:
	up_read(&mm->mmap_sem);
	return err;
}

/*
 * try_to_merge_two_pages - take two identical pages and prepare them
 * to be merged into one page.
 *
 * This function returns the kpage if we successfully merged two identical
 * pages into one ksm page, NULL otherwise.
 *
 * Note that this function upgrades page to ksm page: if one of the pages
 * is already a ksm page, try_to_merge_with_ksm_page should be used.
 */
static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
					   struct page *page,
					   struct rmap_item *tree_rmap_item,
					   struct page *tree_page)
{
	int err;

	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
	if (!err) {
		err = try_to_merge_with_ksm_page(tree_rmap_item,
							tree_page, page);
		/*
		 * If that fails, we have a ksm page with only one pte
		 * pointing to it: so break it.
		 */
		if (err)
			break_cow(rmap_item);
	}
	return err ? NULL : page;
}
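
/*
 * stable_tree_search - search for page inside the stable tree
 *
 * This function checks if there is a page inside the stable tree
 * with identical content to the page that we are scanning right now.
 *
 * This function returns the stable tree node of identical content if found,
 * NULL otherwise.
 */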
static struct page *stable_tree_search(struct page *page)
{
	struct rb_node *node = root_stable_tree.rb_node;
	struct stable_node *stable_node;

	stable_node = page_stable_node(page);
	if (stable_node) {
		get_page(page);
		return page;
	}

	while (node) {
		struct page *tree_page;
		int ret;

		cond_resched();
		stable_node = rb_entry(node, struct stable_node, node);
		tree_page = get_ksm_page(stable_node);
		if (!tree_page)
			return NULL;

		ret = memcmp_pages(page, tree_page);

		if (ret < 0) {
			put_page(tree_page);
			node = node->rb_left;
		} else if (ret > 0) {
			put_page(tree_page);
			node = node->rb_right;
		} else
			return tree_page;
	}

	return NULL;
}
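
/*
 * stable_tree_insert - insert a new stable node for kpage into the
 * stable tree.
 *
 * This function returns the stable tree node just allocated on success,
 * NULL otherwise (including when an identical page is already in the
 * tree, or the node allocation fails).
 */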
static struct stable_node *stable_tree_insert(struct page *kpage)
{
	struct rb_node **new = &root_stable_tree.rb_node;
	struct rb_node *parent = NULL;
	struct stable_node *stable_node;

	while (*new) {
		struct page *tree_page;
		int ret;

		cond_resched();
		stable_node = rb_entry(*new, struct stable_node, node);
		tree_page = get_ksm_page(stable_node);
		if (!tree_page)
			return NULL;

		ret = memcmp_pages(kpage, tree_page);
		put_page(tree_page);

		parent = *new;
		if (ret < 0)
			new = &parent->rb_left;
		else if (ret > 0)
			new = &parent->rb_right;
		else {
			/*
			 * It is not a bug that stable_tree_search() didn't
			 * find this node: because at that time our page was
			 * not yet write-protected, so may have changed since.
			 */
			return NULL;
		}
	}

	stable_node = alloc_stable_node();
	if (!stable_node)
		return NULL;

	rb_link_node(&stable_node->node, parent, new);
	rb_insert_color(&stable_node->node, &root_stable_tree);

	INIT_HLIST_HEAD(&stable_node->hlist);

	stable_node->kpfn = page_to_pfn(kpage);
	set_page_stable_node(kpage, stable_node);

	return stable_node;
}
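
/*
 * unstable_tree_search_insert - search for identical page,
 * else insert rmap_item into the unstable tree.
 *
 * This function searches for a page in the unstable tree identical to the
 * page currently being scanned; and if no identical page is found in the
 * tree, we insert rmap_item as a new object into the unstable tree.
 *
 * This function returns pointer to rmap_item found to be identical
 * to the currently scanned page, NULL otherwise.
 *
 * This function does both searching and inserting, because they share
 * the same walking algorithm in an rbtree.
 */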
static
struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
					      struct page *page,
					      struct page **tree_pagep)
{
	struct rb_node **new = &root_unstable_tree.rb_node;
	struct rb_node *parent = NULL;

	while (*new) {
		struct rmap_item *tree_rmap_item;
		struct page *tree_page;
		int ret;

		cond_resched();
		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
		tree_page = get_mergeable_page(tree_rmap_item);
		if (IS_ERR_OR_NULL(tree_page))
			return NULL;

		/*
		 * Don't substitute a ksm page for a forked page.
		 */
		if (page == tree_page) {
			put_page(tree_page);
			return NULL;
		}

		ret = memcmp_pages(page, tree_page);

		parent = *new;
		if (ret < 0) {
			put_page(tree_page);
			new = &parent->rb_left;
		} else if (ret > 0) {
			put_page(tree_page);
			new = &parent->rb_right;
		} else {
			*tree_pagep = tree_page;
			return tree_rmap_item;
		}
	}

	rmap_item->address |= UNSTABLE_FLAG;
	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
	rb_link_node(&rmap_item->node, parent, new);
	rb_insert_color(&rmap_item->node, &root_unstable_tree);

	ksm_pages_unshared++;
	return NULL;
}
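
/*
 * stable_tree_append - add another rmap_item to the linked list of
 * rmap_items hanging off a given node of the stable tree, all sharing
 * the same ksm page.
 */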
static void stable_tree_append(struct rmap_item *rmap_item,
			       struct stable_node *stable_node)
{
	rmap_item->head = stable_node;
	rmap_item->address |= STABLE_FLAG;
	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);

	if (rmap_item->hlist.next)
		ksm_pages_sharing++;
	else
		ksm_pages_shared++;
}
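
/*
 * cmp_and_merge_page - first see if page can be merged into the stable tree;
 * if not, compare checksum to previous and if it's the same, see if page can
 * be inserted into the unstable tree, or merged with a page already there and
 * both transferred to the stable tree.
 *
 * @page: the page that we are searching identical page to.
 * @rmap_item: the reverse mapping into the virtual address of this page
 */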
static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
{
	struct rmap_item *tree_rmap_item;
	struct page *tree_page = NULL;
	struct stable_node *stable_node;
	struct page *kpage;
	unsigned int checksum;
	int err;

	remove_rmap_item_from_tree(rmap_item);

	/* We first start with searching the page inside the stable tree */
	kpage = stable_tree_search(page);
	if (kpage) {
		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
		if (!err) {
			/*
			 * The page was successfully merged:
			 * add its rmap_item to the stable tree.
			 */
			lock_page(kpage);
			stable_tree_append(rmap_item, page_stable_node(kpage));
			unlock_page(kpage);
		}
		put_page(kpage);
		return;
	}

	/*
	 * If the hash value of the page has changed from the last time
	 * we calculated it, this page is changing frequently: therefore we
	 * don't want to insert it in the unstable tree, and we don't want
	 * to waste our time searching for something identical to it there.
	 */
	checksum = calc_checksum(page);
	if (rmap_item->oldchecksum != checksum) {
		rmap_item->oldchecksum = checksum;
		return;
	}

	tree_rmap_item =
		unstable_tree_search_insert(rmap_item, page, &tree_page);
	if (tree_rmap_item) {
		kpage = try_to_merge_two_pages(rmap_item, page,
						tree_rmap_item, tree_page);
		put_page(tree_page);
		/*
		 * As soon as we merge this page, we want to remove the
		 * rmap_item of the page we have merged with from the unstable
		 * tree, and insert it instead as new node in the stable tree.
		 */
		if (kpage) {
			remove_rmap_item_from_tree(tree_rmap_item);

			lock_page(kpage);
			stable_node = stable_tree_insert(kpage);
			if (stable_node) {
				stable_tree_append(tree_rmap_item, stable_node);
				stable_tree_append(rmap_item, stable_node);
			}
			unlock_page(kpage);

			/*
			 * If we fail to insert the page into the stable tree,
			 * we will have 2 virtual addresses that are pointing
			 * to a ksm page left outside the stable tree,
			 * in which case we need to break_cow on both.
			 */
			if (!stable_node) {
				break_cow(tree_rmap_item);
				break_cow(rmap_item);
			}
		}
	}
}
1258
1259static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
1260 struct rmap_item **rmap_list,
1261 unsigned long addr)
1262{
1263 struct rmap_item *rmap_item;
1264
1265 while (*rmap_list) {
1266 rmap_item = *rmap_list;
1267 if ((rmap_item->address & PAGE_MASK) == addr)
1268 return rmap_item;
1269 if (rmap_item->address > addr)
1270 break;
1271 *rmap_list = rmap_item->rmap_list;
1272 remove_rmap_item_from_tree(rmap_item);
1273 free_rmap_item(rmap_item);
1274 }
1275
1276 rmap_item = alloc_rmap_item();
1277 if (rmap_item) {
1278
1279 rmap_item->mm = mm_slot->mm;
1280 rmap_item->address = addr;
1281 rmap_item->rmap_list = *rmap_list;
1282 *rmap_list = rmap_item;
1283 }
1284 return rmap_item;
1285}

static struct rmap_item *scan_get_next_rmap_item(struct page **page)
{
	struct mm_struct *mm;
	struct mm_slot *slot;
	struct vm_area_struct *vma;
	struct rmap_item *rmap_item;

	if (list_empty(&ksm_mm_head.mm_list))
		return NULL;

	slot = ksm_scan.mm_slot;
	if (slot == &ksm_mm_head) {
		/*
		 * Starting a fresh scan of all the mms: drain the per-cpu
		 * lru pagevecs first, since pages left on them keep a raised
		 * page count which would make write_protect_page() fail.
		 */
		lru_add_drain_all();

		root_unstable_tree = RB_ROOT;

		spin_lock(&ksm_mmlist_lock);
		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
		ksm_scan.mm_slot = slot;
		spin_unlock(&ksm_mmlist_lock);
next_mm:
		ksm_scan.address = 0;
		ksm_scan.rmap_list = &slot->rmap_list;
	}

	mm = slot->mm;
	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		vma = NULL;
	else
		vma = find_vma(mm, ksm_scan.address);

	for (; vma; vma = vma->vm_next) {
		if (!(vma->vm_flags & VM_MERGEABLE))
			continue;
		if (ksm_scan.address < vma->vm_start)
			ksm_scan.address = vma->vm_start;
		if (!vma->anon_vma)
			ksm_scan.address = vma->vm_end;

		while (ksm_scan.address < vma->vm_end) {
			if (ksm_test_exit(mm))
				break;
			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
			if (IS_ERR_OR_NULL(*page)) {
				ksm_scan.address += PAGE_SIZE;
				cond_resched();
				continue;
			}
			if (PageAnon(*page) ||
			    page_trans_compound_anon(*page)) {
				flush_anon_page(vma, *page, ksm_scan.address);
				flush_dcache_page(*page);
				rmap_item = get_next_rmap_item(slot,
					ksm_scan.rmap_list, ksm_scan.address);
				if (rmap_item) {
					ksm_scan.rmap_list =
							&rmap_item->rmap_list;
					ksm_scan.address += PAGE_SIZE;
				} else
					put_page(*page);
				up_read(&mm->mmap_sem);
				return rmap_item;
			}
			put_page(*page);
			ksm_scan.address += PAGE_SIZE;
			cond_resched();
		}
	}

	if (ksm_test_exit(mm)) {
		ksm_scan.address = 0;
		ksm_scan.rmap_list = &slot->rmap_list;
	}
	/*
	 * Nuke all the rmap_items that are above this current rmap:
	 * because there were no VM_MERGEABLE vmas with such addresses.
	 */
	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);

	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
						struct mm_slot, mm_list);
	if (ksm_scan.address == 0) {
		/*
		 * We've finished with this mm (either it is exiting, or it
		 * no longer has any VM_MERGEABLE areas): remove its mm_slot
		 * from our lists, much as __ksm_exit would, and drop the
		 * reference on the mm that __ksm_enter took.
		 */
		hlist_del(&slot->link);
		list_del(&slot->mm_list);
		spin_unlock(&ksm_mmlist_lock);

		free_mm_slot(slot);
		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
		up_read(&mm->mmap_sem);
		mmdrop(mm);
	} else {
		spin_unlock(&ksm_mmlist_lock);
		up_read(&mm->mmap_sem);
	}

	/* Repeat until we've completed scanning the whole list */
	slot = ksm_scan.mm_slot;
	if (slot != &ksm_mm_head)
		goto next_mm;

	ksm_scan.seqnr++;
	return NULL;
}
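
/*
 * ksm_do_scan  - the ksm scanner main worker function.
 * @scan_npages - number of pages we want to scan before we return.
 */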
static void ksm_do_scan(unsigned int scan_npages)
{
	struct rmap_item *rmap_item;
	struct page *uninitialized_var(page);

	while (scan_npages-- && likely(!freezing(current))) {
		cond_resched();
		rmap_item = scan_get_next_rmap_item(&page);
		if (!rmap_item)
			return;
		if (!PageKsm(page) || !in_stable_tree(rmap_item))
			cmp_and_merge_page(page, rmap_item);
		put_page(page);
	}
}

static int ksmd_should_run(void)
{
	return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
}

static int ksm_scan_thread(void *nothing)
{
	set_freezable();
	set_user_nice(current, 5);

	while (!kthread_should_stop()) {
		mutex_lock(&ksm_thread_mutex);
		if (ksmd_should_run())
			ksm_do_scan(ksm_thread_pages_to_scan);
		mutex_unlock(&ksm_thread_mutex);

		try_to_freeze();

		if (ksmd_should_run()) {
			schedule_timeout_interruptible(
				msecs_to_jiffies(ksm_thread_sleep_millisecs));
		} else {
			wait_event_freezable(ksm_thread_wait,
				ksmd_should_run() || kthread_should_stop());
		}
	}
	return 0;
}

int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
		unsigned long end, int advice, unsigned long *vm_flags)
{
	struct mm_struct *mm = vma->vm_mm;
	int err;

	switch (advice) {
	case MADV_MERGEABLE:
		/*
		 * Be somewhat over-protective for now!
		 */
		if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
				 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
				 VM_NONLINEAR | VM_MIXEDMAP | VM_SAO))
			return 0;		/* just ignore the advice */

		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
			err = __ksm_enter(mm);
			if (err)
				return err;
		}

		*vm_flags |= VM_MERGEABLE;
		break;

	case MADV_UNMERGEABLE:
		if (!(*vm_flags & VM_MERGEABLE))
			return 0;		/* just ignore the advice */

		if (vma->anon_vma) {
			err = unmerge_ksm_pages(vma, start, end);
			if (err)
				return err;
		}

		*vm_flags &= ~VM_MERGEABLE;
		break;
	}

	return 0;
}
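
/*
 * Illustrative note: from userspace, an application opts a region in with
 * madvise(addr, length, MADV_MERGEABLE) and opts it back out with
 * MADV_UNMERGEABLE; both requests reach ksm_madvise() above through the
 * madvise() syscall, and ksmd only ever scans areas marked VM_MERGEABLE.
 */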

int __ksm_enter(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	int needs_wakeup;

	mm_slot = alloc_mm_slot();
	if (!mm_slot)
		return -ENOMEM;

	/* Check ksm_run too?  Would need tighter locking */
	needs_wakeup = list_empty(&ksm_mm_head.mm_list);

	spin_lock(&ksm_mmlist_lock);
	insert_to_mm_slots_hash(mm, mm_slot);
	/*
	 * Insert just behind the scanning cursor, to let the area settle
	 * down a little; when fork is followed by immediate exec, we don't
	 * want ksmd to waste time setting up and tearing down an rmap_list.
	 */
	list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
	spin_unlock(&ksm_mmlist_lock);

	set_bit(MMF_VM_MERGEABLE, &mm->flags);
	atomic_inc(&mm->mm_count);

	if (needs_wakeup)
		wake_up_interruptible(&ksm_thread_wait);

	return 0;
}

void __ksm_exit(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	int easy_to_free = 0;

	/*
	 * This process is exiting: if it's straightforward (as is the
	 * case when ksmd was never running), free mm_slot immediately.
	 * But if it's at the cursor or has rmap_items linked to it, use
	 * mmap_sem to synchronize with any break_cows before pagetables
	 * are freed, and leave the mm_slot on the list for ksmd to free.
	 */
	spin_lock(&ksm_mmlist_lock);
	mm_slot = get_mm_slot(mm);
	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
		if (!mm_slot->rmap_list) {
			hlist_del(&mm_slot->link);
			list_del(&mm_slot->mm_list);
			easy_to_free = 1;
		} else {
			list_move(&mm_slot->mm_list,
				  &ksm_scan.mm_slot->mm_list);
		}
	}
	spin_unlock(&ksm_mmlist_lock);

	if (easy_to_free) {
		free_mm_slot(mm_slot);
		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
		mmdrop(mm);
	} else if (mm_slot) {
		down_write(&mm->mmap_sem);
		up_write(&mm->mmap_sem);
	}
}

struct page *ksm_does_need_to_copy(struct page *page,
			struct vm_area_struct *vma, unsigned long address)
{
	struct page *new_page;

	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
	if (new_page) {
		copy_user_highpage(new_page, page, address, vma);

		SetPageDirty(new_page);
		__SetPageUptodate(new_page);
		SetPageSwapBacked(new_page);
		__set_page_locked(new_page);

		if (page_evictable(new_page, vma))
			lru_cache_add_lru(new_page, LRU_ACTIVE_ANON);
		else
			add_page_to_unevictable_list(new_page);
	}

	return new_page;
}

int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
			unsigned long *vm_flags)
{
	struct stable_node *stable_node;
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;
	unsigned int mapcount = page_mapcount(page);
	int referenced = 0;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return 0;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock(anon_vma);
		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * Initially we examine only the vma which covers this
			 * rmap_item; but later, if there is still work to do,
			 * we examine covering vmas in other mms: in case they
			 * were forked from the original since ksmd passed them
			 * over.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
				continue;

			referenced += page_referenced_one(page, vma,
				rmap_item->address, &mapcount, vm_flags);
			if (!search_new_forks || !mapcount)
				break;
		}
		anon_vma_unlock(anon_vma);
		if (!mapcount)
			goto out;
	}
	if (!search_new_forks++)
		goto again;
out:
	return referenced;
}

int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
{
	struct stable_node *stable_node;
	struct hlist_node *hlist;
	struct rmap_item *rmap_item;
	int ret = SWAP_AGAIN;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return SWAP_FAIL;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock(anon_vma);
		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * As in page_referenced_ksm(): first pass looks only
			 * at the rmap_item's own mm, second pass at forks.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			ret = try_to_unmap_one(page, vma,
					rmap_item->address, flags);
			if (ret != SWAP_AGAIN || !page_mapped(page)) {
				anon_vma_unlock(anon_vma);
				goto out;
			}
		}
		anon_vma_unlock(anon_vma);
	}
	if (!search_new_forks++)
		goto again;
out:
	return ret;
}

#ifdef CONFIG_MIGRATION
int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
		  struct vm_area_struct *, unsigned long, void *), void *arg)
{
	struct stable_node *stable_node;
	struct hlist_node *hlist;
	struct rmap_item *rmap_item;
	int ret = SWAP_AGAIN;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return ret;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock(anon_vma);
		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * As in page_referenced_ksm(): first pass looks only
			 * at the rmap_item's own mm, second pass at forks.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			ret = rmap_one(page, vma, rmap_item->address, arg);
			if (ret != SWAP_AGAIN) {
				anon_vma_unlock(anon_vma);
				goto out;
			}
		}
		anon_vma_unlock(anon_vma);
	}
	if (!search_new_forks++)
		goto again;
out:
	return ret;
}

void ksm_migrate_page(struct page *newpage, struct page *oldpage)
{
	struct stable_node *stable_node;

	VM_BUG_ON(!PageLocked(oldpage));
	VM_BUG_ON(!PageLocked(newpage));
	VM_BUG_ON(newpage->mapping != oldpage->mapping);

	stable_node = page_stable_node(newpage);
	if (stable_node) {
		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
		stable_node->kpfn = page_to_pfn(newpage);
	}
}
#endif /* CONFIG_MIGRATION */

#ifdef CONFIG_MEMORY_HOTREMOVE
static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
						 unsigned long end_pfn)
{
	struct rb_node *node;

	for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
		struct stable_node *stable_node;

		stable_node = rb_entry(node, struct stable_node, node);
		if (stable_node->kpfn >= start_pfn &&
		    stable_node->kpfn < end_pfn)
			return stable_node;
	}
	return NULL;
}

static int ksm_memory_callback(struct notifier_block *self,
			       unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;
	struct stable_node *stable_node;

	switch (action) {
	case MEM_GOING_OFFLINE:
		/*
		 * Lock out ksmd (and MADV_UNMERGEABLE) while memory is going
		 * offline.  mutex_lock_nested() keeps lockdep happy about
		 * taking ksm_thread_mutex inside the notifier chain mutex.
		 */
		mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
		break;

	case MEM_OFFLINE:
		/*
		 * Most of the work is done by page migration; but there might
		 * be a few stable_nodes left over, still pointing to struct
		 * pages which have been offlined: prune those from the tree.
		 */
		while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
					mn->start_pfn + mn->nr_pages)) != NULL)
			remove_node_from_stable_tree(stable_node);
		/* fallthrough */

	case MEM_CANCEL_OFFLINE:
		mutex_unlock(&ksm_thread_mutex);
		break;
	}
	return NOTIFY_OK;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

#ifdef CONFIG_SYSFS

#define KSM_ATTR_RO(_name) \
	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
#define KSM_ATTR(_name) \
	static struct kobj_attribute _name##_attr = \
		__ATTR(_name, 0644, _name##_show, _name##_store)

static ssize_t sleep_millisecs_show(struct kobject *kobj,
				    struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
}

static ssize_t sleep_millisecs_store(struct kobject *kobj,
				     struct kobj_attribute *attr,
				     const char *buf, size_t count)
{
	unsigned long msecs;
	int err;

	err = strict_strtoul(buf, 10, &msecs);
	if (err || msecs > UINT_MAX)
		return -EINVAL;

	ksm_thread_sleep_millisecs = msecs;

	return count;
}
KSM_ATTR(sleep_millisecs);

static ssize_t pages_to_scan_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
}

static ssize_t pages_to_scan_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t count)
{
	int err;
	unsigned long nr_pages;

	err = strict_strtoul(buf, 10, &nr_pages);
	if (err || nr_pages > UINT_MAX)
		return -EINVAL;

	ksm_thread_pages_to_scan = nr_pages;

	return count;
}
KSM_ATTR(pages_to_scan);

static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
			char *buf)
{
	return sprintf(buf, "%u\n", ksm_run);
}

static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
			 const char *buf, size_t count)
{
	int err;
	unsigned long flags;

	err = strict_strtoul(buf, 10, &flags);
	if (err || flags > UINT_MAX)
		return -EINVAL;
	if (flags > KSM_RUN_UNMERGE)
		return -EINVAL;

	/*
	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
	 * breaking COW to free the pages_shared (but leaves mm_slots
	 * on the list for when ksmd may be set running again).
	 */
	mutex_lock(&ksm_thread_mutex);
	if (ksm_run != flags) {
		ksm_run = flags;
		if (flags & KSM_RUN_UNMERGE) {
			current->flags |= PF_OOM_ORIGIN;
			err = unmerge_and_remove_all_rmap_items();
			current->flags &= ~PF_OOM_ORIGIN;
			if (err) {
				ksm_run = KSM_RUN_STOP;
				count = err;
			}
		}
	}
	mutex_unlock(&ksm_thread_mutex);

	if (flags & KSM_RUN_MERGE)
		wake_up_interruptible(&ksm_thread_wait);

	return count;
}
KSM_ATTR(run);

static ssize_t pages_shared_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_shared);
}
KSM_ATTR_RO(pages_shared);

static ssize_t pages_sharing_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_sharing);
}
KSM_ATTR_RO(pages_sharing);

static ssize_t pages_unshared_show(struct kobject *kobj,
				   struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_unshared);
}
KSM_ATTR_RO(pages_unshared);

static ssize_t pages_volatile_show(struct kobject *kobj,
				   struct kobj_attribute *attr, char *buf)
{
	long ksm_pages_volatile;

	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
				- ksm_pages_sharing - ksm_pages_unshared;
	/*
	 * It was not worth any locking to calculate that statistic,
	 * but it might therefore sometimes be negative: conceal that.
	 */
	if (ksm_pages_volatile < 0)
		ksm_pages_volatile = 0;
	return sprintf(buf, "%ld\n", ksm_pages_volatile);
}
KSM_ATTR_RO(pages_volatile);

static ssize_t full_scans_show(struct kobject *kobj,
			       struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_scan.seqnr);
}
KSM_ATTR_RO(full_scans);

static struct attribute *ksm_attrs[] = {
	&sleep_millisecs_attr.attr,
	&pages_to_scan_attr.attr,
	&run_attr.attr,
	&pages_shared_attr.attr,
	&pages_sharing_attr.attr,
	&pages_unshared_attr.attr,
	&pages_volatile_attr.attr,
	&full_scans_attr.attr,
	NULL,
};

static struct attribute_group ksm_attr_group = {
	.attrs = ksm_attrs,
	.name = "ksm",
};
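
/*
 * With this group registered on mm_kobj (see ksm_init below), the files
 * appear under /sys/kernel/mm/ksm/.  For example, writing 1 to
 * /sys/kernel/mm/ksm/run starts ksmd and writing 0 stops it.
 */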
#endif /* CONFIG_SYSFS */

static int __init ksm_init(void)
{
	struct task_struct *ksm_thread;
	int err;

	err = ksm_slab_init();
	if (err)
		goto out;

	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
	if (IS_ERR(ksm_thread)) {
		printk(KERN_ERR "ksm: creating kthread failed\n");
		err = PTR_ERR(ksm_thread);
		goto out_free;
	}

#ifdef CONFIG_SYSFS
	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
	if (err) {
		printk(KERN_ERR "ksm: register sysfs failed\n");
		kthread_stop(ksm_thread);
		goto out_free;
	}
#else
	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */

#endif /* CONFIG_SYSFS */

#ifdef CONFIG_MEMORY_HOTREMOVE
	/*
	 * Choose a high priority since the callback takes ksm_thread_mutex:
	 * later callbacks could only be taking locks which nest within that.
	 */
	hotplug_memory_notifier(ksm_memory_callback, 100);
#endif
	return 0;

out_free:
	ksm_slab_free();
out:
	return err;
}
module_init(ksm_init)