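/*
 * Memory merging support.
 *
 * This code enables dynamic sharing of identical pages found in
 * different memory areas, even if they are not shared by fork().
 */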
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/spinlock.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/memory.h>
#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <linux/ksm.h>
#include <linux/hash.h>
#include <linux/freezer.h>

#include <asm/tlbflush.h>
#include "internal.h"
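
/*
 * A few notes about the KSM scanning process, to make it easier to understand
 * the data structures below:
 *
 * In order to reduce excessive scanning, KSM sorts the memory pages by their
 * contents into a data structure that holds pointers to the pages' locations.
 *
 * Since the contents of the pages may change at any moment, KSM cannot just
 * insert the pages into a normal sorted tree and expect it to find anything.
 * Therefore KSM uses two data structures - the stable and the unstable tree.
 *
 * The stable tree holds pointers to all the merged pages (ksm pages), sorted
 * by their contents.  Because each such page is write-protected, searching on
 * this tree is fully assured to be working, and therefore this tree is called
 * the stable tree.
 *
 * The unstable tree holds pointers to pages that have been found to be
 * "unchanged for a period of time".  It also sorts those pages by their
 * contents, but since they are not write-protected, KSM cannot rely upon
 * the unstable tree to work correctly: the tree is liable to be corrupted
 * as its contents are modified.  KSM copes with that in two ways:
 *
 * 1) The unstable tree is flushed every time KSM completes scanning all
 *    memory areas, and then the tree is rebuilt again from the beginning.
 * 2) KSM will only insert into the unstable tree those pages whose checksum
 *    has not changed since the previous scan of all memory areas.
 *
 * When a match is found on the stable tree, the duplicate is simply mapped
 * to the existing ksm page; when it is found on the unstable tree, both
 * pages are write-protected, merged, and promoted into the stable tree
 * (see cmp_and_merge_page() below).
 */

/**
 * struct mm_slot - ksm information per mm that is being scanned
 * @link: link to the mm_slots hash list
 * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
 * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
 * @mm: the mm that this information is valid for
 */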
struct mm_slot {
	struct hlist_node link;
	struct list_head mm_list;
	struct rmap_item *rmap_list;
	struct mm_struct *mm;
};
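
/**
 * struct ksm_scan - cursor for scanning
 * @mm_slot: the current mm_slot we are scanning
 * @address: the next address inside that to be scanned
 * @rmap_list: link to the next rmap to be scanned in the rmap_list
 * @seqnr: count of completed full scans (needed when removing unstable node)
 *
 * There is only the one ksm_scan instance of this cursor structure.
 */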
struct ksm_scan {
	struct mm_slot *mm_slot;
	unsigned long address;
	struct rmap_item **rmap_list;
	unsigned long seqnr;
};
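
/**
 * struct stable_node - node of the stable rbtree
 * @node: rb node of this ksm page in the stable tree
 * @hlist: hlist head of rmap_items using this ksm page
 * @kpfn: page frame number of this ksm page
 */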
struct stable_node {
	struct rb_node node;
	struct hlist_head hlist;
	unsigned long kpfn;
};
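
/**
 * struct rmap_item - reverse mapping item for virtual addresses
 * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
 * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
 * @mm: the memory structure this rmap_item is pointing into
 * @address: the virtual address this rmap_item tracks (+ flags in low bits)
 * @oldchecksum: previous checksum of the page at that virtual address
 * @node: rb node of this rmap_item in the unstable tree
 * @head: pointer to stable_node heading this list in the stable tree
 * @hlist: link into hlist of rmap_items hanging off that stable_node
 */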
struct rmap_item {
	struct rmap_item *rmap_list;
	struct anon_vma *anon_vma;	/* when stable */
	struct mm_struct *mm;
	unsigned long address;		/* + low bits used for flags below */
	unsigned int oldchecksum;	/* when unstable */
	union {
		struct rb_node node;	/* when node of unstable tree */
		struct {		/* when listed from stable tree */
			struct stable_node *head;
			struct hlist_node hlist;
		};
	};
};

#define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
#define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
#define STABLE_FLAG	0x200	/* is listed from the stable tree */

/* The stable and unstable tree heads */
static struct rb_root root_stable_tree = RB_ROOT;
static struct rb_root root_unstable_tree = RB_ROOT;

#define MM_SLOTS_HASH_SHIFT 10
#define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
static struct hlist_head mm_slots_hash[MM_SLOTS_HASH_HEADS];

static struct mm_slot ksm_mm_head = {
	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
};
static struct ksm_scan ksm_scan = {
	.mm_slot = &ksm_mm_head,
};

static struct kmem_cache *rmap_item_cache;
static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;

/* The number of nodes in the stable tree */
static unsigned long ksm_pages_shared;

/* The number of page slots additionally sharing those nodes */
static unsigned long ksm_pages_sharing;

/* The number of nodes in the unstable tree */
static unsigned long ksm_pages_unshared;

/* The number of rmap_items in use: to calculate pages_volatile */
static unsigned long ksm_rmap_items;

/* Number of pages ksmd should scan in one batch */
static unsigned int ksm_thread_pages_to_scan = 100;

/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;

#define KSM_RUN_STOP	0
#define KSM_RUN_MERGE	1
#define KSM_RUN_UNMERGE	2
static unsigned int ksm_run = KSM_RUN_STOP;

static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
static DEFINE_SPINLOCK(ksm_mmlist_lock);

#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
		sizeof(struct __struct), __alignof__(struct __struct),\
		(__flags), NULL)

static int __init ksm_slab_init(void)
{
	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
	if (!rmap_item_cache)
		goto out;

	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
	if (!stable_node_cache)
		goto out_free1;

	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
	if (!mm_slot_cache)
		goto out_free2;

	return 0;

out_free2:
	kmem_cache_destroy(stable_node_cache);
out_free1:
	kmem_cache_destroy(rmap_item_cache);
out:
	return -ENOMEM;
}

static void __init ksm_slab_free(void)
{
	kmem_cache_destroy(mm_slot_cache);
	kmem_cache_destroy(stable_node_cache);
	kmem_cache_destroy(rmap_item_cache);
	mm_slot_cache = NULL;
}

static inline struct rmap_item *alloc_rmap_item(void)
{
	struct rmap_item *rmap_item;

	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
	if (rmap_item)
		ksm_rmap_items++;
	return rmap_item;
}

static inline void free_rmap_item(struct rmap_item *rmap_item)
{
	ksm_rmap_items--;
	rmap_item->mm = NULL;	/* debug safety */
	kmem_cache_free(rmap_item_cache, rmap_item);
}

static inline struct stable_node *alloc_stable_node(void)
{
	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
}

static inline void free_stable_node(struct stable_node *stable_node)
{
	kmem_cache_free(stable_node_cache, stable_node);
}

static inline struct mm_slot *alloc_mm_slot(void)
{
	if (!mm_slot_cache)	/* initialization failed */
		return NULL;
	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
}

static inline void free_mm_slot(struct mm_slot *mm_slot)
{
	kmem_cache_free(mm_slot_cache, mm_slot);
}

static struct mm_slot *get_mm_slot(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	struct hlist_head *bucket;
	struct hlist_node *node;

	bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
	hlist_for_each_entry(mm_slot, node, bucket, link) {
		if (mm == mm_slot->mm)
			return mm_slot;
	}
	return NULL;
}

static void insert_to_mm_slots_hash(struct mm_struct *mm,
				    struct mm_slot *mm_slot)
{
	struct hlist_head *bucket;

	bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
	mm_slot->mm = mm;
	hlist_add_head(&mm_slot->link, bucket);
}

static inline int in_stable_tree(struct rmap_item *rmap_item)
{
	return rmap_item->address & STABLE_FLAG;
}
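
/*
 * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
 * page tables after it has passed through ksm_exit(): an exiting mm is
 * recognized simply by its mm_users count having gone to zero, so
 * ksm_test_exit() is used throughout to make this test for exit: in some
 * places for correctness, in some places just to avoid unnecessary work.
 */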
static inline bool ksm_test_exit(struct mm_struct *mm)
{
	return atomic_read(&mm->mm_users) == 0;
}
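
/*
 * We use break_ksm to break COW on a ksm page: it's a stripped down
 *
 *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
 *		put_page(page);
 *
 * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
 * in case the application has unmapped and remapped mm,addr meanwhile.
 * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
 * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
 */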
static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	int ret = 0;

	do {
		cond_resched();
		page = follow_page(vma, addr, FOLL_GET);
		if (IS_ERR_OR_NULL(page))
			break;
		if (PageKsm(page))
			ret = handle_mm_fault(vma->vm_mm, vma, addr,
							FAULT_FLAG_WRITE);
		else
			ret = VM_FAULT_WRITE;
		put_page(page);
	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));
	/*
	 * We must loop until COW has been broken, because handle_mm_fault()
	 * may back out if there's any difficulty e.g. if pte accessed bit
	 * gets updated concurrently.
	 *
	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
	 * COW has been broken, even if the vma does not permit VM_WRITE;
	 * but note that a concurrent fault might break PageKsm for us.
	 *
	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
	 * backing file, which also invalidates anonymous pages: that's
	 * okay, that truncation will have unmapped the PageKsm for us.
	 *
	 * VM_FAULT_OOM is expected when the mm is in a limited mem_cgroup:
	 * report it as -ENOMEM so the caller can back out of the unmerge.
	 */
	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
}

static void break_cow(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;
	unsigned long addr = rmap_item->address;
	struct vm_area_struct *vma;

	/*
	 * It is not an accident that whenever we want to break COW
	 * to undo, we also need to drop a reference to the anon_vma.
	 */
	put_anon_vma(rmap_item->anon_vma);

	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		goto out;
	vma = find_vma(mm, addr);
	if (!vma || vma->vm_start > addr)
		goto out;
	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
		goto out;
	break_ksm(vma, addr);
out:
	up_read(&mm->mmap_sem);
}

static struct page *page_trans_compound_anon(struct page *page)
{
	if (PageTransCompound(page)) {
		struct page *head = compound_trans_head(page);
		/*
		 * head may actually be splitted and freed from under
		 * us but it's ok here.
		 */
		if (PageAnon(head))
			return head;
	}
	return NULL;
}

static struct page *get_mergeable_page(struct rmap_item *rmap_item)
{
	struct mm_struct *mm = rmap_item->mm;
	unsigned long addr = rmap_item->address;
	struct vm_area_struct *vma;
	struct page *page;

	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		goto out;
	vma = find_vma(mm, addr);
	if (!vma || vma->vm_start > addr)
		goto out;
	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
		goto out;

	page = follow_page(vma, addr, FOLL_GET);
	if (IS_ERR_OR_NULL(page))
		goto out;
	if (PageAnon(page) || page_trans_compound_anon(page)) {
		flush_anon_page(vma, page, addr);
		flush_dcache_page(page);
	} else {
		put_page(page);
out:		page = NULL;
	}
	up_read(&mm->mmap_sem);
	return page;
}

static void remove_node_from_stable_tree(struct stable_node *stable_node)
{
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;

	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		if (rmap_item->hlist.next)
			ksm_pages_sharing--;
		else
			ksm_pages_shared--;
		put_anon_vma(rmap_item->anon_vma);
		rmap_item->address &= PAGE_MASK;
		cond_resched();
	}

	rb_erase(&stable_node->node, &root_stable_tree);
	free_stable_node(stable_node);
}
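
/*
 * get_ksm_page: checks if the page indicated by the stable node
 * is still its ksm page, despite having held no reference to it.
 * In which case we can trust the content of the page, and it
 * returns the gotten page; but if the page has now been zapped,
 * remove the stale node from the stable tree and return NULL.
 *
 * You would expect the stable_node to hold a reference to the ksm page.
 * But if it increments the page's count, swapping out has to wait for
 * ksmd to come around again before it can free the page, which may take
 * seconds or even minutes: much too unhealthy a state to subject a page
 * cache page to.  So the stable tree holds only the page's pfn, and the
 * page->mapping back-pointer is revalidated (under rcu_read_lock) before
 * the page is used.
 */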
static struct page *get_ksm_page(struct stable_node *stable_node)
{
	struct page *page;
	void *expected_mapping;

	page = pfn_to_page(stable_node->kpfn);
	expected_mapping = (void *)stable_node +
				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
	rcu_read_lock();
	if (page->mapping != expected_mapping)
		goto stale;
	if (!get_page_unless_zero(page))
		goto stale;
	if (page->mapping != expected_mapping) {
		put_page(page);
		goto stale;
	}
	rcu_read_unlock();
	return page;
stale:
	rcu_read_unlock();
	remove_node_from_stable_tree(stable_node);
	return NULL;
}
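
/*
 * Removing rmap_item from stable or unstable tree.
 * This function will clean the information from the stable/unstable tree.
 */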
static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
{
	if (rmap_item->address & STABLE_FLAG) {
		struct stable_node *stable_node;
		struct page *page;

		stable_node = rmap_item->head;
		page = get_ksm_page(stable_node);
		if (!page)
			goto out;

		lock_page(page);
		hlist_del(&rmap_item->hlist);
		unlock_page(page);
		put_page(page);

		if (stable_node->hlist.first)
			ksm_pages_sharing--;
		else
			ksm_pages_shared--;

		put_anon_vma(rmap_item->anon_vma);
		rmap_item->address &= PAGE_MASK;

	} else if (rmap_item->address & UNSTABLE_FLAG) {
		unsigned char age;
		/*
		 * Usually ksmd can and must skip the rb_erase, because
		 * root_unstable_tree was already reset to RB_ROOT.
		 * But be careful when an mm is exiting: do the rb_erase
		 * if this rmap_item was inserted by this scan, rather
		 * than left over from before.
		 */
		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
		BUG_ON(age > 1);
		if (!age)
			rb_erase(&rmap_item->node, &root_unstable_tree);

		ksm_pages_unshared--;
		rmap_item->address &= PAGE_MASK;
	}
out:
	cond_resched();		/* we're called from many long loops */
}

static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
				       struct rmap_item **rmap_list)
{
	while (*rmap_list) {
		struct rmap_item *rmap_item = *rmap_list;
		*rmap_list = rmap_item->rmap_list;
		remove_rmap_item_from_tree(rmap_item);
		free_rmap_item(rmap_item);
	}
}
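
/*
 * Though it's very tempting to unmerge rmap_items from stable tree rather
 * than check every pte of a given vma, the locking doesn't quite work for
 * that - an rmap_item is assigned to the stable tree after inserting ksm
 * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
 * rmap_items from parent to child at fork time (so as not to waste time
 * if exit comes before the next scan reaches it).
 *
 * Similarly, although we'd like to remove rmap_items (so updating counts
 * and freeing memory) when unmerging an area, it's easier to leave that
 * to the next pass of ksmd - consider, for example, how ksmd might be
 * in cmp_and_merge_page on one of the rmap_items we would be removing.
 */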
static int unmerge_ksm_pages(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end)
{
	unsigned long addr;
	int err = 0;

	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
		if (ksm_test_exit(vma->vm_mm))
			break;
		if (signal_pending(current))
			err = -ERESTARTSYS;
		else
			err = break_ksm(vma, addr);
	}
	return err;
}

#ifdef CONFIG_SYSFS
/*
 * Only called through the sysfs control interface:
 */
static int unmerge_and_remove_all_rmap_items(void)
{
	struct mm_slot *mm_slot;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int err = 0;

	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
						struct mm_slot, mm_list);
	spin_unlock(&ksm_mmlist_lock);

	for (mm_slot = ksm_scan.mm_slot;
			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
		mm = mm_slot->mm;
		down_read(&mm->mmap_sem);
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			if (ksm_test_exit(mm))
				break;
			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
				continue;
			err = unmerge_ksm_pages(vma,
						vma->vm_start, vma->vm_end);
			if (err)
				goto error;
		}

		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);

		spin_lock(&ksm_mmlist_lock);
		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
						struct mm_slot, mm_list);
		if (ksm_test_exit(mm)) {
			hlist_del(&mm_slot->link);
			list_del(&mm_slot->mm_list);
			spin_unlock(&ksm_mmlist_lock);

			free_mm_slot(mm_slot);
			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
			up_read(&mm->mmap_sem);
			mmdrop(mm);
		} else {
			spin_unlock(&ksm_mmlist_lock);
			up_read(&mm->mmap_sem);
		}
	}

	ksm_scan.seqnr = 0;
	return 0;

error:
	up_read(&mm->mmap_sem);
	spin_lock(&ksm_mmlist_lock);
	ksm_scan.mm_slot = &ksm_mm_head;
	spin_unlock(&ksm_mmlist_lock);
	return err;
}
#endif /* CONFIG_SYSFS */

static u32 calc_checksum(struct page *page)
{
	u32 checksum;
	void *addr = kmap_atomic(page, KM_USER0);
	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
	kunmap_atomic(addr, KM_USER0);
	return checksum;
}

static int memcmp_pages(struct page *page1, struct page *page2)
{
	char *addr1, *addr2;
	int ret;

	addr1 = kmap_atomic(page1, KM_USER0);
	addr2 = kmap_atomic(page2, KM_USER1);
	ret = memcmp(addr1, addr2, PAGE_SIZE);
	kunmap_atomic(addr2, KM_USER1);
	kunmap_atomic(addr1, KM_USER0);
	return ret;
}

static inline int pages_identical(struct page *page1, struct page *page2)
{
	return !memcmp_pages(page1, page2);
}
static int write_protect_page(struct vm_area_struct *vma, struct page *page,
			      pte_t *orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr;
	pte_t *ptep;
	spinlock_t *ptl;
	int swapped;
	int err = -EFAULT;

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	BUG_ON(PageTransCompound(page));
	ptep = page_check_address(page, mm, addr, &ptl, 0);
	if (!ptep)
		goto out;

	if (pte_write(*ptep) || pte_dirty(*ptep)) {
		pte_t entry;

		swapped = PageSwapCache(page);
		flush_cache_page(vma, addr, page_to_pfn(page));
		/*
		 * Ok this is tricky, when get_user_pages_fast() run it doesn't
		 * take any lock, therefore the check that we are going to make
		 * with the pagecount against the mapcount is racey and
		 * O_DIRECT can happen right after the check.
		 * So we clear the pte and flush the tlb before the check
		 * this assure us that no O_DIRECT can happen after the check
		 * or in the middle of the check.
		 */
		entry = ptep_clear_flush(vma, addr, ptep);
		/*
		 * Check that no O_DIRECT or similar I/O is in progress on the
		 * page
		 */
		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
			set_pte_at(mm, addr, ptep, entry);
			goto out_unlock;
		}
		if (pte_dirty(entry))
			set_page_dirty(page);
		entry = pte_mkclean(pte_wrprotect(entry));
		set_pte_at_notify(mm, addr, ptep, entry);
	}
	*orig_pte = *ptep;
	err = 0;

out_unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return err;
}
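
/**
 * replace_page - replace page in vma by new ksm page
 * @vma:      vma that holds the pte pointing to page
 * @page:     the page we are replacing by kpage
 * @kpage:    the ksm page we replace page by
 * @orig_pte: the original value of the pte
 *
 * Returns 0 on success, -EFAULT on failure.
 */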
static int replace_page(struct vm_area_struct *vma, struct page *page,
			struct page *kpage, pte_t orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long addr;
	int err = -EFAULT;

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		goto out;

	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		goto out;

	pmd = pmd_offset(pud, addr);
	BUG_ON(pmd_trans_huge(*pmd));
	if (!pmd_present(*pmd))
		goto out;

	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte_same(*ptep, orig_pte)) {
		pte_unmap_unlock(ptep, ptl);
		goto out;
	}

	get_page(kpage);
	page_add_anon_rmap(kpage, vma, addr);

	flush_cache_page(vma, addr, pte_pfn(*ptep));
	ptep_clear_flush(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

	page_remove_rmap(page);
	if (!page_mapped(page))
		try_to_free_swap(page);
	put_page(page);

	pte_unmap_unlock(ptep, ptl);
	err = 0;
out:
	return err;
}

static int page_trans_compound_anon_split(struct page *page)
{
	int ret = 0;
	struct page *transhuge_head = page_trans_compound_anon(page);
	if (transhuge_head) {
		/* Get the reference on the head to split it. */
		if (get_page_unless_zero(transhuge_head)) {
			/*
			 * Recheck we got the reference while the head
			 * was still anonymous.
			 */
			if (PageAnon(transhuge_head))
				ret = split_huge_page(transhuge_head);
			else
				/*
				 * Retry later if split_huge_page run
				 * from under us.
				 */
				ret = 1;
			put_page(transhuge_head);
		} else
			/* Retry later if split_huge_page run from under us. */
			ret = 1;
	}
	return ret;
}
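
/*
 * try_to_merge_one_page - take two pages and merge them into one
 * @vma: the vma that holds the pte pointing to page
 * @page: the PageAnon page that we want to replace with kpage
 * @kpage: the PageKsm page that we want to map instead of page,
 *         or NULL the first time when we want to use page as kpage.
 *
 * This function returns 0 if the pages were merged, -EFAULT otherwise.
 */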
static int try_to_merge_one_page(struct vm_area_struct *vma,
				 struct page *page, struct page *kpage)
{
	pte_t orig_pte = __pte(0);
	int err = -EFAULT;

	if (page == kpage)			/* ksm page forked */
		return 0;

	if (!(vma->vm_flags & VM_MERGEABLE))
		goto out;
	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
		goto out;
	BUG_ON(PageTransCompound(page));
	if (!PageAnon(page))
		goto out;

	/*
	 * We need the page lock to read a stable PageSwapCache in
	 * write_protect_page().  We use trylock_page() instead of
	 * lock_page() because we don't want to wait here - we
	 * prefer to continue scanning and merging different pages,
	 * then come back to this page when it is unlocked.
	 */
	if (!trylock_page(page))
		goto out;
	/*
	 * If this anonymous page is mapped only here, its pte may need
	 * to be write-protected.  If it's mapped elsewhere, all of its
	 * ptes are necessarily already write-protected.  But in either
	 * case, we need to lock and check page_count is not raised.
	 */
	if (write_protect_page(vma, page, &orig_pte) == 0) {
		if (!kpage) {
			/*
			 * While we hold page lock, upgrade page from
			 * PageAnon+anon_vma to PageKsm+NULL stable_node:
			 * stable_tree_insert() will update stable_node.
			 */
			set_page_stable_node(page, NULL);
			mark_page_accessed(page);
			err = 0;
		} else if (pages_identical(page, kpage))
			err = replace_page(vma, page, kpage, orig_pte);
	}

	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
		munlock_vma_page(page);
		if (!PageMlocked(kpage)) {
			unlock_page(page);
			lock_page(kpage);
			mlock_vma_page(kpage);
			page = kpage;		/* for final unlock */
		}
	}

	unlock_page(page);
out:
	return err;
}
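
/*
 * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
 * but no new kernel page is allocated: kpage must already be a ksm page.
 *
 * This function returns 0 if the pages were merged, -EFAULT otherwise.
 */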
static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
				      struct page *page, struct page *kpage)
{
	struct mm_struct *mm = rmap_item->mm;
	struct vm_area_struct *vma;
	int err = -EFAULT;

	down_read(&mm->mmap_sem);
	if (ksm_test_exit(mm))
		goto out;
	vma = find_vma(mm, rmap_item->address);
	if (!vma || vma->vm_start > rmap_item->address)
		goto out;

	err = try_to_merge_one_page(vma, page, kpage);
	if (err)
		goto out;

	/* Must get reference to anon_vma while still holding mmap_sem */
	rmap_item->anon_vma = vma->anon_vma;
	get_anon_vma(vma->anon_vma);
out:
	up_read(&mm->mmap_sem);
	return err;
}
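
/*
 * try_to_merge_two_pages - take two identical pages and prepare them
 * to be merged into one page.
 *
 * This function returns the kpage if we successfully merged two identical
 * pages into one ksm page, NULL otherwise.
 *
 * Note that this function upgrades page to ksm page: if one of the pages
 * is already a ksm page, try_to_merge_with_ksm_page should be used.
 */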
static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
					   struct page *page,
					   struct rmap_item *tree_rmap_item,
					   struct page *tree_page)
{
	int err;

	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
	if (!err) {
		err = try_to_merge_with_ksm_page(tree_rmap_item,
							tree_page, page);
		/*
		 * If that fails, we have a ksm page with only one pte
		 * pointing to it: so break it.
		 */
		if (err)
			break_cow(rmap_item);
	}
	return err ? NULL : page;
}
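
/*
 * stable_tree_search - search for page inside the stable tree
 *
 * This function checks if there is a page inside the stable tree
 * with identical content to the page that we are scanning right now.
 *
 * This function returns the stable tree node of identical content if found,
 * NULL otherwise.
 */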
static struct page *stable_tree_search(struct page *page)
{
	struct rb_node *node = root_stable_tree.rb_node;
	struct stable_node *stable_node;

	stable_node = page_stable_node(page);
	if (stable_node) {			/* ksm page forked */
		get_page(page);
		return page;
	}

	while (node) {
		struct page *tree_page;
		int ret;

		cond_resched();
		stable_node = rb_entry(node, struct stable_node, node);
		tree_page = get_ksm_page(stable_node);
		if (!tree_page)
			return NULL;

		ret = memcmp_pages(page, tree_page);

		if (ret < 0) {
			put_page(tree_page);
			node = node->rb_left;
		} else if (ret > 0) {
			put_page(tree_page);
			node = node->rb_right;
		} else
			return tree_page;
	}

	return NULL;
}
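
/*
 * stable_tree_insert - insert rmap_item pointing to new ksm page
 * into the stable tree.
 *
 * This function returns the stable tree node just allocated on success,
 * NULL otherwise.
 */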
static struct stable_node *stable_tree_insert(struct page *kpage)
{
	struct rb_node **new = &root_stable_tree.rb_node;
	struct rb_node *parent = NULL;
	struct stable_node *stable_node;

	while (*new) {
		struct page *tree_page;
		int ret;

		cond_resched();
		stable_node = rb_entry(*new, struct stable_node, node);
		tree_page = get_ksm_page(stable_node);
		if (!tree_page)
			return NULL;

		ret = memcmp_pages(kpage, tree_page);
		put_page(tree_page);

		parent = *new;
		if (ret < 0)
			new = &parent->rb_left;
		else if (ret > 0)
			new = &parent->rb_right;
		else {
			/*
			 * It is not a bug that stable_tree_search() didn't
			 * find this node: because at that time our page was
			 * not yet write-protected, so may have changed since.
			 */
			return NULL;
		}
	}

	stable_node = alloc_stable_node();
	if (!stable_node)
		return NULL;

	rb_link_node(&stable_node->node, parent, new);
	rb_insert_color(&stable_node->node, &root_stable_tree);

	INIT_HLIST_HEAD(&stable_node->hlist);

	stable_node->kpfn = page_to_pfn(kpage);
	set_page_stable_node(kpage, stable_node);

	return stable_node;
}
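
/*
 * unstable_tree_search_insert - search for identical page,
 * else insert rmap_item into the unstable tree.
 *
 * This function searches for a page in the unstable tree identical to the
 * page currently being scanned; and if no identical page is found in the
 * tree, we insert rmap_item as a new object into the unstable tree.
 *
 * This function returns a pointer to the rmap_item found to be identical
 * to the currently scanned page, NULL otherwise.
 *
 * This function does both searching and inserting, because they share
 * the same walking algorithm in an rbtree.
 */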
static
struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
					      struct page *page,
					      struct page **tree_pagep)
{
	struct rb_node **new = &root_unstable_tree.rb_node;
	struct rb_node *parent = NULL;

	while (*new) {
		struct rmap_item *tree_rmap_item;
		struct page *tree_page;
		int ret;

		cond_resched();
		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
		tree_page = get_mergeable_page(tree_rmap_item);
		if (IS_ERR_OR_NULL(tree_page))
			return NULL;

		/*
		 * Don't substitute a ksm page for a forked page.
		 */
		if (page == tree_page) {
			put_page(tree_page);
			return NULL;
		}

		ret = memcmp_pages(page, tree_page);

		parent = *new;
		if (ret < 0) {
			put_page(tree_page);
			new = &parent->rb_left;
		} else if (ret > 0) {
			put_page(tree_page);
			new = &parent->rb_right;
		} else {
			*tree_pagep = tree_page;
			return tree_rmap_item;
		}
	}

	rmap_item->address |= UNSTABLE_FLAG;
	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
	rb_link_node(&rmap_item->node, parent, new);
	rb_insert_color(&rmap_item->node, &root_unstable_tree);

	ksm_pages_unshared++;
	return NULL;
}
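
/*
 * stable_tree_append - add another rmap_item to the linked list of
 * rmap_items hanging off a given node of the stable tree, all sharing
 * the same ksm page.
 */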
static void stable_tree_append(struct rmap_item *rmap_item,
			       struct stable_node *stable_node)
{
	rmap_item->head = stable_node;
	rmap_item->address |= STABLE_FLAG;
	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);

	if (rmap_item->hlist.next)
		ksm_pages_sharing++;
	else
		ksm_pages_shared++;
}
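
/*
 * cmp_and_merge_page - first see if page can be merged into the stable tree;
 * if not, compare checksum to previous and if it's the same, see if page can
 * be inserted into the unstable tree, or merged with a page already there and
 * both transferred to the stable tree.
 *
 * @page: the page that we are searching identical page to.
 * @rmap_item: the reverse mapping into the virtual address of this page
 */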
static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
{
	struct rmap_item *tree_rmap_item;
	struct page *tree_page = NULL;
	struct stable_node *stable_node;
	struct page *kpage;
	unsigned int checksum;
	int err;

	remove_rmap_item_from_tree(rmap_item);

	/* We first start with searching the page inside the stable tree */
	kpage = stable_tree_search(page);
	if (kpage) {
		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
		if (!err) {
			/*
			 * The page was successfully merged:
			 * add its rmap_item to the stable tree.
			 */
			lock_page(kpage);
			stable_tree_append(rmap_item, page_stable_node(kpage));
			unlock_page(kpage);
		}
		put_page(kpage);
		return;
	}

	/*
	 * If the hash value of the page has changed from the last time
	 * we calculated it, this page is changing frequently: therefore we
	 * don't want to insert it in the unstable tree, and we don't want
	 * to waste our time searching for something identical to it there.
	 */
	checksum = calc_checksum(page);
	if (rmap_item->oldchecksum != checksum) {
		rmap_item->oldchecksum = checksum;
		return;
	}

	tree_rmap_item =
		unstable_tree_search_insert(rmap_item, page, &tree_page);
	if (tree_rmap_item) {
		kpage = try_to_merge_two_pages(rmap_item, page,
						tree_rmap_item, tree_page);
		put_page(tree_page);
		/*
		 * As soon as we merge this page, we want to remove the
		 * rmap_item of the page we have merged with from the unstable
		 * tree, and insert it instead as new node in the stable tree.
		 */
		if (kpage) {
			remove_rmap_item_from_tree(tree_rmap_item);

			lock_page(kpage);
			stable_node = stable_tree_insert(kpage);
			if (stable_node) {
				stable_tree_append(tree_rmap_item, stable_node);
				stable_tree_append(rmap_item, stable_node);
			}
			unlock_page(kpage);

			/*
			 * If we fail to insert the page into the stable tree,
			 * we will have 2 virtual addresses that are pointing
			 * to a ksm page left outside the stable tree,
			 * in which case we need to break_cow on both.
			 */
			if (!stable_node) {
				break_cow(tree_rmap_item);
				break_cow(rmap_item);
			}
		}
	}
}

static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
					    struct rmap_item **rmap_list,
					    unsigned long addr)
{
	struct rmap_item *rmap_item;

	while (*rmap_list) {
		rmap_item = *rmap_list;
		if ((rmap_item->address & PAGE_MASK) == addr)
			return rmap_item;
		if (rmap_item->address > addr)
			break;
		*rmap_list = rmap_item->rmap_list;
		remove_rmap_item_from_tree(rmap_item);
		free_rmap_item(rmap_item);
	}

	rmap_item = alloc_rmap_item();
	if (rmap_item) {
		/* It has already been zeroed */
		rmap_item->mm = mm_slot->mm;
		rmap_item->address = addr;
		rmap_item->rmap_list = *rmap_list;
		*rmap_list = rmap_item;
	}
	return rmap_item;
}
1273
1274static struct rmap_item *scan_get_next_rmap_item(struct page **page)
1275{
1276 struct mm_struct *mm;
1277 struct mm_slot *slot;
1278 struct vm_area_struct *vma;
1279 struct rmap_item *rmap_item;
1280
1281 if (list_empty(&ksm_mm_head.mm_list))
1282 return NULL;
1283
1284 slot = ksm_scan.mm_slot;
1285 if (slot == &ksm_mm_head) {
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296 lru_add_drain_all();
1297
1298 root_unstable_tree = RB_ROOT;
1299
1300 spin_lock(&ksm_mmlist_lock);
1301 slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
1302 ksm_scan.mm_slot = slot;
1303 spin_unlock(&ksm_mmlist_lock);
1304next_mm:
1305 ksm_scan.address = 0;
1306 ksm_scan.rmap_list = &slot->rmap_list;
1307 }
1308
1309 mm = slot->mm;
1310 down_read(&mm->mmap_sem);
1311 if (ksm_test_exit(mm))
1312 vma = NULL;
1313 else
1314 vma = find_vma(mm, ksm_scan.address);
1315
1316 for (; vma; vma = vma->vm_next) {
1317 if (!(vma->vm_flags & VM_MERGEABLE))
1318 continue;
1319 if (ksm_scan.address < vma->vm_start)
1320 ksm_scan.address = vma->vm_start;
1321 if (!vma->anon_vma)
1322 ksm_scan.address = vma->vm_end;
1323
1324 while (ksm_scan.address < vma->vm_end) {
1325 if (ksm_test_exit(mm))
1326 break;
1327 *page = follow_page(vma, ksm_scan.address, FOLL_GET);
1328 if (IS_ERR_OR_NULL(*page)) {
1329 ksm_scan.address += PAGE_SIZE;
1330 cond_resched();
1331 continue;
1332 }
1333 if (PageAnon(*page) ||
1334 page_trans_compound_anon(*page)) {
1335 flush_anon_page(vma, *page, ksm_scan.address);
1336 flush_dcache_page(*page);
1337 rmap_item = get_next_rmap_item(slot,
1338 ksm_scan.rmap_list, ksm_scan.address);
1339 if (rmap_item) {
1340 ksm_scan.rmap_list =
1341 &rmap_item->rmap_list;
1342 ksm_scan.address += PAGE_SIZE;
1343 } else
1344 put_page(*page);
1345 up_read(&mm->mmap_sem);
1346 return rmap_item;
1347 }
1348 put_page(*page);
1349 ksm_scan.address += PAGE_SIZE;
1350 cond_resched();
1351 }
1352 }
1353
1354 if (ksm_test_exit(mm)) {
1355 ksm_scan.address = 0;
1356 ksm_scan.rmap_list = &slot->rmap_list;
1357 }
1358
1359
1360
1361
1362 remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
1363
1364 spin_lock(&ksm_mmlist_lock);
1365 ksm_scan.mm_slot = list_entry(slot->mm_list.next,
1366 struct mm_slot, mm_list);
1367 if (ksm_scan.address == 0) {
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377 hlist_del(&slot->link);
1378 list_del(&slot->mm_list);
1379 spin_unlock(&ksm_mmlist_lock);
1380
1381 free_mm_slot(slot);
1382 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
1383 up_read(&mm->mmap_sem);
1384 mmdrop(mm);
1385 } else {
1386 spin_unlock(&ksm_mmlist_lock);
1387 up_read(&mm->mmap_sem);
1388 }
1389
1390
1391 slot = ksm_scan.mm_slot;
1392 if (slot != &ksm_mm_head)
1393 goto next_mm;
1394
1395 ksm_scan.seqnr++;
1396 return NULL;
1397}
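
/**
 * ksm_do_scan  - the ksm scanner main worker function.
 * @scan_npages - number of pages we want to scan before we return.
 */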
static void ksm_do_scan(unsigned int scan_npages)
{
	struct rmap_item *rmap_item;
	struct page *uninitialized_var(page);

	while (scan_npages-- && likely(!freezing(current))) {
		cond_resched();
		rmap_item = scan_get_next_rmap_item(&page);
		if (!rmap_item)
			return;
		if (!PageKsm(page) || !in_stable_tree(rmap_item))
			cmp_and_merge_page(page, rmap_item);
		put_page(page);
	}
}

static int ksmd_should_run(void)
{
	return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
}

static int ksm_scan_thread(void *nothing)
{
	set_freezable();
	set_user_nice(current, 5);

	while (!kthread_should_stop()) {
		mutex_lock(&ksm_thread_mutex);
		if (ksmd_should_run())
			ksm_do_scan(ksm_thread_pages_to_scan);
		mutex_unlock(&ksm_thread_mutex);

		try_to_freeze();

		if (ksmd_should_run()) {
			schedule_timeout_interruptible(
				msecs_to_jiffies(ksm_thread_sleep_millisecs));
		} else {
			wait_event_freezable(ksm_thread_wait,
				ksmd_should_run() || kthread_should_stop());
		}
	}
	return 0;
}

int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
		unsigned long end, int advice, unsigned long *vm_flags)
{
	struct mm_struct *mm = vma->vm_mm;
	int err;

	switch (advice) {
	case MADV_MERGEABLE:
		/*
		 * Be somewhat over-protective for now!
		 */
		if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
				 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
				 VM_NONLINEAR | VM_MIXEDMAP | VM_SAO))
			return 0;		/* just ignore the advice */

		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
			err = __ksm_enter(mm);
			if (err)
				return err;
		}

		*vm_flags |= VM_MERGEABLE;
		break;

	case MADV_UNMERGEABLE:
		if (!(*vm_flags & VM_MERGEABLE))
			return 0;		/* just ignore the advice */

		if (vma->anon_vma) {
			err = unmerge_ksm_pages(vma, start, end);
			if (err)
				return err;
		}

		*vm_flags &= ~VM_MERGEABLE;
		break;
	}

	return 0;
}

int __ksm_enter(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	int needs_wakeup;

	mm_slot = alloc_mm_slot();
	if (!mm_slot)
		return -ENOMEM;

	/* Check ksm_run too?  Would need tighter locking */
	needs_wakeup = list_empty(&ksm_mm_head.mm_list);

	spin_lock(&ksm_mmlist_lock);
	insert_to_mm_slots_hash(mm, mm_slot);
	/*
	 * Insert just behind the scanning cursor, to let the area settle
	 * down a little; when fork is followed by immediate exec, we don't
	 * want ksmd to waste time setting up and tearing down an rmap_list.
	 */
	list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
	spin_unlock(&ksm_mmlist_lock);

	set_bit(MMF_VM_MERGEABLE, &mm->flags);
	atomic_inc(&mm->mm_count);

	if (needs_wakeup)
		wake_up_interruptible(&ksm_thread_wait);

	return 0;
}

void __ksm_exit(struct mm_struct *mm)
{
	struct mm_slot *mm_slot;
	int easy_to_free = 0;

	/*
	 * This process is exiting: if it's straightforward (as is the
	 * case when ksmd was never running), free mm_slot immediately.
	 * But if it's at the cursor or has rmap_items linked to it, use
	 * mmap_sem to synchronize with any break_cows before pagetables
	 * are freed, and leave the mm_slot on the list for ksmd to free.
	 * Beware: ksm may already have noticed it exiting and freed the slot.
	 */
	spin_lock(&ksm_mmlist_lock);
	mm_slot = get_mm_slot(mm);
	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
		if (!mm_slot->rmap_list) {
			hlist_del(&mm_slot->link);
			list_del(&mm_slot->mm_list);
			easy_to_free = 1;
		} else {
			list_move(&mm_slot->mm_list,
				  &ksm_scan.mm_slot->mm_list);
		}
	}
	spin_unlock(&ksm_mmlist_lock);

	if (easy_to_free) {
		free_mm_slot(mm_slot);
		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
		mmdrop(mm);
	} else if (mm_slot) {
		down_write(&mm->mmap_sem);
		up_write(&mm->mmap_sem);
	}
}

struct page *ksm_does_need_to_copy(struct page *page,
			struct vm_area_struct *vma, unsigned long address)
{
	struct page *new_page;

	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
	if (new_page) {
		copy_user_highpage(new_page, page, address, vma);

		SetPageDirty(new_page);
		__SetPageUptodate(new_page);
		SetPageSwapBacked(new_page);
		__set_page_locked(new_page);

		if (page_evictable(new_page, vma))
			lru_cache_add_lru(new_page, LRU_ACTIVE_ANON);
		else
			add_page_to_unevictable_list(new_page);
	}

	return new_page;
}

int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
			unsigned long *vm_flags)
{
	struct stable_node *stable_node;
	struct rmap_item *rmap_item;
	struct hlist_node *hlist;
	unsigned int mapcount = page_mapcount(page);
	int referenced = 0;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return 0;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock(anon_vma);
		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * Initially we examine only the vma which covers this
			 * rmap_item; but later, if there is still work to do,
			 * we examine covering vmas in other mms: in case they
			 * were forked from the original since ksmd passed.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
				continue;

			referenced += page_referenced_one(page, vma,
				rmap_item->address, &mapcount, vm_flags);
			if (!search_new_forks || !mapcount)
				break;
		}
		anon_vma_unlock(anon_vma);
		if (!mapcount)
			goto out;
	}
	if (!search_new_forks++)
		goto again;
out:
	return referenced;
}

int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
{
	struct stable_node *stable_node;
	struct hlist_node *hlist;
	struct rmap_item *rmap_item;
	int ret = SWAP_AGAIN;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return SWAP_FAIL;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock(anon_vma);
		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * Initially we examine only the vma which covers this
			 * rmap_item; but later, if there is still work to do,
			 * we examine covering vmas in other mms: in case they
			 * were forked from the original since ksmd passed.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			ret = try_to_unmap_one(page, vma,
					rmap_item->address, flags);
			if (ret != SWAP_AGAIN || !page_mapped(page)) {
				anon_vma_unlock(anon_vma);
				goto out;
			}
		}
		anon_vma_unlock(anon_vma);
	}
	if (!search_new_forks++)
		goto again;
out:
	return ret;
}

#ifdef CONFIG_MIGRATION
int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
		  struct vm_area_struct *, unsigned long, void *), void *arg)
{
	struct stable_node *stable_node;
	struct hlist_node *hlist;
	struct rmap_item *rmap_item;
	int ret = SWAP_AGAIN;
	int search_new_forks = 0;

	VM_BUG_ON(!PageKsm(page));
	VM_BUG_ON(!PageLocked(page));

	stable_node = page_stable_node(page);
	if (!stable_node)
		return ret;
again:
	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
		struct anon_vma *anon_vma = rmap_item->anon_vma;
		struct anon_vma_chain *vmac;
		struct vm_area_struct *vma;

		anon_vma_lock(anon_vma);
		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
			vma = vmac->vma;
			if (rmap_item->address < vma->vm_start ||
			    rmap_item->address >= vma->vm_end)
				continue;
			/*
			 * Initially we examine only the vma which covers this
			 * rmap_item; but later, if there is still work to do,
			 * we examine covering vmas in other mms: in case they
			 * were forked from the original since ksmd passed.
			 */
			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
				continue;

			ret = rmap_one(page, vma, rmap_item->address, arg);
			if (ret != SWAP_AGAIN) {
				anon_vma_unlock(anon_vma);
				goto out;
			}
		}
		anon_vma_unlock(anon_vma);
	}
	if (!search_new_forks++)
		goto again;
out:
	return ret;
}

void ksm_migrate_page(struct page *newpage, struct page *oldpage)
{
	struct stable_node *stable_node;

	VM_BUG_ON(!PageLocked(oldpage));
	VM_BUG_ON(!PageLocked(newpage));
	VM_BUG_ON(newpage->mapping != oldpage->mapping);

	stable_node = page_stable_node(newpage);
	if (stable_node) {
		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
		stable_node->kpfn = page_to_pfn(newpage);
	}
}
#endif /* CONFIG_MIGRATION */

#ifdef CONFIG_MEMORY_HOTREMOVE
static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
						 unsigned long end_pfn)
{
	struct rb_node *node;

	for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
		struct stable_node *stable_node;

		stable_node = rb_entry(node, struct stable_node, node);
		if (stable_node->kpfn >= start_pfn &&
		    stable_node->kpfn < end_pfn)
			return stable_node;
	}
	return NULL;
}

static int ksm_memory_callback(struct notifier_block *self,
			       unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;
	struct stable_node *stable_node;

	switch (action) {
	case MEM_GOING_OFFLINE:
		/*
		 * Keep it very simple for now: just lock out ksmd and
		 * MADV_UNMERGEABLE while any memory is going offline.
		 * mutex_lock_nested() is necessary because lockdep was
		 * alarmed that here we take ksm_thread_mutex inside
		 * notifier chain mutex, and later take notifier chain
		 * mutex inside ksm_thread_mutex to unlock it.
		 */
		mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
		break;

	case MEM_OFFLINE:
		/*
		 * Most of the work is done by page migration; but there might
		 * be a few stable_nodes left over, still pointing to struct
		 * pages which have been offlined: prune those from the tree.
		 */
		while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
					mn->start_pfn + mn->nr_pages)) != NULL)
			remove_node_from_stable_tree(stable_node);
		/* fallthrough */

	case MEM_CANCEL_OFFLINE:
		mutex_unlock(&ksm_thread_mutex);
		break;
	}
	return NOTIFY_OK;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

#ifdef CONFIG_SYSFS
/*
 * This all compiles without CONFIG_SYSFS, but is a waste of space.
 */

#define KSM_ATTR_RO(_name) \
	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
#define KSM_ATTR(_name) \
	static struct kobj_attribute _name##_attr = \
		__ATTR(_name, 0644, _name##_show, _name##_store)
static ssize_t sleep_millisecs_show(struct kobject *kobj,
				    struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
}

static ssize_t sleep_millisecs_store(struct kobject *kobj,
				     struct kobj_attribute *attr,
				     const char *buf, size_t count)
{
	unsigned long msecs;
	int err;

	err = strict_strtoul(buf, 10, &msecs);
	if (err || msecs > UINT_MAX)
		return -EINVAL;

	ksm_thread_sleep_millisecs = msecs;

	return count;
}
KSM_ATTR(sleep_millisecs);

static ssize_t pages_to_scan_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
}

static ssize_t pages_to_scan_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t count)
{
	int err;
	unsigned long nr_pages;

	err = strict_strtoul(buf, 10, &nr_pages);
	if (err || nr_pages > UINT_MAX)
		return -EINVAL;

	ksm_thread_pages_to_scan = nr_pages;

	return count;
}
KSM_ATTR(pages_to_scan);

static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
			char *buf)
{
	return sprintf(buf, "%u\n", ksm_run);
}

static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
			 const char *buf, size_t count)
{
	int err;
	unsigned long flags;

	err = strict_strtoul(buf, 10, &flags);
	if (err || flags > UINT_MAX)
		return -EINVAL;
	if (flags > KSM_RUN_UNMERGE)
		return -EINVAL;

	/*
	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
	 * breaking COW to free the pages_shared (but leaves mm_slots
	 * on the list for when ksmd may be set running again).
	 */

	mutex_lock(&ksm_thread_mutex);
	if (ksm_run != flags) {
		ksm_run = flags;
		if (flags & KSM_RUN_UNMERGE) {
			current->flags |= PF_OOM_ORIGIN;
			err = unmerge_and_remove_all_rmap_items();
			current->flags &= ~PF_OOM_ORIGIN;
			if (err) {
				ksm_run = KSM_RUN_STOP;
				count = err;
			}
		}
	}
	mutex_unlock(&ksm_thread_mutex);

	if (flags & KSM_RUN_MERGE)
		wake_up_interruptible(&ksm_thread_wait);

	return count;
}
KSM_ATTR(run);

static ssize_t pages_shared_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_shared);
}
KSM_ATTR_RO(pages_shared);

static ssize_t pages_sharing_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_sharing);
}
KSM_ATTR_RO(pages_sharing);

static ssize_t pages_unshared_show(struct kobject *kobj,
				   struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_unshared);
}
KSM_ATTR_RO(pages_unshared);

static ssize_t pages_volatile_show(struct kobject *kobj,
				   struct kobj_attribute *attr, char *buf)
{
	long ksm_pages_volatile;

	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
				- ksm_pages_sharing - ksm_pages_unshared;
	/*
	 * It was not worth any locking to calculate that statistic,
	 * but it might therefore sometimes be negative: conceal that.
	 */
	if (ksm_pages_volatile < 0)
		ksm_pages_volatile = 0;
	return sprintf(buf, "%ld\n", ksm_pages_volatile);
}
KSM_ATTR_RO(pages_volatile);

static ssize_t full_scans_show(struct kobject *kobj,
			       struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_scan.seqnr);
}
KSM_ATTR_RO(full_scans);

static struct attribute *ksm_attrs[] = {
	&sleep_millisecs_attr.attr,
	&pages_to_scan_attr.attr,
	&run_attr.attr,
	&pages_shared_attr.attr,
	&pages_sharing_attr.attr,
	&pages_unshared_attr.attr,
	&pages_volatile_attr.attr,
	&full_scans_attr.attr,
	NULL,
};

static struct attribute_group ksm_attr_group = {
	.attrs = ksm_attrs,
	.name = "ksm",
};
#endif /* CONFIG_SYSFS */

static int __init ksm_init(void)
{
	struct task_struct *ksm_thread;
	int err;

	err = ksm_slab_init();
	if (err)
		goto out;

	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
	if (IS_ERR(ksm_thread)) {
		printk(KERN_ERR "ksm: creating kthread failed\n");
		err = PTR_ERR(ksm_thread);
		goto out_free;
	}

#ifdef CONFIG_SYSFS
	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
	if (err) {
		printk(KERN_ERR "ksm: register sysfs failed\n");
		kthread_stop(ksm_thread);
		goto out_free;
	}
#else
	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */

#endif /* CONFIG_SYSFS */

#ifdef CONFIG_MEMORY_HOTREMOVE
	/*
	 * Choose a high priority since the callback takes ksm_thread_mutex:
	 * later callbacks could only be taking locks which nest within that.
	 */
	hotplug_memory_notifier(ksm_memory_callback, 100);
#endif
	return 0;

out_free:
	ksm_slab_free();
out:
	return err;
}
module_init(ksm_init)