/*
 *  linux/mm/vmscan.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95, Stephen Tweedie.
 *  kswapd added: 7.1.96  sct
 *  Removed kswapd_ctl limits, and swap out as many pages as needed
 *  to bring the system back to freepages.high: 2.4.97, Rik van Riel.
 *  Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
 *  Multiqueue VM started 5.8.00, Rik van Riel.
 */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/vmstat.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>

#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/sysctl.h>

#include <asm/tlbflush.h>
#include <asm/div64.h>

#include <linux/swapops.h>

#include "internal.h"

struct scan_control {
	/* Incremented by the number of inactive pages that were scanned */
	unsigned long nr_scanned;

	/* Number of pages freed so far during a call to shrink_zones() */
	unsigned long nr_reclaimed;

	/* This context's GFP mask */
	gfp_t gfp_mask;

	int may_writepage;

	/* Can mapped pages be reclaimed? */
	int may_unmap;

	/* Can pages be swapped as part of reclaim? */
	int may_swap;

	/*
	 * This context's SWAP_CLUSTER_MAX, i.e. the reclaim batch size.
	 * For memory-freeing callers such as suspend it can be much
	 * larger, in which case whole lists are scanned at once.
	 */
	int swap_cluster_max;

	int swappiness;

	int all_unreclaimable;

	int order;

	/* Which cgroup do we reclaim from */
	struct mem_cgroup *mem_cgroup;

	/*
	 * Nodemask of nodes allowed by the caller. If NULL, all nodes
	 * are scanned.
	 */
	nodemask_t *nodemask;

	/* Pluggable isolate pages callback */
	unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
			unsigned long *scanned, int order, int mode,
			struct zone *z, struct mem_cgroup *mem_cont,
			int active, int file);
};

#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != _base) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&(_page->lru));		\
			prefetch(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif

#ifdef ARCH_HAS_PREFETCHW
#define prefetchw_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != _base) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&(_page->lru));		\
			prefetchw(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif

/*
 * From 0 .. 100.  Higher means more swappy.
 */
int vm_swappiness = 60;
long vm_total_pages;	/* The total number of pages which the VM controls */

static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);

#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#define scanning_global_lru(sc)	(!(sc)->mem_cgroup)
#else
#define scanning_global_lru(sc)	(1)
#endif

static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
						  struct scan_control *sc)
{
	if (!scanning_global_lru(sc))
		return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);

	return &zone->reclaim_stat;
}

static unsigned long zone_nr_lru_pages(struct zone *zone,
				struct scan_control *sc, enum lru_list lru)
{
	if (!scanning_global_lru(sc))
		return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);

	return zone_page_state(zone, NR_LRU_BASE + lru);
}
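
/*
 * A note on the LRU indexing arithmetic used throughout this file:
 * LRU_BASE selects the inactive anon list, adding LRU_ACTIVE selects the
 * active variant, and adding LRU_FILE switches from the anon pair to the
 * file pair, so LRU_BASE + LRU_ACTIVE + LRU_FILE == LRU_ACTIVE_FILE.
 * The same offsets map onto the NR_LRU_BASE zone statistics above.
 */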

/*
 * Add a shrinker callback to be called from the vm
 */
void register_shrinker(struct shrinker *shrinker)
{
	shrinker->nr = 0;
	down_write(&shrinker_rwsem);
	list_add_tail(&shrinker->list, &shrinker_list);
	up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(register_shrinker);

/*
 * Remove one
 */
void unregister_shrinker(struct shrinker *shrinker)
{
	down_write(&shrinker_rwsem);
	list_del(&shrinker->list);
	up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(unregister_shrinker);

#define SHRINK_BATCH 128
/*
 * Call the shrink functions to age shrinkable caches.
 *
 * Here we assume it costs one seek to replace an LRU page and that it also
 * takes a seek to recreate a cache object.  With this in mind we age equal
 * percentages of the LRU and ageable caches.  This should balance the seeks
 * generated by these structures.
 *
 * If the vm encountered mapped pages on the LRU it increases the pressure
 * on slab to avoid swapping.
 *
 * `lru_pages' and `scanned' are used to scale the slab pressure to the
 * pressure just applied to the LRU lists.
 *
 * Returns the number of slab objects which we shrunk.
 */
unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
			unsigned long lru_pages)
{
	struct shrinker *shrinker;
	unsigned long ret = 0;

	if (scanned == 0)
		scanned = SWAP_CLUSTER_MAX;

	if (!down_read_trylock(&shrinker_rwsem))
		return 1;	/* Assume we'll be able to shrink next time */

	list_for_each_entry(shrinker, &shrinker_list, list) {
		unsigned long long delta;
		unsigned long total_scan;
		unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);

		delta = (4 * scanned) / shrinker->seeks;
		delta *= max_pass;
		do_div(delta, lru_pages + 1);
		shrinker->nr += delta;
		if (shrinker->nr < 0) {
			printk(KERN_ERR "shrink_slab: %pF negative objects to "
			       "delete nr=%ld\n",
			       shrinker->shrink, shrinker->nr);
			shrinker->nr = max_pass;
		}

		/*
		 * Avoid risking looping forever due to too large nr value:
		 * never try to free more than twice the estimated number of
		 * freeable entries.
		 */
		if (shrinker->nr > max_pass * 2)
			shrinker->nr = max_pass * 2;

		total_scan = shrinker->nr;
		shrinker->nr = 0;

		while (total_scan >= SHRINK_BATCH) {
			long this_scan = SHRINK_BATCH;
			int shrink_ret;
			int nr_before;

			nr_before = (*shrinker->shrink)(0, gfp_mask);
			shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
			if (shrink_ret == -1)
				break;
			if (shrink_ret < nr_before)
				ret += nr_before - shrink_ret;
			count_vm_events(SLABS_SCANNED, this_scan);
			total_scan -= this_scan;

			cond_resched();
		}

		shrinker->nr += total_scan;
	}
	up_read(&shrinker_rwsem);
	return ret;
}
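
/*
 * Worked example of the slab-pressure formula above (illustrative numbers,
 * not taken from a real workload): with scanned = 1024 LRU pages,
 * lru_pages = 65536, seeks = 2 and max_pass = 10000 freeable objects,
 * delta = (4 * 1024 / 2) * 10000 / 65537 ~= 312, i.e. roughly 3% of the
 * cache is asked to age, matching the ~1.5% of the LRU that was scanned,
 * scaled by the relative recreation cost 4/seeks = 2.
 */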

/* Called without lock on whether page is mapped, so answer is unstable */
static inline int page_mapping_inuse(struct page *page)
{
	struct address_space *mapping;

	/* Page is in somebody's page tables. */
	if (page_mapped(page))
		return 1;

	/* Be more reluctant to reclaim swapcache than pagecache */
	if (PageSwapCache(page))
		return 1;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	/* File is mmap'd by somebody? */
	return mapping_mapped(mapping);
}

static inline int is_page_cache_freeable(struct page *page)
{
	/*
	 * A freeable page cache page is referenced only by the caller
	 * that isolated the page, the page cache radix tree and
	 * optional buffer heads at page->private.
	 */
	return page_count(page) - page_has_private(page) == 2;
}

static int may_write_to_queue(struct backing_dev_info *bdi)
{
	if (current->flags & PF_SWAPWRITE)
		return 1;
	if (!bdi_write_congested(bdi))
		return 1;
	if (bdi == current->backing_dev_info)
		return 1;
	return 0;
}

/*
 * We detected a synchronous write error writing a page out.  Probably
 * -ENOSPC.  We need to propagate that into the address_space for a subsequent
 * fsync(), msync() or close().
 *
 * The tricky part is that after writepage we cannot touch the mapping:
 * nothing prevents it from being freed up.  But we have a ref on the page
 * and once that page is locked, the mapping is pinned.
 *
 * We're allowed to run sleeping lock_page() here because we know the caller
 * has __GFP_FS.
 */
static void handle_write_error(struct address_space *mapping,
				struct page *page, int error)
{
	lock_page(page);
	if (page_mapping(page) == mapping)
		mapping_set_error(mapping, error);
	unlock_page(page);
}

/* Request for sync pageout. */
enum pageout_io {
	PAGEOUT_IO_ASYNC,
	PAGEOUT_IO_SYNC,
};

/* possible outcome of pageout() */
typedef enum {
	/* failed to write page out, page is locked */
	PAGE_KEEP,
	/* move page to the active list, page is locked */
	PAGE_ACTIVATE,
	/* page has been sent to the disk successfully, page is unlocked */
	PAGE_SUCCESS,
	/* page is clean and locked */
	PAGE_CLEAN,
} pageout_t;

/*
 * pageout is called by shrink_page_list() for each dirty page.
 * Calls ->writepage().
 */
static pageout_t pageout(struct page *page, struct address_space *mapping,
						enum pageout_io sync_writeback)
{
	/*
	 * If the page is dirty, only perform writeback if that write
	 * will be non-blocking, to prevent this allocation from being
	 * stalled by pagecache activity.  But note that there may be
	 * stalls if we need to run get_block().
	 *
	 * If the page is swapcache, write it back even if that would
	 * block, for some throttling.
	 */
	if (!is_page_cache_freeable(page))
		return PAGE_KEEP;
	if (!mapping) {
		/*
		 * Some data journaling orphaned pages can have
		 * page->mapping == NULL while being dirty with clean buffers.
		 */
		if (page_has_private(page)) {
			if (try_to_free_buffers(page)) {
				ClearPageDirty(page);
				printk("%s: orphaned page\n", __func__);
				return PAGE_CLEAN;
			}
		}
		return PAGE_KEEP;
	}
	if (mapping->a_ops->writepage == NULL)
		return PAGE_ACTIVATE;
	if (!may_write_to_queue(mapping->backing_dev_info))
		return PAGE_KEEP;

	if (clear_page_dirty_for_io(page)) {
		int res;
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_NONE,
			.nr_to_write = SWAP_CLUSTER_MAX,
			.range_start = 0,
			.range_end = LLONG_MAX,
			.nonblocking = 1,
			.for_reclaim = 1,
		};

		SetPageReclaim(page);
		res = mapping->a_ops->writepage(page, &wbc);
		if (res < 0)
			handle_write_error(mapping, page, res);
		if (res == AOP_WRITEPAGE_ACTIVATE) {
			ClearPageReclaim(page);
			return PAGE_ACTIVATE;
		}

		/*
		 * Wait on writeback if requested to. This happens when
		 * direct reclaiming a large contiguous area and the
		 * first attempt to free a range of pages fails.
		 */
		if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
			wait_on_page_writeback(page);

		if (!PageWriteback(page)) {
			/* synchronous write or broken a_ops? */
			ClearPageReclaim(page);
		}
		inc_zone_page_state(page, NR_VMSCAN_WRITE);
		return PAGE_SUCCESS;
	}

	return PAGE_CLEAN;
}

/*
 * Same as remove_mapping, but if the page is removed from the mapping, it
 * gets returned with a refcount of 0.
 */
static int __remove_mapping(struct address_space *mapping, struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(mapping != page_mapping(page));

	spin_lock_irq(&mapping->tree_lock);
	/*
	 * The non racy check for a busy page.
	 *
	 * Must be careful with the order of the tests. When someone has
	 * a ref to the page, it may be possible that they dirty it then
	 * drop the reference. So if PageDirty is tested before page_count
	 * here, then the following race may occur:
	 *
	 * get_user_pages(&page);
	 * [user mapping goes away]
	 * write_to(page);
	 *				!PageDirty(page)    [good]
	 * SetPageDirty(page);
	 * put_page(page);
	 *				!page_count(page)   [good, discard it]
	 *
	 * [oops, our write_to data is lost]
	 *
	 * Reversing the order of the tests ensures such a situation cannot
	 * escape unnoticed. The smp_rmb is needed to ensure the page->flags
	 * load is not satisfied before that of page->_count.
	 *
	 * Note that if SetPageDirty is always performed via set_page_dirty,
	 * and thus under tree_lock, then this ordering is not required.
	 */
	if (!page_freeze_refs(page, 2))
		goto cannot_free;
	/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
	if (unlikely(PageDirty(page))) {
		page_unfreeze_refs(page, 2);
		goto cannot_free;
	}

	if (PageSwapCache(page)) {
		swp_entry_t swap = { .val = page_private(page) };
		__delete_from_swap_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
		swapcache_free(swap, page);
	} else {
		__remove_from_page_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
		mem_cgroup_uncharge_cache_page(page);
	}

	return 1;

cannot_free:
	spin_unlock_irq(&mapping->tree_lock);
	return 0;
}

/*
 * Attempt to detach a locked page from its ->mapping.  If it is dirty or if
 * someone else has a ref on the page, abort and return 0.  If it was
 * successfully detached, return 1.  Assumes the caller has a single ref on
 * this page.
 */
int remove_mapping(struct address_space *mapping, struct page *page)
{
	if (__remove_mapping(mapping, page)) {
		/*
		 * Unfreezing the refcount with 1 rather than 2 effectively
		 * drops the pagecache ref for us without requiring another
		 * atomic operation.
		 */
		page_unfreeze_refs(page, 1);
		return 1;
	}
	return 0;
}

/**
 * putback_lru_page - put previously isolated page onto appropriate LRU list
 * @page: page to be put back to appropriate lru list
 *
 * Add previously isolated @page to appropriate LRU list.
 * Page may still be unevictable for other reasons.
 *
 * lru_lock must not be held, interrupts must be enabled.
 */
void putback_lru_page(struct page *page)
{
	int lru;
	int active = !!TestClearPageActive(page);
	int was_unevictable = PageUnevictable(page);

	VM_BUG_ON(PageLRU(page));

redo:
	ClearPageUnevictable(page);

	if (page_evictable(page, NULL)) {
		/*
		 * For evictable pages, we can use the cache.
		 * In event of a race, worst case is we end up with an
		 * unevictable page on [in]active list.
		 * We know how to handle that.
		 */
		lru = active + page_lru_base_type(page);
		lru_cache_add_lru(page, lru);
	} else {
		/*
		 * Put unevictable pages directly on zone's unevictable
		 * list.
		 */
		lru = LRU_UNEVICTABLE;
		add_page_to_unevictable_list(page);
		/*
		 * When racing with an mlock clearing (page is unlocked),
		 * make sure that if the other thread does not observe
		 * our setting of PG_lru and fails isolation, we see
		 * PG_mlocked cleared below and move the page back to the
		 * evictable list.
		 *
		 * The other side is TestClearPageMlocked().
		 */
		smp_mb();
	}

	/*
	 * page's status can change while we move it among lru. If an
	 * evictable page is on the unevictable list, it can never be freed.
	 * To avoid that, check after we added it to the list, again.
	 */
	if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
		if (!isolate_lru_page(page)) {
			put_page(page);
			goto redo;
		}
		/*
		 * This means someone else dropped this page from LRU,
		 * so it will be freed or put back to LRU again. There
		 * is nothing to do here.
		 */
	}

	if (was_unevictable && lru != LRU_UNEVICTABLE)
		count_vm_event(UNEVICTABLE_PGRESCUED);
	else if (!was_unevictable && lru == LRU_UNEVICTABLE)
		count_vm_event(UNEVICTABLE_PGCULLED);

	put_page(page);		/* drop ref from isolate */
}

/*
 * shrink_page_list() returns the number of reclaimed pages
 */
static unsigned long shrink_page_list(struct list_head *page_list,
					struct scan_control *sc,
					enum pageout_io sync_writeback)
{
	LIST_HEAD(ret_pages);
	struct pagevec freed_pvec;
	int pgactivate = 0;
	unsigned long nr_reclaimed = 0;
	unsigned long vm_flags;

	cond_resched();

	pagevec_init(&freed_pvec, 1);
	while (!list_empty(page_list)) {
		struct address_space *mapping;
		struct page *page;
		int may_enter_fs;
		int referenced;

		cond_resched();

		page = lru_to_page(page_list);
		list_del(&page->lru);

		if (!trylock_page(page))
			goto keep;

		VM_BUG_ON(PageActive(page));

		sc->nr_scanned++;

		if (unlikely(!page_evictable(page, NULL)))
			goto cull_mlocked;

		if (!sc->may_unmap && page_mapped(page))
			goto keep_locked;

		/* Double the slab pressure for mapped and swapcache pages */
		if (page_mapped(page) || PageSwapCache(page))
			sc->nr_scanned++;

		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));

		if (PageWriteback(page)) {
			/*
			 * Synchronous reclaim is performed in two passes,
			 * first an asynchronous pass over the list to
			 * start parallel writeback, and a second synchronous
			 * pass to wait for the IO to complete.  Wait here
			 * for any page for which writeback has already
			 * started.
			 */
			if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
				wait_on_page_writeback(page);
			else
				goto keep_locked;
		}

		referenced = page_referenced(page, 1,
						sc->mem_cgroup, &vm_flags);
		/*
		 * In active use or really unfreeable?  Activate it.
		 * If a page with PG_mlocked lost the isolation race,
		 * try_to_unmap moves it to the unevictable list.
		 */
		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
					referenced && page_mapping_inuse(page)
					&& !(vm_flags & VM_LOCKED))
			goto activate_locked;

		/*
		 * Anonymous process memory has backing store?
		 * Try to allocate it some swap space here.
		 */
		if (PageAnon(page) && !PageSwapCache(page)) {
			if (!(sc->gfp_mask & __GFP_IO))
				goto keep_locked;
			if (!add_to_swap(page))
				goto activate_locked;
			may_enter_fs = 1;
		}

		mapping = page_mapping(page);

		/*
		 * The page is mapped into the page tables of one or more
		 * processes. Try to unmap it here.
		 */
		if (page_mapped(page) && mapping) {
			switch (try_to_unmap(page, TTU_UNMAP)) {
			case SWAP_FAIL:
				goto activate_locked;
			case SWAP_AGAIN:
				goto keep_locked;
			case SWAP_MLOCK:
				goto cull_mlocked;
			case SWAP_SUCCESS:
				; /* try to free the page below */
			}
		}

		if (PageDirty(page)) {
			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
				goto keep_locked;
			if (!may_enter_fs)
				goto keep_locked;
			if (!sc->may_writepage)
				goto keep_locked;

			/* Page is dirty, try to write it out here */
			switch (pageout(page, mapping, sync_writeback)) {
			case PAGE_KEEP:
				goto keep_locked;
			case PAGE_ACTIVATE:
				goto activate_locked;
			case PAGE_SUCCESS:
				if (PageWriteback(page) || PageDirty(page))
					goto keep;
				/*
				 * A synchronous write - probably a ramdisk.
				 * Go ahead and try to reclaim the page.
				 */
				if (!trylock_page(page))
					goto keep;
				if (PageDirty(page) || PageWriteback(page))
					goto keep_locked;
				mapping = page_mapping(page);
			case PAGE_CLEAN:
				; /* try to free the page below */
			}
		}

		/*
		 * If the page has buffers, try to free the buffer mappings
		 * associated with this page. If we succeed we try to free
		 * the page as well.
		 *
		 * We do this even if the page is PageDirty().
		 * try_to_release_page() does not perform I/O, but it is
		 * possible for a page to have PageDirty set, but it is
		 * actually clean (all its buffers are clean).  This happens
		 * if the buffers were written out directly, with
		 * submit_bh(). ext3 will do this, as will the blockdev
		 * mapping.  try_to_release_page() will discover that
		 * cleanness and will drop the buffers and mark the page
		 * clean - it can be freed.
		 *
		 * Rarely, pages can have buffers and no ->mapping.  These
		 * are the pages which were not successfully invalidated in
		 * truncate_complete_page().  We try to drop those buffers
		 * here and if that worked, and the page is no longer mapped
		 * into process address space (page_count == 1) it can be
		 * freed.  Otherwise, leave the page on the LRU so it is
		 * swappable.
		 */
		if (page_has_private(page)) {
			if (!try_to_release_page(page, sc->gfp_mask))
				goto activate_locked;
			if (!mapping && page_count(page) == 1) {
				unlock_page(page);
				if (put_page_testzero(page))
					goto free_it;
				else {
					/*
					 * rare race with speculative reference.
					 * the speculative reference will free
					 * this page shortly, so we may
					 * increment nr_reclaimed here (and
					 * leave it off the LRU).
					 */
					nr_reclaimed++;
					continue;
				}
			}
		}

		if (!mapping || !__remove_mapping(mapping, page))
			goto keep_locked;

		/*
		 * At this point, we have no other references and there is
		 * no way to pick any more up (removed from LRU, removed
		 * from pagecache). Can use non-atomic bitops now (and
		 * we obviously don't have to worry about waking up a process
		 * waiting on the page lock, because there are no references.
		 */
		__clear_page_locked(page);
free_it:
		nr_reclaimed++;
		if (!pagevec_add(&freed_pvec, page)) {
			__pagevec_free(&freed_pvec);
			pagevec_reinit(&freed_pvec);
		}
		continue;

cull_mlocked:
		if (PageSwapCache(page))
			try_to_free_swap(page);
		unlock_page(page);
		putback_lru_page(page);
		continue;

activate_locked:
		/* Not a candidate for swapping, so reclaim swap space. */
		if (PageSwapCache(page) && vm_swap_full())
			try_to_free_swap(page);
		VM_BUG_ON(PageActive(page));
		SetPageActive(page);
		pgactivate++;
keep_locked:
		unlock_page(page);
keep:
		list_add(&page->lru, &ret_pages);
		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
	}
	list_splice(&ret_pages, page_list);
	if (pagevec_count(&freed_pvec))
		__pagevec_free(&freed_pvec);
	count_vm_events(PGACTIVATE, pgactivate);
	return nr_reclaimed;
}

/* LRU Isolation modes. */
#define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
#define ISOLATE_ACTIVE 1	/* Isolate active pages. */
#define ISOLATE_BOTH 2		/* Isolate both active and inactive pages. */

/*
 * Attempt to remove the specified page from its LRU.  Only take this page
 * if it is of the appropriate PageActive status.  Pages which are being
 * freed elsewhere are also ignored.
 *
 * page:	page to consider
 * mode:	one of the LRU isolation modes defined above
 *
 * returns 0 on success, -ve errno on failure.
 */
int __isolate_lru_page(struct page *page, int mode, int file)
{
	int ret = -EINVAL;

	/* Only take pages on the LRU. */
	if (!PageLRU(page))
		return ret;

	/*
	 * When checking the active state, we need to be sure we are
	 * dealing with comparable boolean values.  Take the logical not
	 * of each.
	 */
	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
		return ret;

	if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
		return ret;

	/*
	 * When this function is being called for lumpy reclaim, we
	 * initially look into all LRU pages, active, inactive and
	 * unevictable; only give shrink_page_list evictable pages.
	 */
	if (PageUnevictable(page))
		return ret;

	ret = -EBUSY;

	if (likely(get_page_unless_zero(page))) {
		/*
		 * Be careful not to clear PageLRU until after we're
		 * sure the page is not being freed elsewhere -- the
		 * page release code relies on it.
		 */
		ClearPageLRU(page);
		ret = 0;
	}

	return ret;
}

/*
 * zone->lru_lock is heavily contended.  Some of the functions that
 * shrink the lists perform better by taking out a batch of pages
 * and working on them outside the LRU lock.
 *
 * For pagecache intensive workloads, this function is the hottest
 * spot in the kernel (apart from copy_*_user functions).
 *
 * Appropriate locks must be held before calling this function.
 *
 * @nr_to_scan:	The number of pages to look through on the list.
 * @src:	The LRU list to pull pages off.
 * @dst:	The temp list to put pages on to.
 * @scanned:	The number of pages that were scanned.
 * @order:	The caller's attempted allocation order
 * @mode:	One of the LRU isolation modes
 * @file:	True [1] if isolating file [!anon] pages
 *
 * returns how many pages were moved onto *@dst.
 */
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
		struct list_head *src, struct list_head *dst,
		unsigned long *scanned, int order, int mode, int file)
{
	unsigned long nr_taken = 0;
	unsigned long scan;

	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
		struct page *page;
		unsigned long pfn;
		unsigned long end_pfn;
		unsigned long page_pfn;
		int zone_id;

		page = lru_to_page(src);
		prefetchw_prev_lru_page(page, src, flags);

		VM_BUG_ON(!PageLRU(page));

		switch (__isolate_lru_page(page, mode, file)) {
		case 0:
			list_move(&page->lru, dst);
			mem_cgroup_del_lru(page);
			nr_taken++;
			break;

		case -EBUSY:
			/* else it is being freed elsewhere */
			list_move(&page->lru, src);
			mem_cgroup_rotate_lru_list(page, page_lru(page));
			continue;

		default:
			BUG();
		}

		if (!order)
			continue;

		/*
		 * Attempt to take all pages in the order aligned region
		 * surrounding the tag page.  Only take those pages of
		 * the same active state as that tag page.  We may safely
		 * round the target page pfn down to the requested order
		 * as the mem_map is guaranteed valid out to MAX_ORDER;
		 * where that page is in a different zone we will detect
		 * it there and we do not attempt to isolate it.
		 */
		zone_id = page_zone_id(page);
		page_pfn = page_to_pfn(page);
		pfn = page_pfn & ~((1 << order) - 1);
		end_pfn = pfn + (1 << order);
		for (; pfn < end_pfn; pfn++) {
			struct page *cursor_page;

			/* The target page is in the block, ignore it. */
			if (unlikely(pfn == page_pfn))
				continue;

			/* Avoid holes within the zone. */
			if (unlikely(!pfn_valid_within(pfn)))
				break;

			cursor_page = pfn_to_page(pfn);

			/* Check that we have not crossed a zone boundary. */
			if (unlikely(page_zone_id(cursor_page) != zone_id))
				continue;

			/*
			 * If we don't have enough swap space, reclaiming of
			 * anon pages which don't already have a swap slot is
			 * pointless.
			 */
			if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
					!PageSwapCache(cursor_page))
				continue;

			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
				list_move(&cursor_page->lru, dst);
				mem_cgroup_del_lru(cursor_page);
				nr_taken++;
				scan++;
			}
		}
	}

	*scanned = scan;
	return nr_taken;
}

static unsigned long isolate_pages_global(unsigned long nr,
					struct list_head *dst,
					unsigned long *scanned, int order,
					int mode, struct zone *z,
					struct mem_cgroup *mem_cont,
					int active, int file)
{
	int lru = LRU_BASE;
	if (active)
		lru += LRU_ACTIVE;
	if (file)
		lru += LRU_FILE;
	return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
								mode, file);
}

/*
 * clear_active_flags() is a helper for shrink_active_list(), clearing
 * any active bits from the pages in the list.
 */
static unsigned long clear_active_flags(struct list_head *page_list,
					unsigned int *count)
{
	int nr_active = 0;
	int lru;
	struct page *page;

	list_for_each_entry(page, page_list, lru) {
		lru = page_lru_base_type(page);
		if (PageActive(page)) {
			lru += LRU_ACTIVE;
			ClearPageActive(page);
			nr_active++;
		}
		count[lru]++;
	}

	return nr_active;
}

/**
 * isolate_lru_page - tries to isolate a page from its LRU list
 * @page: page to isolate from its LRU list
 *
 * Isolates a @page from an LRU list, clears PageLRU and adjusts the
 * vmstat statistic corresponding to whatever LRU list the page was on.
 *
 * Returns 0 if the page was removed from an LRU list.
 * Returns -EBUSY if the page was not on an LRU list.
 *
 * The returned page will have PageLRU() cleared.  If it was found on
 * the active list, it will have PageActive set.  If it was found on
 * the unevictable list, it will have the PageUnevictable bit set. That flag
 * may need to be cleared by the caller before letting the page go.
 *
 * The vmstat statistic corresponding to the list on which the page was
 * found will be decremented.
 *
 * Restrictions:
 * (1) Must be called with an elevated refcount on the page. This is a
 *     fundamental difference from isolate_lru_pages (which is called
 *     without a stable reference).
 * (2) the lru_lock must not be held.
 * (3) interrupts must be enabled.
 */
int isolate_lru_page(struct page *page)
{
	int ret = -EBUSY;

	if (PageLRU(page)) {
		struct zone *zone = page_zone(page);

		spin_lock_irq(&zone->lru_lock);
		if (PageLRU(page) && get_page_unless_zero(page)) {
			int lru = page_lru(page);
			ret = 0;
			ClearPageLRU(page);

			del_page_from_lru_list(zone, page, lru);
		}
		spin_unlock_irq(&zone->lru_lock);
	}
	return ret;
}

/*
 * Are there way too many processes in the direct reclaim path already?
 */
static int too_many_isolated(struct zone *zone, int file,
		struct scan_control *sc)
{
	unsigned long inactive, isolated;

	if (current_is_kswapd())
		return 0;

	if (!scanning_global_lru(sc))
		return 0;

	if (file) {
		inactive = zone_page_state(zone, NR_INACTIVE_FILE);
		isolated = zone_page_state(zone, NR_ISOLATED_FILE);
	} else {
		inactive = zone_page_state(zone, NR_INACTIVE_ANON);
		isolated = zone_page_state(zone, NR_ISOLATED_ANON);
	}

	return isolated > inactive;
}

/*
 * shrink_inactive_list() is a helper for shrink_zone().  It returns the
 * number of reclaimed pages.
 */
static unsigned long shrink_inactive_list(unsigned long max_scan,
			struct zone *zone, struct scan_control *sc,
			int priority, int file)
{
	LIST_HEAD(page_list);
	struct pagevec pvec;
	unsigned long nr_scanned = 0;
	unsigned long nr_reclaimed = 0;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
	int lumpy_reclaim = 0;

	while (unlikely(too_many_isolated(zone, file, sc))) {
		congestion_wait(BLK_RW_ASYNC, HZ/10);

		/* We are about to die and free our memory. Return now. */
		if (fatal_signal_pending(current))
			return SWAP_CLUSTER_MAX;
	}

	/*
	 * If we need a large contiguous chunk of memory, or have
	 * trouble getting a small set of contiguous pages, we
	 * will reclaim both active and inactive pages.
	 *
	 * We use the same threshold as pageout congestion_wait below.
	 */
	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
		lumpy_reclaim = 1;
	else if (sc->order && priority < DEF_PRIORITY - 2)
		lumpy_reclaim = 1;

	pagevec_init(&pvec, 1);

	lru_add_drain();
	spin_lock_irq(&zone->lru_lock);
	do {
		struct page *page;
		unsigned long nr_taken;
		unsigned long nr_scan;
		unsigned long nr_freed;
		unsigned long nr_active;
		unsigned int count[NR_LRU_LISTS] = { 0, };
		int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
		unsigned long nr_anon;
		unsigned long nr_file;

		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
			     &page_list, &nr_scan, sc->order, mode,
				zone, sc->mem_cgroup, 0, file);

		if (scanning_global_lru(sc)) {
			zone->pages_scanned += nr_scan;
			if (current_is_kswapd())
				__count_zone_vm_events(PGSCAN_KSWAPD, zone,
						       nr_scan);
			else
				__count_zone_vm_events(PGSCAN_DIRECT, zone,
						       nr_scan);
		}

		if (nr_taken == 0)
			goto done;

		nr_active = clear_active_flags(&page_list, count);
		__count_vm_events(PGDEACTIVATE, nr_active);

		__mod_zone_page_state(zone, NR_ACTIVE_FILE,
						-count[LRU_ACTIVE_FILE]);
		__mod_zone_page_state(zone, NR_INACTIVE_FILE,
						-count[LRU_INACTIVE_FILE]);
		__mod_zone_page_state(zone, NR_ACTIVE_ANON,
						-count[LRU_ACTIVE_ANON]);
		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
						-count[LRU_INACTIVE_ANON]);

		nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
		nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
		__mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
		__mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);

		reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
		reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
		reclaim_stat->recent_scanned[1] += count[LRU_INACTIVE_FILE];
		reclaim_stat->recent_scanned[1] += count[LRU_ACTIVE_FILE];

		spin_unlock_irq(&zone->lru_lock);

		nr_scanned += nr_scan;
		nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);

		/*
		 * If we are direct reclaiming for contiguous pages and we do
		 * not reclaim everything in the list, try again and wait
		 * for IO to complete. This will stall high-order allocations
		 * but that should be acceptable to the caller.
		 */
		if (nr_freed < nr_taken && !current_is_kswapd() &&
		    lumpy_reclaim) {
			congestion_wait(BLK_RW_ASYNC, HZ/10);

			/*
			 * The attempt at page out may have made some
			 * of the pages active, mark them inactive again.
			 */
			nr_active = clear_active_flags(&page_list, count);
			count_vm_events(PGDEACTIVATE, nr_active);

			nr_freed += shrink_page_list(&page_list, sc,
							PAGEOUT_IO_SYNC);
		}

		nr_reclaimed += nr_freed;

		local_irq_disable();
		if (current_is_kswapd())
			__count_vm_events(KSWAPD_STEAL, nr_freed);
		__count_zone_vm_events(PGSTEAL, zone, nr_freed);

		spin_lock(&zone->lru_lock);
		/*
		 * Put back any unfreeable pages.
		 */
		while (!list_empty(&page_list)) {
			int lru;
			page = lru_to_page(&page_list);
			VM_BUG_ON(PageLRU(page));
			list_del(&page->lru);
			if (unlikely(!page_evictable(page, NULL))) {
				spin_unlock_irq(&zone->lru_lock);
				putback_lru_page(page);
				spin_lock_irq(&zone->lru_lock);
				continue;
			}
			SetPageLRU(page);
			lru = page_lru(page);
			add_page_to_lru_list(zone, page, lru);
			if (is_active_lru(lru)) {
				int file = is_file_lru(lru);
				reclaim_stat->recent_rotated[file]++;
			}
			if (!pagevec_add(&pvec, page)) {
				spin_unlock_irq(&zone->lru_lock);
				__pagevec_release(&pvec);
				spin_lock_irq(&zone->lru_lock);
			}
		}
		__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
		__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);

	} while (nr_scanned < max_scan);

done:
	spin_unlock_irq(&zone->lru_lock);
	pagevec_release(&pvec);
	return nr_reclaimed;
}

/*
 * We are about to scan this zone at a certain priority level.  If that
 * priority level is smaller (ie: more urgent) than the previous priority,
 * record it so that later scanners start out at this urgency.
 */
static inline void note_zone_scanning_priority(struct zone *zone, int priority)
{
	if (priority < zone->prev_priority)
		zone->prev_priority = priority;
}

/*
 * This moves pages from the active list to the inactive list.
 *
 * We move them the other way if the page is referenced by one or more
 * processes, from rmap.
 *
 * If the pages are mostly unmapped, the processing is fast and it is
 * appropriate to hold zone->lru_lock across the whole operation.  But if
 * the pages are mapped, the processing is slow (page_referenced()) so we
 * should drop zone->lru_lock around each page.  It's impossible to balance
 * this, so instead we remove the pages from the LRU while processing them.
 * It is safe to rely on PG_active against the non-LRU pages in here because
 * nobody will play with that bit on a non-LRU page.
 *
 * The downside is that we have to touch page->_count against each page.
 * But we had to alter page->flags anyway.
 */
static void move_active_pages_to_lru(struct zone *zone,
				     struct list_head *list,
				     enum lru_list lru)
{
	unsigned long pgmoved = 0;
	struct pagevec pvec;
	struct page *page;

	pagevec_init(&pvec, 1);

	while (!list_empty(list)) {
		page = lru_to_page(list);

		VM_BUG_ON(PageLRU(page));
		SetPageLRU(page);

		list_move(&page->lru, &zone->lru[lru].list);
		mem_cgroup_add_lru_list(page, lru);
		pgmoved++;

		if (!pagevec_add(&pvec, page) || list_empty(list)) {
			spin_unlock_irq(&zone->lru_lock);
			if (buffer_heads_over_limit)
				pagevec_strip(&pvec);
			__pagevec_release(&pvec);
			spin_lock_irq(&zone->lru_lock);
		}
	}
	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
	if (!is_active_lru(lru))
		__count_vm_events(PGDEACTIVATE, pgmoved);
}

static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
			struct scan_control *sc, int priority, int file)
{
	unsigned long nr_taken;
	unsigned long pgscanned;
	unsigned long vm_flags;
	LIST_HEAD(l_hold);	/* The pages which were snipped off */
	LIST_HEAD(l_active);
	LIST_HEAD(l_inactive);
	struct page *page;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
	unsigned long nr_rotated = 0;

	lru_add_drain();
	spin_lock_irq(&zone->lru_lock);
	nr_taken = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
					ISOLATE_ACTIVE, zone,
					sc->mem_cgroup, 1, file);
	/*
	 * zone->pages_scanned is used to detect a zone's OOM state;
	 * mem_cgroup remembers nr_scan by itself.
	 */
	if (scanning_global_lru(sc)) {
		zone->pages_scanned += pgscanned;
	}
	reclaim_stat->recent_scanned[file] += nr_taken;

	__count_zone_vm_events(PGREFILL, zone, pgscanned);
	if (file)
		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
	else
		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
	spin_unlock_irq(&zone->lru_lock);

	while (!list_empty(&l_hold)) {
		cond_resched();
		page = lru_to_page(&l_hold);
		list_del(&page->lru);

		if (unlikely(!page_evictable(page, NULL))) {
			putback_lru_page(page);
			continue;
		}

		/* page_referenced clears PageReferenced */
		if (page_mapping_inuse(page) &&
		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
			nr_rotated++;
			/*
			 * Identify referenced, file-backed active pages and
			 * give them one more trip around the active list. So
			 * that executable code gets better chances to stay in
			 * memory under moderate memory pressure.  Anon pages
			 * are not likely to be evicted by use-once streaming
			 * IO, plus JVMs can create lots of anon VM_EXEC pages,
			 * so we ignore them here.
			 */
			if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
				list_add(&page->lru, &l_active);
				continue;
			}
		}

		ClearPageActive(page);	/* we are de-activating */
		list_add(&page->lru, &l_inactive);
	}

	/*
	 * Move pages back to the lru list.
	 */
	spin_lock_irq(&zone->lru_lock);
	/*
	 * Count referenced pages from currently used mappings as rotated,
	 * even though only some of them are actually re-activated.  This
	 * helps balance scan pressure between file and anonymous pages in
	 * get_scan_ratio.
	 */
	reclaim_stat->recent_rotated[file] += nr_rotated;

	move_active_pages_to_lru(zone, &l_active,
						LRU_ACTIVE + file * LRU_FILE);
	move_active_pages_to_lru(zone, &l_inactive,
						LRU_BASE   + file * LRU_FILE);
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
	spin_unlock_irq(&zone->lru_lock);
}

static int inactive_anon_is_low_global(struct zone *zone)
{
	unsigned long active, inactive;

	active = zone_page_state(zone, NR_ACTIVE_ANON);
	inactive = zone_page_state(zone, NR_INACTIVE_ANON);

	if (inactive * zone->inactive_ratio < active)
		return 1;

	return 0;
}
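
/*
 * Illustrative arithmetic (hypothetical numbers): with inactive_ratio = 3,
 * a zone holding 800 active and 200 inactive anon pages satisfies
 * 200 * 3 = 600 < 800, so the inactive anon list is considered too small
 * and some active anon pages will be deactivated.
 */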

/**
 * inactive_anon_is_low - check if anonymous pages need to be deactivated
 * @zone: zone to check
 * @sc:   scan control of this context
 *
 * Returns true if the zone does not have enough inactive anon pages,
 * meaning some active anon pages need to be deactivated.
 */
static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
{
	int low;

	if (scanning_global_lru(sc))
		low = inactive_anon_is_low_global(zone);
	else
		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
	return low;
}

static int inactive_file_is_low_global(struct zone *zone)
{
	unsigned long active, inactive;

	active = zone_page_state(zone, NR_ACTIVE_FILE);
	inactive = zone_page_state(zone, NR_INACTIVE_FILE);

	return (active > inactive);
}

/**
 * inactive_file_is_low - check if file pages need to be deactivated
 * @zone: zone to check
 * @sc:   scan control of this context
 *
 * When the system is doing streaming IO, memory pressure here
 * ensures that active file pages get deactivated, until more
 * than half of the file pages are on the inactive list.
 *
 * Once we get to that situation, protect the system's working
 * set from being evicted by disabling active file page aging.
 *
 * This uses a different ratio than the anonymous pages, because
 * the page cache uses a use-once replacement algorithm.
 */
static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
{
	int low;

	if (scanning_global_lru(sc))
		low = inactive_file_is_low_global(zone);
	else
		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
	return low;
}

static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
	struct zone *zone, struct scan_control *sc, int priority)
{
	int file = is_file_lru(lru);

	if (lru == LRU_ACTIVE_FILE && inactive_file_is_low(zone, sc)) {
		shrink_active_list(nr_to_scan, zone, sc, priority, file);
		return 0;
	}

	if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
		shrink_active_list(nr_to_scan, zone, sc, priority, file);
		return 0;
	}
	return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
}

/*
 * Determine how aggressively the anon and cached file lists should be
 * scanned.  The relative value of each set of LRU lists is determined
 * by looking at the fraction of the pages scanned that we did rotate back
 * onto the active list instead of evict.
 *
 * percent[0] specifies how much pressure to put on ram/swap backed
 * memory, while percent[1] determines pressure on the file LRUs.
 */
static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
					unsigned long *percent)
{
	unsigned long anon, file, free;
	unsigned long anon_prio, file_prio;
	unsigned long ap, fp;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);

	anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
	file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);

	if (scanning_global_lru(sc)) {
		free  = zone_page_state(zone, NR_FREE_PAGES);
		/* If we have very few page cache pages,
		   force-scan anon pages. */
		if (unlikely(file + free <= high_wmark_pages(zone))) {
			percent[0] = 100;
			percent[1] = 0;
			return;
		}
	}

	/*
	 * OK, so we have swap space and a fair amount of page cache
	 * pages.  We use the recently rotated / recently scanned
	 * ratios to determine how valuable each cache is.
	 *
	 * Because workloads change over time (and to avoid overflow)
	 * we keep these statistics as a floating average, which ends
	 * up weighing recent references more than old ones.
	 *
	 * anon in [0], file in [1]
	 */
	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
		spin_lock_irq(&zone->lru_lock);
		reclaim_stat->recent_scanned[0] /= 2;
		reclaim_stat->recent_rotated[0] /= 2;
		spin_unlock_irq(&zone->lru_lock);
	}

	if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
		spin_lock_irq(&zone->lru_lock);
		reclaim_stat->recent_scanned[1] /= 2;
		reclaim_stat->recent_rotated[1] /= 2;
		spin_unlock_irq(&zone->lru_lock);
	}

	/*
	 * With swappiness at 100, anonymous and file have the same priority.
	 * This scanning priority is essentially the inverse of IO cost.
	 */
	anon_prio = sc->swappiness;
	file_prio = 200 - sc->swappiness;

	/*
	 * The amount of pressure on anon vs file pages is inversely
	 * proportional to the fraction of recently scanned pages on
	 * each list that were recently referenced and in active use.
	 */
	ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
	ap /= reclaim_stat->recent_rotated[0] + 1;

	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
	fp /= reclaim_stat->recent_rotated[1] + 1;

	/* Normalize to percentages */
	percent[0] = 100 * ap / (ap + fp + 1);
	percent[1] = 100 - percent[0];
}
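
/*
 * Worked example of the ratio above (hypothetical numbers): with the
 * default swappiness of 60, anon_prio = 60 and file_prio = 140.  Suppose
 * recent_scanned = 1000 on both lists, recent_rotated[0] = 500 (half the
 * scanned anon pages were re-referenced) and recent_rotated[1] = 100.
 * Then ap = 61 * 1001 / 501 = 121 and fp = 141 * 1001 / 101 = 1397, so
 * percent[0] = 100 * 121 / 1519 = 7 and percent[1] = 93: reclaim
 * concentrates on the file LRUs, whose pages rotated back far less.
 */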

/*
 * Smallish amounts of memory are scanned in batches: accumulate the
 * pages to scan in *nr_saved_scan and only start scanning once at
 * least swap_cluster_max pages have built up.
 */
static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
				       unsigned long *nr_saved_scan,
				       unsigned long swap_cluster_max)
{
	unsigned long nr;

	*nr_saved_scan += nr_to_scan;
	nr = *nr_saved_scan;

	if (nr >= swap_cluster_max)
		*nr_saved_scan = 0;
	else
		nr = 0;

	return nr;
}
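
/*
 * Batching example (hypothetical numbers): with swap_cluster_max = 32,
 * three successive requests of 12 pages return 0, 0 and then 36 --
 * nothing is scanned until the carried-over total (12, 24, 36) crosses
 * the batch size, at which point the whole accumulated amount is scanned
 * and the saved counter resets to 0.
 */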

/*
 * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
 */
static void shrink_zone(int priority, struct zone *zone,
				struct scan_control *sc)
{
	unsigned long nr[NR_LRU_LISTS];
	unsigned long nr_to_scan;
	unsigned long percent[2];	/* anon @ 0; file @ 1 */
	enum lru_list l;
	unsigned long nr_reclaimed = sc->nr_reclaimed;
	unsigned long swap_cluster_max = sc->swap_cluster_max;
	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
	int noswap = 0;

	/* If we have no swap space, do not bother scanning anon pages. */
	if (!sc->may_swap || (nr_swap_pages <= 0)) {
		noswap = 1;
		percent[0] = 0;
		percent[1] = 100;
	} else
		get_scan_ratio(zone, sc, percent);

	for_each_evictable_lru(l) {
		int file = is_file_lru(l);
		unsigned long scan;

		scan = zone_nr_lru_pages(zone, sc, l);
		if (priority || noswap) {
			scan >>= priority;
			scan = (scan * percent[file]) / 100;
		}
		nr[l] = nr_scan_try_batch(scan,
					  &reclaim_stat->nr_saved_scan[l],
					  swap_cluster_max);
	}

	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
					nr[LRU_INACTIVE_FILE]) {
		for_each_evictable_lru(l) {
			if (nr[l]) {
				nr_to_scan = min(nr[l], swap_cluster_max);
				nr[l] -= nr_to_scan;

				nr_reclaimed += shrink_list(l, nr_to_scan,
							    zone, sc, priority);
			}
		}
		/*
		 * On large memory systems, scan >> priority can become
		 * really large. This is fine for the starting priority;
		 * we want to put equal scanning pressure on each zone.
		 * However, if the VM has a harder time of freeing pages,
		 * with multiple processes reclaiming pages, the total
		 * freeing target can get unreasonably large.
		 */
		if (nr_reclaimed > swap_cluster_max &&
			priority < DEF_PRIORITY && !current_is_kswapd())
			break;
	}

	sc->nr_reclaimed = nr_reclaimed;

	/*
	 * Even if we did not try to evict anon pages at all, we want to
	 * rebalance the anon lru active/inactive ratio.
	 */
	if (inactive_anon_is_low(zone, sc) && nr_swap_pages > 0)
		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);

	throttle_vm_writeout(sc->gfp_mask);
}

/*
 * This is the direct reclaim path, for page-allocating processes.  We only
 * try to reclaim pages from zones which will satisfy the caller's allocation
 * request.
 *
 * We reclaim from a zone even if that zone is over high_wmark_pages(zone).
 * Because:
 * a) The caller may be trying to free *extra* pages to satisfy a higher-order
 *    allocation or
 * b) The target zone may be at high_wmark_pages(zone) but the lower zones
 *    must go *over* high_wmark_pages(zone) to satisfy the `incremental min'
 *    zone defense algorithm.
 *
 * If a zone is deemed to be full of pinned pages then just give it a light
 * scan then give up on it.
 */
static void shrink_zones(int priority, struct zonelist *zonelist,
					struct scan_control *sc)
{
	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
	struct zoneref *z;
	struct zone *zone;

	sc->all_unreclaimable = 1;
	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
					sc->nodemask) {
		if (!populated_zone(zone))
			continue;
		/*
		 * Take care that memory controller reclaiming has small
		 * influence on the global LRU.
		 */
		if (scanning_global_lru(sc)) {
			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
				continue;
			note_zone_scanning_priority(zone, priority);

			if (zone_is_all_unreclaimable(zone) &&
						priority != DEF_PRIORITY)
				continue;	/* Let kswapd poll it */
			sc->all_unreclaimable = 0;
		} else {
			/*
			 * Ignore cpuset limitation here. We just want to
			 * reduce the number of pages used by us regardless
			 * of memory shortage.
			 */
			sc->all_unreclaimable = 0;
			mem_cgroup_note_reclaim_priority(sc->mem_cgroup,
							priority);
		}

		shrink_zone(priority, zone, sc);
	}
}

/*
 * This is the main entry point to direct page reclaim.
 *
 * If a full scan of the inactive list fails to free enough memory then we
 * are "out of memory" and something needs to be killed.
 *
 * If the caller is !__GFP_FS then the probability of a failure is reasonably
 * high - the zone may be full of dirty or under-writeback pages, which this
 * caller can't do much about.  We kick the writeback threads and take
 * explicit naps in the hope that some of these pages can be written.  But
 * if the allocating task holds filesystem locks which prevent writeout this
 * might not work, and the allocation attempt will fail.
 *
 * returns:	0, if no pages reclaimed
 * 		else, the number of pages reclaimed
 */
static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
					struct scan_control *sc)
{
	int priority;
	unsigned long ret = 0;
	unsigned long total_scanned = 0;
	struct reclaim_state *reclaim_state = current->reclaim_state;
	unsigned long lru_pages = 0;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);

	delayacct_freepages_start();

	if (scanning_global_lru(sc))
		count_vm_event(ALLOCSTALL);
	/*
	 * mem_cgroup will not do shrink_slab.
	 */
	if (scanning_global_lru(sc)) {
		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {

			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
				continue;

			lru_pages += zone_reclaimable_pages(zone);
		}
	}

	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
		sc->nr_scanned = 0;
		if (!priority)
			disable_swap_token();
		shrink_zones(priority, zonelist, sc);
		/*
		 * Don't shrink slabs when reclaiming memory from
		 * over limit cgroups
		 */
		if (scanning_global_lru(sc)) {
			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
			if (reclaim_state) {
				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
				reclaim_state->reclaimed_slab = 0;
			}
		}
		total_scanned += sc->nr_scanned;
		if (sc->nr_reclaimed >= sc->swap_cluster_max) {
			ret = sc->nr_reclaimed;
			goto out;
		}

		/*
		 * Try to write back as many pages as we just scanned.  This
		 * tends to cause slow streaming writers to write data to the
		 * disk smoothly, at the dirtying rate, which is nice.  But
		 * that's undesirable in laptop mode, where we *want* lumpy
		 * writeout.  So in laptop mode, write out the whole world.
		 */
		if (total_scanned > sc->swap_cluster_max +
					sc->swap_cluster_max / 2) {
			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
			sc->may_writepage = 1;
		}

		/* Take a nap, wait for some writeback to complete */
		if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
			congestion_wait(BLK_RW_ASYNC, HZ/10);
	}
	/* top priority shrink_zones still had more to do? don't OOM, then */
	if (!sc->all_unreclaimable && scanning_global_lru(sc))
		ret = sc->nr_reclaimed;
out:
	/*
	 * Now that we've scanned all the zones at this priority level, note
	 * that level within the zone so that the next thread which performs
	 * scanning of this zone will immediately start out at this priority
	 * level.  This affects only the decision whether or not to bring
	 * mapped pages onto the inactive list.
	 */
	if (priority < 0)
		priority = 0;

	if (scanning_global_lru(sc)) {
		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {

			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
				continue;

			zone->prev_priority = priority;
		}
	} else
		mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);

	delayacct_freepages_end();

	return ret;
}

unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
				gfp_t gfp_mask, nodemask_t *nodemask)
{
	struct scan_control sc = {
		.gfp_mask = gfp_mask,
		.may_writepage = !laptop_mode,
		.swap_cluster_max = SWAP_CLUSTER_MAX,
		.may_unmap = 1,
		.may_swap = 1,
		.swappiness = vm_swappiness,
		.order = order,
		.mem_cgroup = NULL,
		.isolate_pages = isolate_pages_global,
		.nodemask = nodemask,
	};

	return do_try_to_free_pages(zonelist, &sc);
}

#ifdef CONFIG_CGROUP_MEM_RES_CTLR

unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
						gfp_t gfp_mask, bool noswap,
						unsigned int swappiness,
						struct zone *zone, int nid)
{
	struct scan_control sc = {
		.may_writepage = !laptop_mode,
		.may_unmap = 1,
		.may_swap = !noswap,
		.swap_cluster_max = SWAP_CLUSTER_MAX,
		.swappiness = swappiness,
		.order = 0,
		.mem_cgroup = mem,
		.isolate_pages = mem_cgroup_isolate_pages,
	};
	nodemask_t nm  = nodemask_of_node(nid);

	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
	sc.nodemask = &nm;
	sc.nr_reclaimed = 0;
	sc.nr_scanned = 0;
	/*
	 * NOTE: Although we can get the priority field, using it
	 * here is not a good idea, since it limits the pages we can scan.
	 * If we don't reclaim here, the shrink_zone from balance_pgdat
	 * will pick up pages from other mem cgroups as well. We hack
	 * the priority and make it zero.
	 */
	shrink_zone(0, zone, &sc);
	return sc.nr_reclaimed;
}

unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
					   gfp_t gfp_mask,
					   bool noswap,
					   unsigned int swappiness)
{
	struct zonelist *zonelist;
	struct scan_control sc = {
		.may_writepage = !laptop_mode,
		.may_unmap = 1,
		.may_swap = !noswap,
		.swap_cluster_max = SWAP_CLUSTER_MAX,
		.swappiness = swappiness,
		.order = 0,
		.mem_cgroup = mem_cont,
		.isolate_pages = mem_cgroup_isolate_pages,
		.nodemask = NULL, /* we don't care about the placement */
	};

	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
	return do_try_to_free_pages(zonelist, &sc);
}
#endif

/*
 * For kswapd, balance_pgdat() will work across all this node's zones until
 * they are all at high_wmark_pages(zone).
 *
 * Returns the number of pages which were actually freed.
 *
 * There is special handling here for zones which are full of pinned pages.
 * This can happen if the pages are all mlocked, or if they are all used by
 * device drivers (say, ZONE_DMA).  Or if they are all in use by hugetlb.
 * What we do is to detect the case where all pages in the zone have been
 * scanned twice and there has been zero successful reclaim.  Mark the zone
 * as dead and from now on, only perform a short scan.  Basically we're
 * polling the zone for when the problem goes away.
 *
 * kswapd scans the zones in the highmem->normal->dma direction.  It skips
 * zones which have free_pages > high_wmark_pages(zone), but once a zone is
 * found to have free_pages <= high_wmark_pages(zone), we scan that zone and
 * the lower zones regardless of the number of free pages in the lower zones.
 * This interoperates with the page allocator fallback scheme to ensure that
 * aging of pages is balanced across the zones.
 */
static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
{
	int all_zones_ok;
	int priority;
	int i;
	unsigned long total_scanned;
	struct reclaim_state *reclaim_state = current->reclaim_state;
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.may_unmap = 1,
		.may_swap = 1,
		.swap_cluster_max = SWAP_CLUSTER_MAX,
		.swappiness = vm_swappiness,
		.order = order,
		.mem_cgroup = NULL,
		.isolate_pages = isolate_pages_global,
	};
	/*
	 * temp_priority is used to remember the scanning priority at which
	 * this zone was successfully refilled to
	 * free_pages == high_wmark_pages(zone).
	 */
	int temp_priority[MAX_NR_ZONES];

loop_again:
	total_scanned = 0;
	sc.nr_reclaimed = 0;
	sc.may_writepage = !laptop_mode;
	count_vm_event(PAGEOUTRUN);

	for (i = 0; i < pgdat->nr_zones; i++)
		temp_priority[i] = DEF_PRIORITY;

	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
		unsigned long lru_pages = 0;

		/* The swap token gets in the way of swapout... */
		if (!priority)
			disable_swap_token();

		all_zones_ok = 1;

		/*
		 * Scan in the highmem->dma direction for the highest
		 * zone which needs scanning
		 */
		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
			struct zone *zone = pgdat->node_zones + i;

			if (!populated_zone(zone))
				continue;

			if (zone_is_all_unreclaimable(zone) &&
			    priority != DEF_PRIORITY)
				continue;

			/*
			 * Do some background aging of the anon list, to give
			 * pages a chance to be referenced before reclaiming.
			 */
			if (inactive_anon_is_low(zone, &sc))
				shrink_active_list(SWAP_CLUSTER_MAX, zone,
							&sc, priority, 0);

			if (!zone_watermark_ok(zone, order,
					high_wmark_pages(zone), 0, 0)) {
				end_zone = i;
				break;
			}
		}
		if (i < 0)
			goto out;

		for (i = 0; i <= end_zone; i++) {
			struct zone *zone = pgdat->node_zones + i;

			lru_pages += zone_reclaimable_pages(zone);
		}

		/*
		 * Now scan the zone in the dma->highmem direction, stopping
		 * at the last zone which needs scanning.
		 *
		 * We do this because the page allocator works in the opposite
		 * direction.  This prevents the page allocator from
		 * allocating pages behind kswapd's direction of progress,
		 * which would cause too much scanning of the lower zones.
		 */
		for (i = 0; i <= end_zone; i++) {
			struct zone *zone = pgdat->node_zones + i;
			int nr_slab;
			int nid, zid;

			if (!populated_zone(zone))
				continue;

			if (zone_is_all_unreclaimable(zone) &&
					priority != DEF_PRIORITY)
				continue;

			if (!zone_watermark_ok(zone, order,
					high_wmark_pages(zone), end_zone, 0))
				all_zones_ok = 0;
			temp_priority[i] = priority;
			sc.nr_scanned = 0;
			note_zone_scanning_priority(zone, priority);

			nid = pgdat->node_id;
			zid = zone_idx(zone);
			/*
			 * Call soft limit reclaim before calling shrink_zone.
			 * For now we ignore the return value.
			 */
			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
							nid, zid);
			/*
			 * We put equal pressure on every zone, unless one
			 * zone has way too many pages free already.
			 */
			if (!zone_watermark_ok(zone, order,
					8*high_wmark_pages(zone), end_zone, 0))
				shrink_zone(priority, zone, &sc);
			reclaim_state->reclaimed_slab = 0;
			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
						lru_pages);
			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
			total_scanned += sc.nr_scanned;
			if (zone_is_all_unreclaimable(zone))
				continue;
			if (nr_slab == 0 && zone->pages_scanned >=
					(zone_reclaimable_pages(zone) * 6))
					zone_set_flag(zone,
						      ZONE_ALL_UNRECLAIMABLE);
			/*
			 * If we've done a decent amount of scanning and
			 * the reclaim ratio is low, start doing writepage
			 * even in laptop mode.
			 */
			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
				sc.may_writepage = 1;
		}
		if (all_zones_ok)
			break;		/* kswapd: all done */
		/*
		 * OK, kswapd is getting into trouble.  Take a nap, then take
		 * another pass across the zones.
		 */
		if (total_scanned && priority < DEF_PRIORITY - 2)
			congestion_wait(BLK_RW_ASYNC, HZ/10);

		/*
		 * We do this so kswapd doesn't build up large priorities for
		 * example when it is freeing in parallel with allocators. It
		 * matches the direct reclaim path behaviour in terms of impact
		 * on zone->*_priority.
		 */
		if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
			break;
	}
out:
	/*
	 * Note within each zone the priority level at which this zone was
	 * brought into a happy state.  So that the next thread which scans
	 * this zone will start out at that priority level.
	 */
	for (i = 0; i < pgdat->nr_zones; i++) {
		struct zone *zone = pgdat->node_zones + i;

		zone->prev_priority = temp_priority[i];
	}
	if (!all_zones_ok) {
		cond_resched();

		try_to_freeze();

		/*
		 * Fragmentation may mean that the system cannot be
		 * rebalanced for high-order allocations in all zones.
		 * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX,
		 * it means the zones have been fully scanned and are still
		 * not balanced. For high-order allocations, there is
		 * little point trying all over again as kswapd may
		 * infinite-loop.
		 *
		 * Instead, recheck all watermarks at order-0 as they
		 * are the most important. If watermarks are ok, kswapd will go
		 * back to sleep. High-order users can still perform direct
		 * reclaim if they wish.
		 */
		if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
			order = sc.order = 0;

		goto loop_again;
	}

	return sc.nr_reclaimed;
}

/*
 * The background pageout daemon, started as a kernel thread
 * from the init process.
 *
 * This basically trickles out pages so that we have _some_
 * free memory available even if there is no other activity
 * that frees anything up. This is needed for things like routing
 * etc, where we otherwise might have all activity going on in
 * asynchronous contexts that cannot page things out.
 *
 * If there are applications that are active memory-allocators
 * (most normal use), this basically shouldn't matter.
 */
static int kswapd(void *p)
{
	unsigned long order;
	pg_data_t *pgdat = (pg_data_t*)p;
	struct task_struct *tsk = current;
	DEFINE_WAIT(wait);
	struct reclaim_state reclaim_state = {
		.reclaimed_slab = 0,
	};
	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);

	lockdep_set_current_reclaim_state(GFP_KERNEL);

	if (!cpumask_empty(cpumask))
		set_cpus_allowed_ptr(tsk, cpumask);
	current->reclaim_state = &reclaim_state;

	/*
	 * Tell the memory management that we're a "memory allocator",
	 * and that if we need more memory we should get access to it
	 * regardless (see "__alloc_pages()"). "kswapd" should
	 * never get caught in the normal page freeing logic.
	 *
	 * (Kswapd normally doesn't need memory anyway, but sometimes
	 * you need a small amount of memory in order to be able to
	 * page out something else, and this flag essentially protects
	 * us from recursively trying to free more memory as we're
	 * trying to free the first piece of memory in the first place).
	 */
	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
	set_freezable();

	order = 0;
	for ( ; ; ) {
		unsigned long new_order;

		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
		new_order = pgdat->kswapd_max_order;
		pgdat->kswapd_max_order = 0;
		if (order < new_order) {
			/*
			 * Don't sleep if someone wants a larger 'order'
			 * allocation
			 */
			order = new_order;
		} else {
			if (!freezing(current))
				schedule();

			order = pgdat->kswapd_max_order;
		}
		finish_wait(&pgdat->kswapd_wait, &wait);

		if (!try_to_freeze()) {
			/*
			 * We can speed up thawing tasks if we don't call
			 * balance_pgdat after returning from the refrigerator
			 */
			balance_pgdat(pgdat, order);
		}
	}
	return 0;
}

/*
 * A zone is low on free memory, so wake its kswapd task to service it.
 */
void wakeup_kswapd(struct zone *zone, int order)
{
	pg_data_t *pgdat;

	if (!populated_zone(zone))
		return;

	pgdat = zone->zone_pgdat;
	if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
		return;
	if (pgdat->kswapd_max_order < order)
		pgdat->kswapd_max_order = order;
	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
		return;
	if (!waitqueue_active(&pgdat->kswapd_wait))
		return;
	wake_up_interruptible(&pgdat->kswapd_wait);
}

/*
 * The reclaimable count is mostly accurate.
 * The less reclaimable pages may be:
 * - mlocked pages, which will be moved to the unevictable list when found
 * - mapped pages, which may require several passes to be reclaimed
 * - dirty pages, which are not "instantly" reclaimable
 */
unsigned long global_reclaimable_pages(void)
{
	int nr;

	nr = global_page_state(NR_ACTIVE_FILE) +
	     global_page_state(NR_INACTIVE_FILE);

	if (nr_swap_pages > 0)
		nr += global_page_state(NR_ACTIVE_ANON) +
		      global_page_state(NR_INACTIVE_ANON);

	return nr;
}

unsigned long zone_reclaimable_pages(struct zone *zone)
{
	int nr;

	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
	     zone_page_state(zone, NR_INACTIVE_FILE);

	if (nr_swap_pages > 0)
		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
		      zone_page_state(zone, NR_INACTIVE_ANON);

	return nr;
}

#ifdef CONFIG_HIBERNATION
/*
 * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages'
 * pages from LRU lists system-wide, for the given pass and priority.
 *
 * For pass > 3 we also try to shrink the LRU lists that contain a few pages.
 */
static void shrink_all_zones(unsigned long nr_pages, int prio,
				      int pass, struct scan_control *sc)
{
	struct zone *zone;
	unsigned long nr_reclaimed = 0;
	struct zone_reclaim_stat *reclaim_stat;

	for_each_populated_zone(zone) {
		enum lru_list l;

		if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
			continue;

		for_each_evictable_lru(l) {
			enum zone_stat_item ls = NR_LRU_BASE + l;
			unsigned long lru_pages = zone_page_state(zone, ls);

			/* For pass = 0, we don't shrink the active list */
			if (pass == 0 && (l == LRU_ACTIVE_ANON ||
						l == LRU_ACTIVE_FILE))
				continue;

			reclaim_stat = get_reclaim_stat(zone, sc);
			reclaim_stat->nr_saved_scan[l] +=
						(lru_pages >> prio) + 1;
			if (reclaim_stat->nr_saved_scan[l]
						>= nr_pages || pass > 3) {
				unsigned long nr_to_scan;

				reclaim_stat->nr_saved_scan[l] = 0;
				nr_to_scan = min(nr_pages, lru_pages);
				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
								sc, prio);
				if (nr_reclaimed >= nr_pages) {
					sc->nr_reclaimed += nr_reclaimed;
					return;
				}
			}
		}
	}
	sc->nr_reclaimed += nr_reclaimed;
}

/*
 * Try to free `nr_pages' of memory, system-wide, and return the number of
 * freed pages.
 *
 * Rather than trying to age LRUs the aim is to preserve the overall
 * LRU order by reclaiming preferentially
 * inactive > active > active referenced > active mapped
 */
unsigned long shrink_all_memory(unsigned long nr_pages)
{
	unsigned long lru_pages, nr_slab;
	int pass;
	struct reclaim_state reclaim_state;
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.may_unmap = 0,
		.may_writepage = 1,
		.isolate_pages = isolate_pages_global,
		.nr_reclaimed = 0,
	};

	current->reclaim_state = &reclaim_state;

	lru_pages = global_reclaimable_pages();
	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
	/* If slab caches are huge, it's better to hit them first */
	while (nr_slab >= lru_pages) {
		reclaim_state.reclaimed_slab = 0;
		shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
		if (!reclaim_state.reclaimed_slab)
			break;

		sc.nr_reclaimed += reclaim_state.reclaimed_slab;
		if (sc.nr_reclaimed >= nr_pages)
			goto out;

		nr_slab -= reclaim_state.reclaimed_slab;
	}

	/*
	 * We try to shrink LRUs in 5 passes:
	 * 0 = Reclaim from inactive_list only
	 * 1 = Reclaim from active list but don't reclaim mapped
	 * 2 = 2nd pass of type 1
	 * 3 = Reclaim mapped (normal reclaim)
	 * 4 = 2nd pass of type 3
	 */
	for (pass = 0; pass < 5; pass++) {
		int prio;

		/* Force reclaiming mapped pages in the passes #3 and #4 */
		if (pass > 2)
			sc.may_unmap = 1;

		for (prio = DEF_PRIORITY; prio >= 0; prio--) {
			unsigned long nr_to_scan = nr_pages - sc.nr_reclaimed;

			sc.nr_scanned = 0;
			sc.swap_cluster_max = nr_to_scan;
			shrink_all_zones(nr_to_scan, prio, pass, &sc);
			if (sc.nr_reclaimed >= nr_pages)
				goto out;

			reclaim_state.reclaimed_slab = 0;
			shrink_slab(sc.nr_scanned, sc.gfp_mask,
				    global_reclaimable_pages());
			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
			if (sc.nr_reclaimed >= nr_pages)
				goto out;

			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
				congestion_wait(BLK_RW_ASYNC, HZ / 10);
		}
	}

	/*
	 * If sc.nr_reclaimed = 0, we could not shrink LRUs, but there may
	 * still be something in the slab caches.
	 */
	if (!sc.nr_reclaimed) {
		do {
			reclaim_state.reclaimed_slab = 0;
			shrink_slab(nr_pages, sc.gfp_mask,
				    global_reclaimable_pages());
			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
		} while (sc.nr_reclaimed < nr_pages &&
				reclaim_state.reclaimed_slab > 0);
	}

out:
	current->reclaim_state = NULL;

	return sc.nr_reclaimed;
}
#endif /* CONFIG_HIBERNATION */

/*
 * It's optimal to keep kswapds on the same CPUs as their memory, but
 * not required for correctness.  So if the last cpu in a node goes
 * away, we get changed to run anywhere: as the first one comes back,
 * restore their cpu bindings.
 */
static int __devinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action, void *hcpu)
{
	int nid;

	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
		for_each_node_state(nid, N_HIGH_MEMORY) {
			pg_data_t *pgdat = NODE_DATA(nid);
			const struct cpumask *mask;

			mask = cpumask_of_node(pgdat->node_id);

			if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
				/* One of our CPUs online: restore mask */
				set_cpus_allowed_ptr(pgdat->kswapd, mask);
		}
	}
	return NOTIFY_OK;
}

/*
 * This kswapd start function will be called by init and node-hot-add.
 * On node-hot-add, kswapd will be moved to proper cpus if cpus are
 * hot-added.
 */
int kswapd_run(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int ret = 0;

	if (pgdat->kswapd)
		return 0;

	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
	if (IS_ERR(pgdat->kswapd)) {
		/* failure at boot is fatal */
		BUG_ON(system_state == SYSTEM_BOOTING);
		printk("Failed to start kswapd on node %d\n", nid);
		ret = -1;
	}
	return ret;
}

static int __init kswapd_init(void)
{
	int nid;

	swap_setup();
	for_each_node_state(nid, N_HIGH_MEMORY)
		kswapd_run(nid);
	hotcpu_notifier(cpu_callback, 0);
	return 0;
}

module_init(kswapd_init)

#ifdef CONFIG_NUMA
/*
 * Zone reclaim mode
 *
 * If non-zero call zone_reclaim when the number of free pages falls below
 * the watermarks.
 */
int zone_reclaim_mode __read_mostly;

#define RECLAIM_OFF 0
#define RECLAIM_ZONE (1<<0)	/* Run shrink_inactive_list on the zone */
#define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */
#define RECLAIM_SWAP (1<<2)	/* Swap pages out during reclaim */

/*
 * Priority for ZONE_RECLAIM. This determines the fraction of pages
 * of a node considered for each zone_reclaim. 4 scans 1/16th of
 * a zone.
 */
#define ZONE_RECLAIM_PRIORITY 4

/*
 * Percentage of pages in a zone that must be unmapped for zone_reclaim to
 * occur.
 */
int sysctl_min_unmapped_ratio = 1;

/*
 * If the number of slab pages in a zone grows beyond this percentage then
 * slab reclaim needs to occur.
 */
int sysctl_min_slab_ratio = 5;

static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
{
	unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
	unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
		zone_page_state(zone, NR_ACTIVE_FILE);

	/*
	 * It's possible for there to be more file mapped pages than
	 * accounted for by the pages on the file LRU lists because
	 * tmpfs pages accounted for as ANON can also be FILE_MAPPED
	 */
	return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
}

/* Work out how many page cache pages we can reclaim in this reclaim_mode */
static long zone_pagecache_reclaimable(struct zone *zone)
{
	long nr_pagecache_reclaimable;
	long delta = 0;

	/*
	 * If RECLAIM_SWAP is set, then all file pages are considered
	 * potentially reclaimable. Otherwise, we have to worry about
	 * pages like swapcache and zone_unmapped_file_pages() provides
	 * a better estimate
	 */
	if (zone_reclaim_mode & RECLAIM_SWAP)
		nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
	else
		nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);

	/* If we can't clean pages, remove dirty pages from consideration */
	if (!(zone_reclaim_mode & RECLAIM_WRITE))
		delta += zone_page_state(zone, NR_FILE_DIRTY);

	/* Watch for any possible underflows due to delta */
	if (unlikely(delta > nr_pagecache_reclaimable))
		delta = nr_pagecache_reclaimable;

	return nr_pagecache_reclaimable - delta;
}
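
/*
 * Example of the accounting above (hypothetical numbers): with
 * zone_reclaim_mode = RECLAIM_ZONE only, 10000 file-LRU pages of which
 * 4000 are mapped and 1500 are dirty, the reclaimable estimate is
 * (10000 - 4000) - 1500 = 4500 pages -- mapped pages are excluded because
 * RECLAIM_SWAP is clear, and dirty pages because RECLAIM_WRITE is clear.
 */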

/*
 * Try to free up some pages from this zone through reclaim.
 */
static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
{
	/* Minimum pages needed in order to stay on node */
	const unsigned long nr_pages = 1 << order;
	struct task_struct *p = current;
	struct reclaim_state reclaim_state;
	int priority;
	struct scan_control sc = {
		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
		.may_swap = 1,
		.swap_cluster_max = max_t(unsigned long, nr_pages,
					SWAP_CLUSTER_MAX),
		.gfp_mask = gfp_mask,
		.swappiness = vm_swappiness,
		.order = order,
		.isolate_pages = isolate_pages_global,
	};
	unsigned long slab_reclaimable;

	disable_swap_token();
	cond_resched();
	/*
	 * We need to be able to allocate from the reserves for RECLAIM_SWAP
	 * and we also need to be able to write out pages for RECLAIM_WRITE
	 * and RECLAIM_SWAP.
	 */
	p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
	reclaim_state.reclaimed_slab = 0;
	p->reclaim_state = &reclaim_state;

	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
		/*
		 * Free memory by calling shrink zone with increasing
		 * priorities until we have enough memory freed.
		 */
		priority = ZONE_RECLAIM_PRIORITY;
		do {
			note_zone_scanning_priority(zone, priority);
			shrink_zone(priority, zone, &sc);
			priority--;
		} while (priority >= 0 && sc.nr_reclaimed < nr_pages);
	}

	slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
	if (slab_reclaimable > zone->min_slab_pages) {
		/*
		 * shrink_slab() does not currently allow us to determine how
		 * many pages were freed in this zone. So we take the current
		 * number of slab pages and shake the slab until it is reduced
		 * by the same nr_pages that we used for reclaiming unmapped
		 * pages.
		 *
		 * Note that shrink_slab will free memory on all zones and may
		 * take a long time.
		 */
		while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
			zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
				slab_reclaimable - nr_pages)
			;

		/*
		 * Update nr_reclaimed by the number of slab pages we
		 * reclaimed from this zone.
		 */
		sc.nr_reclaimed += slab_reclaimable -
			zone_page_state(zone, NR_SLAB_RECLAIMABLE);
	}

	p->reclaim_state = NULL;
	current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
	return sc.nr_reclaimed >= nr_pages;
}

int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
{
	int node_id;
	int ret;

	/*
	 * Zone reclaim reclaims unmapped file backed pages and
	 * slab pages if we are over the defined limits.
	 *
	 * A small portion of unmapped file backed pages is needed for
	 * file I/O otherwise pages read by file I/O will be immediately
	 * thrown out if the zone is overallocated. So we do not reclaim
	 * if less than a specified percentage of the zone is used by
	 * unmapped file backed pages.
	 */
	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
		return ZONE_RECLAIM_FULL;

	if (zone_is_all_unreclaimable(zone))
		return ZONE_RECLAIM_FULL;

	/*
	 * Do not scan if the allocation should not be delayed.
	 */
	if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
		return ZONE_RECLAIM_NOSCAN;

	/*
	 * Only run zone reclaim on the local zone or on zones that do not
	 * have associated processors. This will favor the local processor
	 * over remote processors and spread off node memory allocations
	 * as wide as possible.
	 */
	node_id = zone_to_nid(zone);
	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
		return ZONE_RECLAIM_NOSCAN;

	if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
		return ZONE_RECLAIM_NOSCAN;

	ret = __zone_reclaim(zone, gfp_mask, order);
	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);

	if (!ret)
		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);

	return ret;
}
#endif

/*
 * page_evictable - test whether a page is evictable
 * @page: the page to test
 * @vma: the VMA in which the page is or will be mapped, may be NULL
 *
 * Test whether page is evictable--i.e., should be placed on active/inactive
 * lists vs unevictable list.  The vma argument is !NULL when called from the
 * fault path to determine how to instantiate a new page.
 *
 * Reasons page might not be evictable:
 * (1) page's mapping marked unevictable
 * (2) page is part of an mlocked VMA
 */
int page_evictable(struct page *page, struct vm_area_struct *vma)
{

	if (mapping_unevictable(page_mapping(page)))
		return 0;

	if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
		return 0;

	return 1;
}

/**
 * check_move_unevictable_page - check page for evictability and move to
 * appropriate zone lru list
 * @page: page to check evictability and move to appropriate lru list
 * @zone: zone page is in
 *
 * Checks a page for evictability and moves the page to the appropriate
 * zone lru list.
 *
 * Restrictions: zone->lru_lock must be held, page must be on LRU and must
 * have PageUnevictable set.
 */
static void check_move_unevictable_page(struct page *page, struct zone *zone)
{
	VM_BUG_ON(PageActive(page));

retry:
	ClearPageUnevictable(page);
	if (page_evictable(page, NULL)) {
		enum lru_list l = page_lru_base_type(page);

		__dec_zone_state(zone, NR_UNEVICTABLE);
		list_move(&page->lru, &zone->lru[l].list);
		mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
		__count_vm_event(UNEVICTABLE_PGRESCUED);
	} else {
		/*
		 * rotate unevictable list
		 */
		SetPageUnevictable(page);
		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
		mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
		if (page_evictable(page, NULL))
			goto retry;
	}
}

/**
 * scan_mapping_unevictable_pages - scan an address space for evictable pages
 * @mapping: struct address_space to scan for evictable pages
 *
 * Scan all pages in mapping.  Check unevictable pages for
 * evictability and move them to the appropriate zone lru list.
 */
void scan_mapping_unevictable_pages(struct address_space *mapping)
{
	pgoff_t next = 0;
	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
			 PAGE_CACHE_SHIFT;
	struct zone *zone;
	struct pagevec pvec;

	if (mapping->nrpages == 0)
		return;

	pagevec_init(&pvec, 0);
	while (next < end &&
		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		int i;
		int pg_scanned = 0;

		zone = NULL;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index = page->index;
			struct zone *pagezone = page_zone(page);

			pg_scanned++;
			if (page_index > next)
				next = page_index;
			next++;

			if (pagezone != zone) {
				if (zone)
					spin_unlock_irq(&zone->lru_lock);
				zone = pagezone;
				spin_lock_irq(&zone->lru_lock);
			}

			if (PageLRU(page) && PageUnevictable(page))
				check_move_unevictable_page(page, zone);
		}
		if (zone)
			spin_unlock_irq(&zone->lru_lock);
		pagevec_release(&pvec);

		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
	}

}

/**
 * scan_zone_unevictable_pages - check unevictable list for evictable pages
 * @zone - zone of which to scan the unevictable list
 *
 * Scan @zone's unevictable LRU lists to check for pages that have become
 * evictable.  Move those that have to @zone's inactive list where they
 * become candidates for reclaim, unless shrink_inactive_zone() decides
 * to reactivate them.  Pages that are still unevictable are rotated
 * back onto @zone's unevictable list.
 */
#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
static void scan_zone_unevictable_pages(struct zone *zone)
{
	struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
	unsigned long scan;
	unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);

	while (nr_to_scan > 0) {
		unsigned long batch_size = min(nr_to_scan,
						SCAN_UNEVICTABLE_BATCH_SIZE);

		spin_lock_irq(&zone->lru_lock);
		for (scan = 0;  scan < batch_size; scan++) {
			struct page *page = lru_to_page(l_unevictable);

			if (!trylock_page(page))
				continue;

			prefetchw_prev_lru_page(page, l_unevictable, flags);

			if (likely(PageLRU(page) && PageUnevictable(page)))
				check_move_unevictable_page(page, zone);

			unlock_page(page);
		}
		spin_unlock_irq(&zone->lru_lock);

		nr_to_scan -= batch_size;
	}
}

/**
 * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages
 *
 * A really big hammer:  scan all zones' unevictable LRU lists to check for
 * pages that have become evictable.  Move those back to the zones'
 * inactive list where they become candidates for reclaim.
 * This occurs when, e.g., we have unswappable pages on the unevictable lists,
 * and then we add swap to the system.  As such, it runs in the context of a
 * task that has possibly/probably made some previously unevictable pages
 * evictable.
 */
static void scan_all_zones_unevictable_pages(void)
{
	struct zone *zone;

	for_each_zone(zone) {
		scan_zone_unevictable_pages(zone);
	}
}

/*
 * scan_unevictable_pages [vm] sysctl handler.  On demand re-scan of
 * all nodes' unevictable lists for evictable pages
 */
unsigned long scan_unevictable_pages;

int scan_unevictable_handler(struct ctl_table *table, int write,
			   void __user *buffer,
			   size_t *length, loff_t *ppos)
{
	proc_doulongvec_minmax(table, write, buffer, length, ppos);

	if (write && *(unsigned long *)table->data)
		scan_all_zones_unevictable_pages();

	scan_unevictable_pages = 0;
	return 0;
}

/*
 * per node 'scan_unevictable_pages' attribute.  On demand re-scan of
 * a specified node's per zone unevictable lists for evictable pages.
 */
static ssize_t read_scan_unevictable_node(struct sys_device *dev,
					  struct sysdev_attribute *attr,
					  char *buf)
{
	return sprintf(buf, "0\n");	/* always zero; could be a pseudo-boolean */
}

static ssize_t write_scan_unevictable_node(struct sys_device *dev,
					   struct sysdev_attribute *attr,
					const char *buf, size_t count)
{
	struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
	struct zone *zone;
	unsigned long res;

	/*
	 * strict_strtoul() returns 0 on success and stores the parsed
	 * value in 'res'.  Treat a parse error or an explicit 0 as a
	 * no-op.  (The original code tested the return value instead of
	 * 'res', which made every successful parse a no-op.)
	 */
	if (strict_strtoul(buf, 10, &res) || !res)
		return 1;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;
		scan_zone_unevictable_pages(zone);
	}
	return 1;
}


static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
			read_scan_unevictable_node,
			write_scan_unevictable_node);

int scan_unevictable_register_node(struct node *node)
{
	return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
}

void scan_unevictable_unregister_node(struct node *node)
{
	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
}