/*
 *  linux/mm/swapfile.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */
8#include <linux/mm.h>
9#include <linux/hugetlb.h>
10#include <linux/mman.h>
11#include <linux/slab.h>
12#include <linux/kernel_stat.h>
13#include <linux/swap.h>
14#include <linux/vmalloc.h>
15#include <linux/pagemap.h>
16#include <linux/namei.h>
17#include <linux/shm.h>
18#include <linux/blkdev.h>
19#include <linux/random.h>
20#include <linux/writeback.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/init.h>
24#include <linux/module.h>
25#include <linux/rmap.h>
26#include <linux/security.h>
27#include <linux/backing-dev.h>
28#include <linux/mutex.h>
29#include <linux/capability.h>
30#include <linux/syscalls.h>
31#include <linux/memcontrol.h>
32
33#include <asm/pgtable.h>
34#include <asm/tlbflush.h>
35#include <linux/swapops.h>
36#include <linux/page_cgroup.h>
37
/* Protects swap_info[], swap_list, and the per-device swap maps below. */
static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
long nr_swap_pages;		/* free swap slots remaining, system-wide */
long total_swap_pages;		/* total usable swap slots, system-wide */
static int swap_overflow;	/* set when a map count once hit SWAP_MAP_MAX */
static int least_priority;	/* next auto-assigned (negative) priority */

/* Diagnostic strings printed by swap_info_get() for bad entries. */
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";

/* Priority-ordered list of swap areas: {head, next area to allocate from}. */
static struct swap_list_t swap_list = {-1, -1};

static struct swap_info_struct swap_info[MAX_SWAPFILES];

/* Serialises swapon/swapoff against each other and /proc/swaps readers. */
static DEFINE_MUTEX(swapon_mutex);

/*
 * On whose behalf a swap-map reference is taken or dropped:
 * a user pte mapping (SWAP_MAP) or the swap cache (SWAP_CACHE).
 */
enum {
	SWAP_MAP = 0,
	SWAP_CACHE,
};
62
63static inline int swap_count(unsigned short ent)
64{
65 return ent & SWAP_COUNT_MASK;
66}
67
68static inline bool swap_has_cache(unsigned short ent)
69{
70 return !!(ent & SWAP_HAS_CACHE);
71}
72
73static inline unsigned short encode_swapmap(int count, bool has_cache)
74{
75 unsigned short ret = count;
76
77 if (has_cache)
78 return SWAP_HAS_CACHE | ret;
79 return ret;
80}
81
82
83static int
84__try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
85{
86 int type = si - swap_info;
87 swp_entry_t entry = swp_entry(type, offset);
88 struct page *page;
89 int ret = 0;
90
91 page = find_get_page(&swapper_space, entry.val);
92 if (!page)
93 return 0;
94
95
96
97
98
99
100
101 if (trylock_page(page)) {
102 ret = try_to_free_swap(page);
103 unlock_page(page);
104 }
105 page_cache_release(page);
106 return ret;
107}
108
/*
 * Taken for read by swap_unplug_io_fn(); taken for write by swapoff so
 * that it can wait out any unplug calls in flight before tearing down
 * the swap_info data they dereference.
 */
static DECLARE_RWSEM(swap_unplug_sem);
115
/*
 * Called when the backing device of a swapped-out page needs unplugging:
 * work out which swap device the page lives on and kick its queue.
 */
void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
{
	swp_entry_t entry;

	down_read(&swap_unplug_sem);
	entry.val = page_private(page);
	if (PageSwapCache(page)) {
		struct block_device *bdev = swap_info[swp_type(entry)].bdev;
		struct backing_dev_info *bdi;

		/*
		 * If the page is removed from swapcache from under us (with a
		 * racy try_to_unuse/swapoff) we need an additional reference
		 * count to avoid reading garbage from page_private(page) above.
		 * If the WARN_ON triggers during a swapoff it maybe the race
		 * condition and it's harmless.  However if it triggers without
		 * swapoff it signals a problem.
		 */
		WARN_ON(page_count(page) <= 1);

		bdi = bdev->bd_inode->i_mapping->backing_dev_info;
		blk_run_backing_dev(bdi, page);
	}
	up_read(&swap_unplug_sem);
}
141
142
143
144
145
/*
 * discard_swap() - issue a discard for every extent of the swap area,
 * skipping the header page.  Returns 0 or a block-layer error code
 * (often -EOPNOTSUPP).  May sleep.
 */
static int discard_swap(struct swap_info_struct *si)
{
	struct swap_extent *se;
	int err = 0;

	list_for_each_entry(se, &si->extent_list, list) {
		sector_t start_block = se->start_block << (PAGE_SHIFT - 9);
		sector_t nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);

		if (se->start_page == 0) {
			/* Do not discard the swap header page! */
			start_block += 1 << (PAGE_SHIFT - 9);
			nr_blocks -= 1 << (PAGE_SHIFT - 9);
			if (!nr_blocks)
				continue;
		}

		err = blkdev_issue_discard(si->bdev, start_block,
						nr_blocks, GFP_KERNEL,
						DISCARD_FL_BARRIER);
		if (err)
			break;

		cond_resched();
	}
	return err;
}
173
174
175
176
177
/*
 * Tell the swap device that a cluster of slots has been reallocated and
 * its old contents may be discarded, to help wear-levelling.  Walks the
 * extent list starting from the cached curr_swap_extent.
 */
static void discard_swap_cluster(struct swap_info_struct *si,
				 pgoff_t start_page, pgoff_t nr_pages)
{
	struct swap_extent *se = si->curr_swap_extent;
	int found_extent = 0;

	while (nr_pages) {
		struct list_head *lh;

		if (se->start_page <= start_page &&
		    start_page < se->start_page + se->nr_pages) {
			pgoff_t offset = start_page - se->start_page;
			sector_t start_block = se->start_block + offset;
			sector_t nr_blocks = se->nr_pages - offset;

			if (nr_blocks > nr_pages)
				nr_blocks = nr_pages;
			start_page += nr_blocks;
			nr_pages -= nr_blocks;

			/* Cache the first extent hit for the next lookup. */
			if (!found_extent++)
				si->curr_swap_extent = se;

			start_block <<= PAGE_SHIFT - 9;
			nr_blocks <<= PAGE_SHIFT - 9;
			if (blkdev_issue_discard(si->bdev, start_block,
							nr_blocks, GFP_NOIO,
							DISCARD_FL_BARRIER))
				break;
		}

		lh = se->list.next;
		if (lh == &si->extent_list)
			lh = lh->next;	/* skip over the list head */
		se = list_entry(lh, struct swap_extent, list);
	}
}
215
/* wait_on_bit() action: just yield the CPU until we are woken. */
static int wait_for_discard(void *word)
{
	(void)word;
	schedule();
	return 0;
}
221
#define SWAPFILE_CLUSTER	256	/* slots allocated per cluster */
#define LATENCY_LIMIT		256	/* scan steps between resched checks */
224
static inline unsigned long scan_swap_map(struct swap_info_struct *si,
					  int cache)
{
	unsigned long offset;
	unsigned long scan_base;
	unsigned long last_in_cluster = 0;
	int latency_ration = LATENCY_LIMIT;
	int found_free_cluster = 0;

	/*
	 * We try to cluster swap pages by allocating them sequentially
	 * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
	 * way, however, we resort to first-free allocation, starting
	 * a new cluster.  This prevents us from scattering swap pages
	 * all over the entire swap partition, so that we reduce
	 * overall disk seek times between swap pages.
	 *
	 * Called with swap_lock held; may drop and retake it while
	 * scanning, and while issuing or waiting on discards.
	 */

	si->flags += SWP_SCANNING;	/* counted flag: swapoff waits on it */
	scan_base = offset = si->cluster_next;

	if (unlikely(!si->cluster_nr--)) {
		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
			si->cluster_nr = SWAPFILE_CLUSTER - 1;
			goto checks;
		}
		if (si->flags & SWP_DISCARDABLE) {
			/*
			 * Start range check on racing allocations, in case
			 * they overlap the cluster we eventually decide on
			 * (we scan without swap_lock to allow preemption).
			 * It's hardly conceivable that cluster_nr could be
			 * wrapped during our scan, but don't depend on it.
			 */
			if (si->lowest_alloc)
				goto checks;
			si->lowest_alloc = si->max;
			si->highest_alloc = 0;
		}
		spin_unlock(&swap_lock);

		/*
		 * If seek is expensive, start searching for new cluster from
		 * start of partition, to minimize the span of allocated swap.
		 * But if seek is cheap, search from our current position, so
		 * that swap is allocated from all over the partition.
		 */
		if (!(si->flags & SWP_SOLIDSTATE))
			scan_base = offset = si->lowest_bit;
		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;

		/* Locate the first empty (unaligned) cluster */
		for (; last_in_cluster <= si->highest_bit; offset++) {
			if (si->swap_map[offset])
				last_in_cluster = offset + SWAPFILE_CLUSTER;
			else if (offset == last_in_cluster) {
				spin_lock(&swap_lock);
				offset -= SWAPFILE_CLUSTER - 1;
				si->cluster_next = offset;
				si->cluster_nr = SWAPFILE_CLUSTER - 1;
				found_free_cluster = 1;
				goto checks;
			}
			if (unlikely(--latency_ration < 0)) {
				cond_resched();
				latency_ration = LATENCY_LIMIT;
			}
		}

		offset = si->lowest_bit;
		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;

		/* Locate the first empty (unaligned) cluster below scan_base */
		for (; last_in_cluster < scan_base; offset++) {
			if (si->swap_map[offset])
				last_in_cluster = offset + SWAPFILE_CLUSTER;
			else if (offset == last_in_cluster) {
				spin_lock(&swap_lock);
				offset -= SWAPFILE_CLUSTER - 1;
				si->cluster_next = offset;
				si->cluster_nr = SWAPFILE_CLUSTER - 1;
				found_free_cluster = 1;
				goto checks;
			}
			if (unlikely(--latency_ration < 0)) {
				cond_resched();
				latency_ration = LATENCY_LIMIT;
			}
		}

		/* No free cluster: fall back to first-free allocation. */
		offset = scan_base;
		spin_lock(&swap_lock);
		si->cluster_nr = SWAPFILE_CLUSTER - 1;
		si->lowest_alloc = 0;
	}

checks:
	if (!(si->flags & SWP_WRITEOK))
		goto no_page;
	if (!si->highest_bit)
		goto no_page;
	if (offset > si->highest_bit)
		scan_base = offset = si->lowest_bit;

	/* reuse swap entry of cache-only swap if not busy. */
	if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
		int swap_was_freed;
		spin_unlock(&swap_lock);
		swap_was_freed = __try_to_reclaim_swap(si, offset);
		spin_lock(&swap_lock);
		/* entry was freed successfully, try to use this again */
		if (swap_was_freed)
			goto checks;
		goto scan; /* check next one */
	}

	if (si->swap_map[offset])
		goto scan;

	/* Claim the slot and maintain the allocation bounds. */
	if (offset == si->lowest_bit)
		si->lowest_bit++;
	if (offset == si->highest_bit)
		si->highest_bit--;
	si->inuse_pages++;
	if (si->inuse_pages == si->pages) {
		si->lowest_bit = si->max;
		si->highest_bit = 0;
	}
	if (cache == SWAP_CACHE) /* at usual swap-out via vmscan.c */
		si->swap_map[offset] = encode_swapmap(0, true);
	else /* at suspend */
		si->swap_map[offset] = encode_swapmap(1, false);
	si->cluster_next = offset + 1;
	si->flags -= SWP_SCANNING;

	if (si->lowest_alloc) {
		/*
		 * Only set when SWP_DISCARDABLE, and there's a scan
		 * for a free cluster in progress or just completed.
		 */
		if (found_free_cluster) {
			/*
			 * To optimize wear-levelling, discard the
			 * old data of the cluster, taking care not to
			 * discard any of its pages that have already
			 * been allocated by racing tasks (offset has
			 * already stepped over any at the beginning).
			 */
			if (offset < si->highest_alloc &&
			    si->lowest_alloc <= last_in_cluster)
				last_in_cluster = si->lowest_alloc - 1;
			si->flags |= SWP_DISCARDING;
			spin_unlock(&swap_lock);

			if (offset < last_in_cluster)
				discard_swap_cluster(si, offset,
					last_in_cluster - offset + 1);

			spin_lock(&swap_lock);
			si->lowest_alloc = 0;
			si->flags &= ~SWP_DISCARDING;

			smp_mb();	/* wake_up_bit advises this */
			wake_up_bit(&si->flags, ilog2(SWP_DISCARDING));

		} else if (si->flags & SWP_DISCARDING) {
			/*
			 * Delay using pages allocated by racing tasks
			 * until the whole discard has been issued.
			 */
			spin_unlock(&swap_lock);
			wait_on_bit(&si->flags, ilog2(SWP_DISCARDING),
				wait_for_discard, TASK_UNINTERRUPTIBLE);
			spin_lock(&swap_lock);
		} else {
			/*
			 * Note pages allocated by racing tasks while
			 * a scan for a free cluster is in progress, so
			 * that its final discard can exclude them.
			 */
			if (offset < si->lowest_alloc)
				si->lowest_alloc = offset;
			if (offset > si->highest_alloc)
				si->highest_alloc = offset;
		}
	}
	return offset;

scan:
	/* Linear scan forward from offset, then from lowest_bit, unlocked. */
	spin_unlock(&swap_lock);
	while (++offset <= si->highest_bit) {
		if (!si->swap_map[offset]) {
			spin_lock(&swap_lock);
			goto checks;
		}
		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
			spin_lock(&swap_lock);
			goto checks;
		}
		if (unlikely(--latency_ration < 0)) {
			cond_resched();
			latency_ration = LATENCY_LIMIT;
		}
	}
	offset = si->lowest_bit;
	while (++offset < scan_base) {
		if (!si->swap_map[offset]) {
			spin_lock(&swap_lock);
			goto checks;
		}
		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
			spin_lock(&swap_lock);
			goto checks;
		}
		if (unlikely(--latency_ration < 0)) {
			cond_resched();
			latency_ration = LATENCY_LIMIT;
		}
	}
	spin_lock(&swap_lock);

no_page:
	si->flags -= SWP_SCANNING;
	return 0;
}
456
/*
 * Allocate one swap slot on behalf of the swap cache, from the
 * highest-priority area with space, rotating among areas of equal
 * priority.  Returns the entry, or (swp_entry_t){0} when swap is full.
 */
swp_entry_t get_swap_page(void)
{
	struct swap_info_struct *si;
	pgoff_t offset;
	int type, next;
	int wrapped = 0;

	spin_lock(&swap_lock);
	if (nr_swap_pages <= 0)
		goto noswap;
	nr_swap_pages--;	/* reserve a slot; given back on failure */

	for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
		si = swap_info + type;
		next = si->next;
		if (next < 0 ||
		    (!wrapped && si->prio != swap_info[next].prio)) {
			/* End of this priority tier: restart from the head. */
			next = swap_list.head;
			wrapped++;
		}

		if (!si->highest_bit)
			continue;
		if (!(si->flags & SWP_WRITEOK))
			continue;

		swap_list.next = next;
		/* This is called for allocating swap entry for cache */
		offset = scan_swap_map(si, SWAP_CACHE);
		if (offset) {
			spin_unlock(&swap_lock);
			return swp_entry(type, offset);
		}
		/* scan_swap_map dropped the lock: re-read the rotor. */
		next = swap_list.next;
	}

	nr_swap_pages++;
noswap:
	spin_unlock(&swap_lock);
	return (swp_entry_t) {0};
}
498
499
500swp_entry_t get_swap_page_of_type(int type)
501{
502 struct swap_info_struct *si;
503 pgoff_t offset;
504
505 spin_lock(&swap_lock);
506 si = swap_info + type;
507 if (si->flags & SWP_WRITEOK) {
508 nr_swap_pages--;
509
510 offset = scan_swap_map(si, SWAP_MAP);
511 if (offset) {
512 spin_unlock(&swap_lock);
513 return swp_entry(type, offset);
514 }
515 nr_swap_pages++;
516 }
517 spin_unlock(&swap_lock);
518 return (swp_entry_t) {0};
519}
520
/*
 * Validate a swap entry and return its swap_info_struct with swap_lock
 * held; returns NULL (lock not held) and logs a diagnostic for bad,
 * unused, or out-of-range entries.
 */
static struct swap_info_struct * swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry.val)
		goto out;
	type = swp_type(entry);
	if (type >= nr_swapfiles)
		goto bad_nofile;
	p = & swap_info[type];
	if (!(p->flags & SWP_USED))
		goto bad_device;
	offset = swp_offset(entry);
	if (offset >= p->max)
		goto bad_offset;
	if (!p->swap_map[offset])
		goto bad_free;
	spin_lock(&swap_lock);
	return p;

bad_free:
	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
	goto out;
bad_offset:
	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
	goto out;
bad_device:
	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
	goto out;
bad_nofile:
	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
out:
	return NULL;
}
556
/*
 * Drop one reference (user map or swap cache, per @cache) on a swap
 * entry.  Caller holds swap_lock.  Returns the resulting raw swap-map
 * value, so callers can tell whether other references remain.
 */
static int swap_entry_free(struct swap_info_struct *p,
			   swp_entry_t ent, int cache)
{
	unsigned long offset = swp_offset(ent);
	int count = swap_count(p->swap_map[offset]);
	bool has_cache;

	has_cache = swap_has_cache(p->swap_map[offset]);

	if (cache == SWAP_MAP) { /* dropping usage count of swap */
		/* counts at SWAP_MAP_MAX are sticky: never decremented */
		if (count < SWAP_MAP_MAX) {
			count--;
			p->swap_map[offset] = encode_swapmap(count, has_cache);
		}
	} else { /* dropping the swap-cache flag */
		VM_BUG_ON(!has_cache);
		p->swap_map[offset] = encode_swapmap(count, false);

	}
	/* return code. */
	count = p->swap_map[offset];
	/* free if no reference */
	if (!count) {
		if (offset < p->lowest_bit)
			p->lowest_bit = offset;
		if (offset > p->highest_bit)
			p->highest_bit = offset;
		/* steer the next allocation back to this higher-prio area */
		if (p->prio > swap_info[swap_list.next].prio)
			swap_list.next = p - swap_info;
		nr_swap_pages++;
		p->inuse_pages--;
	}
	if (!swap_count(count))
		mem_cgroup_uncharge_swap(ent);
	return count;
}
593
594
595
596
597
598void swap_free(swp_entry_t entry)
599{
600 struct swap_info_struct * p;
601
602 p = swap_info_get(entry);
603 if (p) {
604 swap_entry_free(p, entry, SWAP_MAP);
605 spin_unlock(&swap_lock);
606 }
607}
608
609
610
611
612void swapcache_free(swp_entry_t entry, struct page *page)
613{
614 struct swap_info_struct *p;
615 int ret;
616
617 p = swap_info_get(entry);
618 if (p) {
619 ret = swap_entry_free(p, entry, SWAP_CACHE);
620 if (page) {
621 bool swapout;
622 if (ret)
623 swapout = true;
624 else
625 swapout = false;
626 mem_cgroup_uncharge_swapcache(page, entry, swapout);
627 }
628 spin_unlock(&swap_lock);
629 }
630 return;
631}
632
633
634
635
636static inline int page_swapcount(struct page *page)
637{
638 int count = 0;
639 struct swap_info_struct *p;
640 swp_entry_t entry;
641
642 entry.val = page_private(page);
643 p = swap_info_get(entry);
644 if (p) {
645 count = swap_count(p->swap_map[swp_offset(entry)]);
646 spin_unlock(&swap_lock);
647 }
648 return count;
649}
650
651
652
653
654
655
656
/*
 * We can write to an anon page without COW if there are no other
 * references to it.  And as a side-effect, free up its swap: because
 * the old content on disk will never be read, and seeking back there
 * to write new content later would only waste time away from clustering.
 */
int reuse_swap_page(struct page *page)
{
	int count;

	VM_BUG_ON(!PageLocked(page));
	count = page_mapcount(page);
	if (count <= 1 && PageSwapCache(page)) {
		count += page_swapcount(page);
		if (count == 1 && !PageWriteback(page)) {
			delete_from_swap_cache(page);
			SetPageDirty(page);
		}
	}
	return count == 1;
}
672
673
674
675
676
/*
 * If swap is getting full, or if there are no more mappings of this
 * page, free up its swap cache entry.  Page must be locked.  Returns 1
 * if the swap space was freed.
 */
int try_to_free_swap(struct page *page)
{
	VM_BUG_ON(!PageLocked(page));

	/* Must be in swapcache, not under writeback, and unreferenced. */
	if (!PageSwapCache(page) || PageWriteback(page) ||
	    page_swapcount(page))
		return 0;

	delete_from_swap_cache(page);
	SetPageDirty(page);
	return 1;
}
692
693
694
695
696
/*
 * Free the swap entry like swap_free(), but also try to free the page
 * cache entry if it was the last user.  Returns whether the entry was
 * recognised as belonging to a live swap device.
 */
int free_swap_and_cache(swp_entry_t entry)
{
	struct swap_info_struct *p;
	struct page *page = NULL;

	if (non_swap_entry(entry))
		return 1;

	p = swap_info_get(entry);
	if (p) {
		/* only the cache reference left? then try to drop the page */
		if (swap_entry_free(p, entry, SWAP_MAP) == SWAP_HAS_CACHE) {
			page = find_get_page(&swapper_space, entry.val);
			if (page && !trylock_page(page)) {
				page_cache_release(page);
				page = NULL;
			}
		}
		spin_unlock(&swap_lock);
	}
	if (page) {
		/*
		 * Not mapped elsewhere, or swap space full?  Free it!
		 * Also recheck PageSwapCache now page is locked (above).
		 */
		if (PageSwapCache(page) && !PageWriteback(page) &&
		    (!page_mapped(page) || vm_swap_full())) {
			delete_from_swap_cache(page);
			SetPageDirty(page);
		}
		unlock_page(page);
		page_cache_release(page);
	}
	return p != NULL;
}
731
732#ifdef CONFIG_HIBERNATION
733
734
735
736
737
738
739
740
/*
 * Find the swap type that corresponds to the given device (if any).
 * With @device == 0 the first writable swap area is returned.  If
 * @bdev_p is non-NULL it receives a grabbed reference to the area's
 * block device.  Returns the swap type or -ENODEV.
 */
int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
{
	struct block_device *bdev = NULL;
	int i;

	if (device)
		bdev = bdget(device);

	spin_lock(&swap_lock);
	for (i = 0; i < nr_swapfiles; i++) {
		struct swap_info_struct *sis = swap_info + i;

		if (!(sis->flags & SWP_WRITEOK))
			continue;

		if (!bdev) {
			if (bdev_p)
				*bdev_p = bdgrab(sis->bdev);

			spin_unlock(&swap_lock);
			return i;
		}
		if (bdev == sis->bdev) {
			struct swap_extent *se;

			/* match only if the first extent begins at @offset */
			se = list_entry(sis->extent_list.next,
					struct swap_extent, list);
			if (se->start_block == offset) {
				if (bdev_p)
					*bdev_p = bdgrab(sis->bdev);

				spin_unlock(&swap_lock);
				bdput(bdev);
				return i;
			}
		}
	}
	spin_unlock(&swap_lock);
	if (bdev)
		bdput(bdev);

	return -ENODEV;
}
784
785
786
787
788
789
790
791unsigned int count_swap_pages(int type, int free)
792{
793 unsigned int n = 0;
794
795 if (type < nr_swapfiles) {
796 spin_lock(&swap_lock);
797 if (swap_info[type].flags & SWP_WRITEOK) {
798 n = swap_info[type].pages;
799 if (free)
800 n -= swap_info[type].inuse_pages;
801 }
802 spin_unlock(&swap_lock);
803 }
804 return n;
805}
806#endif
807
808
809
810
811
812
/*
 * Replace the swap pte at @addr with a present pte mapping @page.
 * No need to decide whether this PTE shares the swap entry with others:
 * do_wp_page will work that out if a write is requested later - to
 * force COW, vm_page_prot omits write permission from any private vma.
 * Returns 1 on success, 0 if the pte no longer matches, -ENOMEM on
 * memcg charge failure.
 */
static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, swp_entry_t entry, struct page *page)
{
	struct mem_cgroup *ptr = NULL;
	spinlock_t *ptl;
	pte_t *pte;
	int ret = 1;

	/* charge before taking the pte lock: this call can sleep */
	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
		ret = -ENOMEM;
		goto out_nolock;
	}

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
		if (ret > 0)
			mem_cgroup_cancel_charge_swapin(ptr);
		ret = 0;
		goto out;
	}

	inc_mm_counter(vma->vm_mm, anon_rss);
	get_page(page);
	set_pte_at(vma->vm_mm, addr, pte,
		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
	page_add_anon_rmap(page, vma, addr);
	mem_cgroup_commit_charge_swapin(page, ptr);
	swap_free(entry);
	/*
	 * Move the page to the active list so it is not
	 * immediately swapped out again after swapon.
	 */
	activate_page(page);
out:
	pte_unmap_unlock(pte, ptl);
out_nolock:
	return ret;
}
851
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				swp_entry_t entry, struct page *page)
{
	pte_t swp_pte = swp_entry_to_pte(entry);
	pte_t *pte;
	int ret = 0;

	/*
	 * We scan without the pte lock: holding page lock and mmap_sem
	 * means swp_pte cannot be inserted under us, though it could get
	 * zapped, so unuse_pte() rechecks under the pte lock.  Scanning
	 * unlocked keeps this loop preemptible.
	 */
	pte = pte_offset_map(pmd, addr);
	do {
		/*
		 * swapoff spends a _lot_ of time in this loop!
		 * Test inline before going to call unuse_pte.
		 */
		if (unlikely(pte_same(*pte, swp_pte))) {
			/* unuse_pte takes the pte lock itself: unmap first */
			pte_unmap(pte);
			ret = unuse_pte(vma, pmd, addr, entry, page);
			if (ret)
				goto out;
			pte = pte_offset_map(pmd, addr);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(pte - 1);
out:
	return ret;
}
887
888static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
889 unsigned long addr, unsigned long end,
890 swp_entry_t entry, struct page *page)
891{
892 pmd_t *pmd;
893 unsigned long next;
894 int ret;
895
896 pmd = pmd_offset(pud, addr);
897 do {
898 next = pmd_addr_end(addr, end);
899 if (pmd_none_or_clear_bad(pmd))
900 continue;
901 ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
902 if (ret)
903 return ret;
904 } while (pmd++, addr = next, addr != end);
905 return 0;
906}
907
908static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
909 unsigned long addr, unsigned long end,
910 swp_entry_t entry, struct page *page)
911{
912 pud_t *pud;
913 unsigned long next;
914 int ret;
915
916 pud = pud_offset(pgd, addr);
917 do {
918 next = pud_addr_end(addr, end);
919 if (pud_none_or_clear_bad(pud))
920 continue;
921 ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
922 if (ret)
923 return ret;
924 } while (pud++, addr = next, addr != end);
925 return 0;
926}
927
/*
 * Unuse @entry within one vma.  If the page already has a mapping we
 * can narrow the walk to the single address it maps at in this vma;
 * otherwise the whole vma is scanned.
 */
static int unuse_vma(struct vm_area_struct *vma,
			swp_entry_t entry, struct page *page)
{
	pgd_t *pgd;
	unsigned long addr, end, next;
	int ret;

	if (page->mapping) {
		addr = page_address_in_vma(page, vma);
		if (addr == -EFAULT)	/* page not mapped in this vma */
			return 0;
		else
			end = addr + PAGE_SIZE;
	} else {
		addr = vma->vm_start;
		end = vma->vm_end;
	}

	pgd = pgd_offset(vma->vm_mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
		if (ret)
			return ret;
	} while (pgd++, addr = next, addr != end);
	return 0;
}
957
/*
 * Unuse @entry in every anonymous vma of @mm.  Called with the page
 * locked; returns a negative errno from unuse_vma, else 0.
 */
static int unuse_mm(struct mm_struct *mm,
			swp_entry_t entry, struct page *page)
{
	struct vm_area_struct *vma;
	int ret = 0;

	if (!down_read_trylock(&mm->mmap_sem)) {
		/*
		 * Lock ordering: we must drop the page lock to take
		 * mmap_sem without risk of deadlock.  Activate the page
		 * first so reclaim is unlikely to steal it meanwhile.
		 */
		activate_page(page);
		unlock_page(page);
		down_read(&mm->mmap_sem);
		lock_page(page);
	}
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
			break;
	}
	up_read(&mm->mmap_sem);
	return (ret < 0)? ret: 0;
}
981
982
983
984
985
/*
 * Scan swap_map from current position to next entry still in use.
 * Recycle to start on reaching the end, returning 0 when empty.
 */
static unsigned int find_next_to_unuse(struct swap_info_struct *si,
					unsigned int prev)
{
	unsigned int max = si->max;
	unsigned int i = prev;
	int count;

	/*
	 * No need for swap_lock here: we're just looking for whether an
	 * entry is in use, not modifying it; false positives and false
	 * negatives are both handled by the caller.
	 */
	for (;;) {
		if (++i >= max) {
			if (!prev) {
				/* scanned the whole map: nothing in use */
				i = 0;
				break;
			}
			/*
			 * No entries in use at top of swap_map, loop back
			 * to start and recheck there, in case more have
			 * been freed or added since our last scan.
			 * (slot 0 is the header page, never a swap entry)
			 */
			max = prev + 1;
			prev = 0;
			i = 1;
		}
		count = si->swap_map[i];
		if (count && swap_count(count) != SWAP_MAP_BAD)
			break;
	}
	return i;
}
1019
1020
1021
1022
1023
1024
/*
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it.  All the necessary page
 * table adjustments can then be made atomically.  Core of swapoff.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	unsigned int i = 0;
	int retval = 0;
	int reset_overflow = 0;
	int shmem;

	/*
	 * When searching mms for an entry, a good strategy is to
	 * start at the first mm we freed the previous entry from
	 * (though actually we don't notice whether we or coincidence
	 * freed the entry).  Initialize this start_mm with a hold.
	 *
	 * A simpler strategy would be to start at the last mm we
	 * freed the previous entry from; but that would take less
	 * advantage of mmlist ordering, which clusters forked mms
	 * together, child after parent.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/*
	 * Keep on scanning until all entries have gone.
	 */
	while ((i = find_next_to_unuse(si, i)) != 0) {
		if (signal_pending(current)) {
			retval = -EINTR;
			break;
		}

		/*
		 * Get a page for the entry, using the existing swap
		 * cache page if there is one.  Otherwise, get a clean
		 * page and read the swap into it.
		 */
		swap_map = &si->swap_map[i];
		entry = swp_entry(type, i);
		page = read_swap_cache_async(entry,
					GFP_HIGHUSER_MOVABLE, NULL, 0);
		if (!page) {
			/*
			 * Either swap_duplicate() failed because entry
			 * has been freed independently, and will not be
			 * reused since sys_swapoff() already disabled
			 * allocation from here, or alloc_page() failed.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * Don't hold on to start_mm if it looks like exiting.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for and lock page.  When do_swap_page races with
		 * try_to_unuse, do_swap_page can handle the fault much
		 * faster than try_to_unuse can locate the entry.  This
		 * apparently redundant "wait_on_page_locked" lets try_to_unuse
		 * defer to do_swap_page in such a case - in some tests,
		 * do_swap_page and try_to_unuse repeatedly compete.
		 */
		wait_on_page_locked(page);
		wait_on_page_writeback(page);
		lock_page(page);
		wait_on_page_writeback(page);

		/*
		 * Remove all references to entry.
		 * Whenever we reach init_mm, there's no address space
		 * to search, but use it as a reminder to search shmem.
		 */
		shmem = 0;
		swcount = *swap_map;
		if (swap_count(swcount)) {
			if (start_mm == &init_mm)
				shmem = shmem_unuse(entry, page);
			else
				retval = unuse_mm(start_mm, entry, page);
		}
		if (swap_count(*swap_map)) {
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *prev_mm = start_mm;
			struct mm_struct *mm;

			atomic_inc(&new_start_mm->mm_users);
			atomic_inc(&prev_mm->mm_users);
			spin_lock(&mmlist_lock);
			while (swap_count(*swap_map) && !retval && !shmem &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				if (!atomic_inc_not_zero(&mm->mm_users))
					continue;
				spin_unlock(&mmlist_lock);
				mmput(prev_mm);
				prev_mm = mm;

				cond_resched();

				swcount = *swap_map;
				if (!swap_count(swcount)) /* any usage ? */
					;
				else if (mm == &init_mm) {
					set_start_mm = 1;
					shmem = shmem_unuse(entry, page);
				} else
					retval = unuse_mm(mm, entry, page);

				if (set_start_mm && *swap_map < swcount) {
					mmput(new_start_mm);
					atomic_inc(&mm->mm_users);
					new_start_mm = mm;
					set_start_mm = 0;
				}
				spin_lock(&mmlist_lock);
			}
			spin_unlock(&mmlist_lock);
			mmput(prev_mm);
			mmput(start_mm);
			start_mm = new_start_mm;
		}
		if (shmem) {
			/* page has already been unlocked and released */
			if (shmem > 0)
				continue;
			retval = shmem;
			break;
		}
		if (retval) {
			unlock_page(page);
			page_cache_release(page);
			break;
		}

		/*
		 * How could swap count reach 0x7ffe ?
		 * There's no way to repeat a swap page within an mm
		 * (except in shmem, where it's the shared object which takes
		 * the reference count)?
		 * We believe SWAP_MAP_MAX cannot occur.(if occur, unsigned
		 * short is too small....)
		 * If that's wrong, then we should worry more about
		 * exit_mmap() and do_munmap() cases described above:
		 * we might be resetting SWAP_MAP_MAX too early here.
		 * We know "Undead"s can happen, they're okay, so don't
		 * report them; but do report if we reset SWAP_MAP_MAX.
		 */
		if (!PageSwapCache(page) || page_private(page) != entry.val)
			goto retry;

		if (swap_count(*swap_map) == SWAP_MAP_MAX) {
			spin_lock(&swap_lock);
			*swap_map = encode_swapmap(0, true);
			spin_unlock(&swap_lock);
			reset_overflow = 1;
		}

		/*
		 * If a reference remains (rare), we would like to leave
		 * the page in the swap cache; but try_to_unmap could
		 * then re-duplicate the entry once we drop page lock,
		 * so we might loop indefinitely; also, that page could
		 * not be swapped out to other storage meanwhile.  So:
		 * delete from cache even if there's another reference,
		 * after ensuring that the data has been saved to disk -
		 * since if the reference remains (rarer), it will be
		 * read from disk into another page.  Splitting into two
		 * pages would be incorrect if swap supported "shared
		 * private" pages, but they are handled by tmpfs files.
		 */
		if (swap_count(*swap_map) &&
		    PageDirty(page) && PageSwapCache(page)) {
			struct writeback_control wbc = {
				.sync_mode = WB_SYNC_NONE,
			};

			swap_writepage(page, &wbc);
			lock_page(page);
			wait_on_page_writeback(page);
		}

		/*
		 * It is conceivable that a racing task removed this page from
		 * swap cache just before we acquired the page lock at the top,
		 * or while we dropped it in unuse_mm().  The page might even
		 * be back in swap cache on another swap area: that we must not
		 * delete, since it may not have been written out to swap yet.
		 */
		if (PageSwapCache(page) &&
		    likely(page_private(page) == entry.val))
			delete_from_swap_cache(page);

		/*
		 * So we could skip searching mms once swap count went
		 * to 1, we did not mark any present ptes as dirty: must
		 * mark page dirty so shrink_page_list will preserve it.
		 */
		SetPageDirty(page);
retry:
		unlock_page(page);
		page_cache_release(page);

		/*
		 * Make sure that we aren't completely killing
		 * interactive performance.
		 */
		cond_resched();
	}

	mmput(start_mm);
	if (reset_overflow) {
		printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
		swap_overflow = 0;
	}
	return retval;
}
1263
1264
1265
1266
1267
1268
1269
1270static void drain_mmlist(void)
1271{
1272 struct list_head *p, *next;
1273 unsigned int i;
1274
1275 for (i = 0; i < nr_swapfiles; i++)
1276 if (swap_info[i].inuse_pages)
1277 return;
1278 spin_lock(&mmlist_lock);
1279 list_for_each_safe(p, next, &init_mm.mmlist)
1280 list_del_init(p);
1281 spin_unlock(&mmlist_lock);
1282}
1283
1284
1285
1286
1287
/*
 * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
 * corresponds to page offset `offset'.  Caller must guarantee @offset
 * lies within some extent, else the BUG_ON fires after a full lap.
 */
sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
{
	struct swap_extent *se = sis->curr_swap_extent;
	struct swap_extent *start_se = se;

	for ( ; ; ) {
		struct list_head *lh;

		if (se->start_page <= offset &&
				offset < (se->start_page + se->nr_pages)) {
			return se->start_block + (offset - se->start_page);
		}
		lh = se->list.next;
		if (lh == &sis->extent_list)
			lh = lh->next;	/* skip over the list head */
		se = list_entry(lh, struct swap_extent, list);
		sis->curr_swap_extent = se;	/* cache hint for next call */
		BUG_ON(se == start_se);		/* It *must* be present */
	}
}
1308
1309#ifdef CONFIG_HIBERNATION
1310
1311
1312
1313
1314sector_t swapdev_block(int swap_type, pgoff_t offset)
1315{
1316 struct swap_info_struct *sis;
1317
1318 if (swap_type >= nr_swapfiles)
1319 return 0;
1320
1321 sis = swap_info + swap_type;
1322 return (sis->flags & SWP_WRITEOK) ? map_swap_page(sis, offset) : 0;
1323}
1324#endif
1325
1326
1327
1328
1329static void destroy_swap_extents(struct swap_info_struct *sis)
1330{
1331 while (!list_empty(&sis->extent_list)) {
1332 struct swap_extent *se;
1333
1334 se = list_entry(sis->extent_list.next,
1335 struct swap_extent, list);
1336 list_del(&se->list);
1337 kfree(se);
1338 }
1339}
1340
1341
1342
1343
1344
1345
1346
/*
 * Add a block range (and the corresponding page range) to this
 * swapdev's extent list, merging with the previous extent when the
 * blocks are contiguous.  The list is kept in page order; callers are
 * expected to add extents in ascending page order.  Returns 1 when a
 * new extent was added, 0 on merge, -ENOMEM on allocation failure.
 */
static int
add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
		unsigned long nr_pages, sector_t start_block)
{
	struct swap_extent *se;
	struct swap_extent *new_se;
	struct list_head *lh;

	lh = sis->extent_list.prev;	/* The highest page extent */
	if (lh != &sis->extent_list) {
		se = list_entry(lh, struct swap_extent, list);
		BUG_ON(se->start_page + se->nr_pages != start_page);
		if (se->start_block + se->nr_pages == start_block) {
			/* Merge it */
			se->nr_pages += nr_pages;
			return 0;
		}
	}

	/*
	 * No merge.  Insert a new extent, preserving ordering.
	 */
	new_se = kmalloc(sizeof(*se), GFP_KERNEL);
	if (new_se == NULL)
		return -ENOMEM;
	new_se->start_page = start_page;
	new_se->nr_pages = nr_pages;
	new_se->start_block = start_block;

	list_add_tail(&new_se->list, &sis->extent_list);
	return 1;
}
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
/*
 * Build the list of contiguous on-disk extents backing this swap file
 * or partition, so swap I/O can bypass the filesystem.  For a block
 * device, a single extent covers everything; for a regular file each
 * PAGE_SIZE-aligned, physically contiguous run of blocks becomes (part
 * of) an extent.  On success *span receives the on-disk span in pages
 * and the return value is the number of extents added; -EINVAL is
 * returned when the file has holes.
 */
static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
{
	struct inode *inode;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	inode = sis->swap_file->f_mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		ret = add_swap_extent(sis, 0, sis->max, 0);
		*span = sis->pages;
		goto done;
	}

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent list.  This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		first_block = bmap(inode, probe_block);
		if (first_block == 0)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		/* All blocks within the page must be contiguous. */
		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = bmap(inode, probe_block + block_in_page);
			if (block == 0)
				goto bad_bmap;
			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* force Empty message */
	sis->max = page_no;
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
done:
	sis->curr_swap_extent = list_entry(sis->extent_list.prev,
					struct swap_extent, list);
	goto out;
bad_bmap:
	printk(KERN_ERR "swapon: swapfile has holes\n");
	ret = -EINVAL;
out:
	return ret;
}
1509
/*
 * swapoff(2): find the swap area backing @specialfile, unlink it from
 * the priority list, fault every entry back into memory via
 * try_to_unuse(), then tear down its maps and extents.  On unuse
 * failure the area is re-inserted and the error returned.
 */
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
	struct swap_info_struct * p = NULL;
	unsigned short *swap_map;
	struct file *swap_file, *victim;
	struct address_space *mapping;
	struct inode *inode;
	char * pathname;
	int i, type, prev;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	pathname = getname(specialfile);
	err = PTR_ERR(pathname);
	if (IS_ERR(pathname))
		goto out;

	victim = filp_open(pathname, O_RDWR|O_LARGEFILE, 0);
	putname(pathname);
	err = PTR_ERR(victim);
	if (IS_ERR(victim))
		goto out;

	/* Locate the active swap area whose file matches the victim. */
	mapping = victim->f_mapping;
	prev = -1;
	spin_lock(&swap_lock);
	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
		p = swap_info + type;
		if (p->flags & SWP_WRITEOK) {
			if (p->swap_file->f_mapping == mapping)
				break;
		}
		prev = type;
	}
	if (type < 0) {
		err = -EINVAL;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	/* account for the memory that unuse will bring back in */
	if (!security_vm_enough_memory(p->pages))
		vm_unacct_memory(p->pages);
	else {
		err = -ENOMEM;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	/* Unlink from the priority list so no new allocations land here. */
	if (prev < 0) {
		swap_list.head = p->next;
	} else {
		swap_info[prev].next = p->next;
	}
	if (type == swap_list.next) {
		/* just pick something that's safe... */
		swap_list.next = swap_list.head;
	}
	if (p->prio < 0) {
		/* close the gap in the auto-assigned negative priorities */
		for (i = p->next; i >= 0; i = swap_info[i].next)
			swap_info[i].prio = p->prio--;
		least_priority++;
	}
	nr_swap_pages -= p->pages;
	total_swap_pages -= p->pages;
	p->flags &= ~SWP_WRITEOK;
	spin_unlock(&swap_lock);

	current->flags |= PF_OOM_ORIGIN;
	err = try_to_unuse(type);
	current->flags &= ~PF_OOM_ORIGIN;

	if (err) {
		/* re-insert swap space back into swap_list */
		spin_lock(&swap_lock);
		if (p->prio < 0)
			p->prio = --least_priority;
		prev = -1;
		for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
			if (p->prio >= swap_info[i].prio)
				break;
			prev = i;
		}
		p->next = i;
		if (prev < 0)
			swap_list.head = swap_list.next = p - swap_info;
		else
			swap_info[prev].next = p - swap_info;
		nr_swap_pages += p->pages;
		total_swap_pages += p->pages;
		p->flags |= SWP_WRITEOK;
		spin_unlock(&swap_lock);
		goto out_dput;
	}

	/* wait for any unplug function to finish */
	down_write(&swap_unplug_sem);
	up_write(&swap_unplug_sem);

	destroy_swap_extents(p);
	mutex_lock(&swapon_mutex);
	spin_lock(&swap_lock);
	drain_mmlist();

	/* wait for anyone still in scan_swap_map */
	p->highest_bit = 0;		/* cuts scans short */
	while (p->flags >= SWP_SCANNING) {
		spin_unlock(&swap_lock);
		schedule_timeout_uninterruptible(1);
		spin_lock(&swap_lock);
	}

	swap_file = p->swap_file;
	p->swap_file = NULL;
	p->max = 0;
	swap_map = p->swap_map;
	p->swap_map = NULL;
	p->flags = 0;
	spin_unlock(&swap_lock);
	mutex_unlock(&swapon_mutex);
	vfree(swap_map);
	/* Destroy swap account information */
	swap_cgroup_swapoff(type);

	inode = mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		struct block_device *bdev = I_BDEV(inode);
		set_blocksize(bdev, p->old_block_size);
		bd_release(bdev);
	} else {
		mutex_lock(&inode->i_mutex);
		inode->i_flags &= ~S_SWAPFILE;
		mutex_unlock(&inode->i_mutex);
	}
	filp_close(swap_file, NULL);
	err = 0;

out_dput:
	filp_close(victim, NULL);
out:
	return err;
}
1651
1652#ifdef CONFIG_PROC_FS
1653
1654static void *swap_start(struct seq_file *swap, loff_t *pos)
1655{
1656 struct swap_info_struct *ptr = swap_info;
1657 int i;
1658 loff_t l = *pos;
1659
1660 mutex_lock(&swapon_mutex);
1661
1662 if (!l)
1663 return SEQ_START_TOKEN;
1664
1665 for (i = 0; i < nr_swapfiles; i++, ptr++) {
1666 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1667 continue;
1668 if (!--l)
1669 return ptr;
1670 }
1671
1672 return NULL;
1673}
1674
1675static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
1676{
1677 struct swap_info_struct *ptr;
1678 struct swap_info_struct *endptr = swap_info + nr_swapfiles;
1679
1680 if (v == SEQ_START_TOKEN)
1681 ptr = swap_info;
1682 else {
1683 ptr = v;
1684 ptr++;
1685 }
1686
1687 for (; ptr < endptr; ptr++) {
1688 if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
1689 continue;
1690 ++*pos;
1691 return ptr;
1692 }
1693
1694 return NULL;
1695}
1696
/* seq_file stop: drop the mutex taken in swap_start(). */
static void swap_stop(struct seq_file *swap, void *v)
{
	mutex_unlock(&swapon_mutex);
}
1701
1702static int swap_show(struct seq_file *swap, void *v)
1703{
1704 struct swap_info_struct *ptr = v;
1705 struct file *file;
1706 int len;
1707
1708 if (ptr == SEQ_START_TOKEN) {
1709 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
1710 return 0;
1711 }
1712
1713 file = ptr->swap_file;
1714 len = seq_path(swap, &file->f_path, " \t\n\\");
1715 seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
1716 len < 40 ? 40 - len : 1, " ",
1717 S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
1718 "partition" : "file\t",
1719 ptr->pages << (PAGE_SHIFT - 10),
1720 ptr->inuse_pages << (PAGE_SHIFT - 10),
1721 ptr->prio);
1722 return 0;
1723}
1724
/* seq_file iterator callbacks backing /proc/swaps. */
static const struct seq_operations swaps_op = {
	.start = swap_start,
	.next = swap_next,
	.stop = swap_stop,
	.show = swap_show
};
1731
/* open() for /proc/swaps: attach the swaps_op seq_file iterator. */
static int swaps_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &swaps_op);
}
1736
/* File operations for /proc/swaps; reading is delegated to seq_file. */
static const struct file_operations proc_swaps_operations = {
	.open		= swaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1743
/* Register the /proc/swaps entry at boot. */
static int __init procswaps_init(void)
{
	proc_create("swaps", 0, NULL, &proc_swaps_operations);
	return 0;
}
__initcall(procswaps_init);
1750#endif
1751
1752#ifdef MAX_SWAPFILES_CHECK
/* Run the arch-provided MAX_SWAPFILES sanity check once at late init. */
static int __init max_swapfiles_check(void)
{
	MAX_SWAPFILES_CHECK();
	return 0;
}
late_initcall(max_swapfiles_check);
1759#endif
1760
1761
1762
1763
1764
1765
/*
 * swapon(2): enable swapping on the file or block device named by
 * @specialfile, after validating its on-disk swap header.  @swap_flags
 * may request an explicit priority via SWAP_FLAG_PREFER.  Returns 0 on
 * success or a negative errno; all partial setup is rolled back through
 * the bad_swap labels on failure.
 */
SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
{
	struct swap_info_struct * p;
	char *name = NULL;
	struct block_device *bdev = NULL;
	struct file *swap_file = NULL;
	struct address_space *mapping;
	unsigned int type;
	int i, prev;
	int error;
	union swap_header *swap_header = NULL;
	unsigned int nr_good_pages = 0;
	int nr_extents = 0;
	sector_t span;
	unsigned long maxpages = 1;
	unsigned long swapfilepages;
	unsigned short *swap_map = NULL;
	struct page *page = NULL;
	struct inode *inode = NULL;
	int did_down = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	/* Grab the first unused swap_info slot. */
	spin_lock(&swap_lock);
	p = swap_info;
	for (type = 0 ; type < nr_swapfiles ; type++,p++)
		if (!(p->flags & SWP_USED))
			break;
	error = -EPERM;
	if (type >= MAX_SWAPFILES) {
		spin_unlock(&swap_lock);
		goto out;
	}
	if (type >= nr_swapfiles)
		nr_swapfiles = type+1;
	memset(p, 0, sizeof(*p));
	INIT_LIST_HEAD(&p->extent_list);
	p->flags = SWP_USED;	/* reserves the slot; not yet SWP_WRITEOK */
	p->next = -1;
	spin_unlock(&swap_lock);
	name = getname(specialfile);
	error = PTR_ERR(name);
	if (IS_ERR(name)) {
		name = NULL;
		goto bad_swap_2;
	}
	swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
	error = PTR_ERR(swap_file);
	if (IS_ERR(swap_file)) {
		swap_file = NULL;
		goto bad_swap_2;
	}

	p->swap_file = swap_file;
	mapping = swap_file->f_mapping;
	inode = mapping->host;

	/* Refuse to activate the same backing store twice. */
	error = -EBUSY;
	for (i = 0; i < nr_swapfiles; i++) {
		struct swap_info_struct *q = &swap_info[i];

		if (i == type || !q->swap_file)
			continue;
		if (mapping == q->swap_file->f_mapping)
			goto bad_swap;
	}

	error = -EINVAL;
	if (S_ISBLK(inode->i_mode)) {
		/* Claim the whole device and switch it to PAGE_SIZE blocks. */
		bdev = I_BDEV(inode);
		error = bd_claim(bdev, sys_swapon);
		if (error < 0) {
			bdev = NULL;
			error = -EINVAL;
			goto bad_swap;
		}
		p->old_block_size = block_size(bdev);
		error = set_blocksize(bdev, PAGE_SIZE);
		if (error < 0)
			goto bad_swap;
		p->bdev = bdev;
	} else if (S_ISREG(inode->i_mode)) {
		p->bdev = inode->i_sb->s_bdev;
		mutex_lock(&inode->i_mutex);	/* held until the out: label */
		did_down = 1;
		if (IS_SWAPFILE(inode)) {
			error = -EBUSY;
			goto bad_swap;
		}
	} else {
		/* neither block device nor regular file */
		goto bad_swap;
	}

	swapfilepages = i_size_read(inode) >> PAGE_SHIFT;

	/*
	 * Read the swap header (page 0) through the page cache; the
	 * mapping must therefore support readpage.
	 */
	if (!mapping->a_ops->readpage) {
		error = -EINVAL;
		goto bad_swap;
	}
	page = read_mapping_page(mapping, 0, swap_file);
	if (IS_ERR(page)) {
		error = PTR_ERR(page);
		goto bad_swap;
	}
	swap_header = kmap(page);

	if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
		printk(KERN_ERR "Unable to find swap-space signature\n");
		error = -EINVAL;
		goto bad_swap;
	}

	/* Header written with the other endianness? Swab it in place. */
	if (swab32(swap_header->info.version) == 1) {
		swab32s(&swap_header->info.version);
		swab32s(&swap_header->info.last_page);
		swab32s(&swap_header->info.nr_badpages);
		for (i = 0; i < swap_header->info.nr_badpages; i++)
			swab32s(&swap_header->info.badpages[i]);
	}

	/* Only version-1 headers are supported. */
	if (swap_header->info.version != 1) {
		printk(KERN_WARNING
			"Unable to handle swap header version %d\n",
			swap_header->info.version);
		error = -EINVAL;
		goto bad_swap;
	}

	p->lowest_bit = 1;
	p->cluster_next = 1;

	/*
	 * Determine the maximum number of pages this swap device may
	 * hold: the round-trip through a swap pte
	 * (swp_entry -> pte -> swp_entry) yields the largest offset
	 * the architecture can encode, which is then capped by the
	 * size declared in the swap header (info.last_page).
	 *
	 * NOTE(review): with maxpages capped at info.last_page and
	 * highest_bit set to maxpages - 1, the header's last page
	 * appears to go unused — looks like an off-by-one; confirm
	 * against the swap header layout before changing.
	 */
	maxpages = swp_offset(pte_to_swp_entry(
			swp_entry_to_pte(swp_entry(0, ~0UL)))) - 1;
	if (maxpages > swap_header->info.last_page)
		maxpages = swap_header->info.last_page;
	p->highest_bit = maxpages - 1;

	error = -EINVAL;
	if (!maxpages)
		goto bad_swap;
	if (swapfilepages && maxpages > swapfilepages) {
		printk(KERN_WARNING
		      "Swap area shorter than signature indicates\n");
		goto bad_swap;
	}
	/* Regular swapfiles may not carry bad-page lists. */
	if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
		goto bad_swap;
	if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
		goto bad_swap;

	/* OK, set up the swap map and apply the bad block list */
	swap_map = vmalloc(maxpages * sizeof(short));
	if (!swap_map) {
		error = -ENOMEM;
		goto bad_swap;
	}

	memset(swap_map, 0, maxpages * sizeof(short));
	for (i = 0; i < swap_header->info.nr_badpages; i++) {
		int page_nr = swap_header->info.badpages[i];
		if (page_nr <= 0 || page_nr >= swap_header->info.last_page) {
			error = -EINVAL;
			goto bad_swap;
		}
		swap_map[page_nr] = SWAP_MAP_BAD;
	}

	error = swap_cgroup_swapon(type, maxpages);
	if (error)
		goto bad_swap;

	/* Header page (0) and bad pages are never allocatable. */
	nr_good_pages = swap_header->info.last_page -
			swap_header->info.nr_badpages -
			1 /* header page */;

	if (nr_good_pages) {
		swap_map[0] = SWAP_MAP_BAD;	/* page 0 holds the header */
		p->max = maxpages;
		p->pages = nr_good_pages;
		nr_extents = setup_swap_extents(p, &span);
		if (nr_extents < 0) {
			error = nr_extents;
			goto bad_swap;
		}
		/* setup_swap_extents() may have trimmed p->pages */
		nr_good_pages = p->pages;
	}
	if (!nr_good_pages) {
		printk(KERN_WARNING "Empty swap-file\n");
		error = -EINVAL;
		goto bad_swap;
	}

	if (p->bdev) {
		/* Non-rotational media: randomize start, try discard. */
		if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
			p->flags |= SWP_SOLIDSTATE;
			p->cluster_next = 1 + (random32() % p->highest_bit);
		}
		if (discard_swap(p) == 0)
			p->flags |= SWP_DISCARDABLE;
	}

	mutex_lock(&swapon_mutex);
	spin_lock(&swap_lock);
	if (swap_flags & SWAP_FLAG_PREFER)
		p->prio =
		  (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
	else
		p->prio = --least_priority;
	p->swap_map = swap_map;
	p->flags |= SWP_WRITEOK;	/* area is now live */
	nr_swap_pages += nr_good_pages;
	total_swap_pages += nr_good_pages;

	printk(KERN_INFO "Adding %uk swap on %s. "
			"Priority:%d extents:%d across:%lluk %s%s\n",
		nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
		nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
		(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
		(p->flags & SWP_DISCARDABLE) ? "D" : "");

	/* Insert into swap_list, kept sorted by descending priority. */
	prev = -1;
	for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
		if (p->prio >= swap_info[i].prio) {
			break;
		}
		prev = i;
	}
	p->next = i;
	if (prev < 0) {
		swap_list.head = swap_list.next = p - swap_info;
	} else {
		swap_info[prev].next = p - swap_info;
	}
	spin_unlock(&swap_lock);
	mutex_unlock(&swapon_mutex);
	error = 0;
	goto out;
bad_swap:
	if (bdev) {
		set_blocksize(bdev, p->old_block_size);
		bd_release(bdev);
	}
	destroy_swap_extents(p);
	swap_cgroup_swapoff(type);
bad_swap_2:
	spin_lock(&swap_lock);
	p->swap_file = NULL;
	p->flags = 0;
	spin_unlock(&swap_lock);
	vfree(swap_map);
	if (swap_file)
		filp_close(swap_file, NULL);
out:
	if (page && !IS_ERR(page)) {
		kunmap(page);
		page_cache_release(page);
	}
	if (name)
		putname(name);
	if (did_down) {
		/* Only mark the inode S_SWAPFILE when activation succeeded. */
		if (!error)
			inode->i_flags |= S_SWAPFILE;
		mutex_unlock(&inode->i_mutex);
	}
	return error;
}
2051
2052void si_swapinfo(struct sysinfo *val)
2053{
2054 unsigned int i;
2055 unsigned long nr_to_be_unused = 0;
2056
2057 spin_lock(&swap_lock);
2058 for (i = 0; i < nr_swapfiles; i++) {
2059 if (!(swap_info[i].flags & SWP_USED) ||
2060 (swap_info[i].flags & SWP_WRITEOK))
2061 continue;
2062 nr_to_be_unused += swap_info[i].inuse_pages;
2063 }
2064 val->freeswap = nr_swap_pages + nr_to_be_unused;
2065 val->totalswap = total_swap_pages + nr_to_be_unused;
2066 spin_unlock(&swap_lock);
2067}
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
/*
 * Validate a swap entry and take a reference on it: set the
 * SWAP_HAS_CACHE bit when @cache is SWAP_CACHE, otherwise bump the
 * map count.  Returns 0 on success, -EINVAL for a bad entry,
 * -EEXIST when the cache bit is already set, -ENOENT for an unused
 * entry.
 */
static int __swap_duplicate(swp_entry_t entry, bool cache)
{
	struct swap_info_struct * p;
	unsigned long offset, type;
	int result = -EINVAL;
	int count;
	bool has_cache;

	/* Migration entries etc. are not real swap entries. */
	if (non_swap_entry(entry))
		return -EINVAL;

	type = swp_type(entry);
	if (type >= nr_swapfiles)
		goto bad_file;
	p = type + swap_info;
	offset = swp_offset(entry);

	spin_lock(&swap_lock);

	if (unlikely(offset >= p->max))
		goto unlock_out;

	count = swap_count(p->swap_map[offset]);
	has_cache = swap_has_cache(p->swap_map[offset]);

	if (cache == SWAP_CACHE) { /* called for swapcache */

		/* set SWAP_HAS_CACHE if the entry is in use, cache not set */
		if (!has_cache && count) {
			p->swap_map[offset] = encode_swapmap(count, true);
			result = 0;
		} else if (has_cache) /* someone else added the cache */
			result = -EEXIST;
		else if (!count) /* no users of this entry */
			result = -ENOENT;

	} else if (count || has_cache) {
		if (count < SWAP_MAP_MAX - 1) {
			p->swap_map[offset] = encode_swapmap(count + 1,
							has_cache);
			result = 0;
		} else if (count <= SWAP_MAP_MAX) {
			/* Pin the count at SWAP_MAP_MAX; warn a few times. */
			if (swap_overflow++ < 5)
				printk(KERN_WARNING
				       "swap_dup: swap entry overflow\n");
			p->swap_map[offset] = encode_swapmap(SWAP_MAP_MAX,
							has_cache);
			result = 0;
		}
	} else
		result = -ENOENT; /* unused swap entry */
unlock_out:
	spin_unlock(&swap_lock);
out:
	return result;

bad_file:
	printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
	goto out;
}
2141
2142
2143
/*
 * Increase the map count of a swap entry by one; counts saturate at
 * SWAP_MAP_MAX inside __swap_duplicate().
 */
void swap_duplicate(swp_entry_t entry)
{
	__swap_duplicate(entry, SWAP_MAP);
}
2148
2149
2150
2151
2152
2153
2154
2155
2156
/*
 * Claim the swap-cache slot for @entry by setting its SWAP_HAS_CACHE
 * bit.  Returns 0 on success, -EEXIST if another cache already claimed
 * it, -ENOENT/-EINVAL for unused or invalid entries.
 */
int swapcache_prepare(swp_entry_t entry)
{
	return __swap_duplicate(entry, SWAP_CACHE);
}
2161
2162
/* Return the swap_info slot for swap @type.  No range checking. */
struct swap_info_struct *
get_swap_info_struct(unsigned type)
{
	return &swap_info[type];
}
2168
2169
2170
2171
2172
/*
 * Swap readahead helper: starting from @entry, find the run of
 * contiguous in-use swap slots surrounding it within its
 * page_cluster-aligned window.  *offset is set to the first slot of
 * the run; the return value is the number of pages to read, or 0
 * when there is nothing beyond the target itself.
 */
int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
{
	struct swap_info_struct *si;
	int our_page_cluster = page_cluster;	/* snapshot of the tunable */
	pgoff_t target, toff;
	pgoff_t base, end;
	int nr_pages = 0;

	if (!our_page_cluster)	/* readahead disabled */
		return 0;

	si = &swap_info[swp_type(entry)];
	target = swp_offset(entry);
	/* Window of 2^page_cluster slots containing the target. */
	base = (target >> our_page_cluster) << our_page_cluster;
	end = base + (1 << our_page_cluster);
	if (!base)		/* slot 0 holds the swap header */
		base++;

	spin_lock(&swap_lock);
	if (end > si->max)	/* don't scan past the end of the map */
		end = si->max;

	/* Count contiguous allocated slots above our target */
	for (toff = target; ++toff < end; nr_pages++) {
		/* Don't read in free or bad pages */
		if (!si->swap_map[toff])
			break;
		if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
			break;
	}
	/* Count contiguous allocated slots below our target */
	for (toff = target; --toff >= base; nr_pages++) {
		/* Don't read in free or bad pages */
		if (!si->swap_map[toff])
			break;
		if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
			break;
	}
	spin_unlock(&swap_lock);

	/*
	 * toff now sits one below the run; ++toff is its first slot.
	 * nr_pages counted the run excluding the target itself, so
	 * ++nr_pages includes it — but report 0 when the target stands
	 * alone, since then there is no readahead to do.
	 */
	*offset = ++toff;
	return nr_pages? ++nr_pages: 0;
}
2220