1
2
3
4
5
6
7
8#include <linux/mm.h>
9#include <linux/sched/mm.h>
10#include <linux/sched/task.h>
11#include <linux/hugetlb.h>
12#include <linux/mman.h>
13#include <linux/slab.h>
14#include <linux/kernel_stat.h>
15#include <linux/swap.h>
16#include <linux/vmalloc.h>
17#include <linux/pagemap.h>
18#include <linux/namei.h>
19#include <linux/shmem_fs.h>
20#include <linux/blkdev.h>
21#include <linux/random.h>
22#include <linux/writeback.h>
23#include <linux/proc_fs.h>
24#include <linux/seq_file.h>
25#include <linux/init.h>
26#include <linux/ksm.h>
27#include <linux/rmap.h>
28#include <linux/security.h>
29#include <linux/backing-dev.h>
30#include <linux/mutex.h>
31#include <linux/capability.h>
32#include <linux/syscalls.h>
33#include <linux/memcontrol.h>
34#include <linux/poll.h>
35#include <linux/oom.h>
36#include <linux/frontswap.h>
37#include <linux/swapfile.h>
38#include <linux/export.h>
39#include <linux/swap_slots.h>
40#include <linux/sort.h>
41
42#include <asm/pgtable.h>
43#include <asm/tlbflush.h>
44#include <linux/swapops.h>
45#include <linux/swap_cgroup.h>
46
/* Forward declarations. */
static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
				 unsigned char);
static void free_swap_count_continuations(struct swap_info_struct *);
static sector_t map_swap_entry(swp_entry_t, struct block_device**);

/* Held by swap_type_of() while it walks swap_info[0 .. nr_swapfiles). */
DEFINE_SPINLOCK(swap_lock);
/* Exclusive upper bound for valid swap type indices into swap_info[]. */
static unsigned int nr_swapfiles;
/*
 * Running count of allocatable swap pages: get_swap_pages() and
 * get_swap_page_of_type() subtract from it before allocating (and give
 * any shortfall back), swap_range_free() adds freed slots back.
 */
atomic_long_t nr_swap_pages;
EXPORT_SYMBOL_GPL(nr_swap_pages);

long total_swap_pages;
static int least_priority;

/* Message fragments printed by the swap_info_get() family on a bad entry. */
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";

PLIST_HEAD(swap_active_head);

/*
 * Priority list of the swap devices get_swap_pages() allocates from.
 * swap_range_alloc() removes a device once all of its pages are in use
 * and swap_range_free() puts it back; all accesses are made under
 * swap_avail_lock.
 */
static PLIST_HEAD(swap_avail_head);
static DEFINE_SPINLOCK(swap_avail_lock);

/* Swap device descriptors, indexed by swap type. */
struct swap_info_struct *swap_info[MAX_SWAPFILES];

static DEFINE_MUTEX(swapon_mutex);

static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait);

static atomic_t proc_poll_event = ATOMIC_INIT(0);
98
99static inline unsigned char swap_count(unsigned char ent)
100{
101 return ent & ~SWAP_HAS_CACHE;
102}
103
104
105static int
106__try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
107{
108 swp_entry_t entry = swp_entry(si->type, offset);
109 struct page *page;
110 int ret = 0;
111
112 page = find_get_page(swap_address_space(entry), swp_offset(entry));
113 if (!page)
114 return 0;
115
116
117
118
119
120
121
122 if (trylock_page(page)) {
123 ret = try_to_free_swap(page);
124 unlock_page(page);
125 }
126 put_page(page);
127 return ret;
128}
129
130
131
132
133
134static int discard_swap(struct swap_info_struct *si)
135{
136 struct swap_extent *se;
137 sector_t start_block;
138 sector_t nr_blocks;
139 int err = 0;
140
141
142 se = &si->first_swap_extent;
143 start_block = (se->start_block + 1) << (PAGE_SHIFT - 9);
144 nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
145 if (nr_blocks) {
146 err = blkdev_issue_discard(si->bdev, start_block,
147 nr_blocks, GFP_KERNEL, 0);
148 if (err)
149 return err;
150 cond_resched();
151 }
152
153 list_for_each_entry(se, &si->first_swap_extent.list, list) {
154 start_block = se->start_block << (PAGE_SHIFT - 9);
155 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
156
157 err = blkdev_issue_discard(si->bdev, start_block,
158 nr_blocks, GFP_KERNEL, 0);
159 if (err)
160 break;
161
162 cond_resched();
163 }
164 return err;
165}
166
167
168
169
170
171static void discard_swap_cluster(struct swap_info_struct *si,
172 pgoff_t start_page, pgoff_t nr_pages)
173{
174 struct swap_extent *se = si->curr_swap_extent;
175 int found_extent = 0;
176
177 while (nr_pages) {
178 if (se->start_page <= start_page &&
179 start_page < se->start_page + se->nr_pages) {
180 pgoff_t offset = start_page - se->start_page;
181 sector_t start_block = se->start_block + offset;
182 sector_t nr_blocks = se->nr_pages - offset;
183
184 if (nr_blocks > nr_pages)
185 nr_blocks = nr_pages;
186 start_page += nr_blocks;
187 nr_pages -= nr_blocks;
188
189 if (!found_extent++)
190 si->curr_swap_extent = se;
191
192 start_block <<= PAGE_SHIFT - 9;
193 nr_blocks <<= PAGE_SHIFT - 9;
194 if (blkdev_issue_discard(si->bdev, start_block,
195 nr_blocks, GFP_NOIO, 0))
196 break;
197 }
198
199 se = list_next_entry(se, list);
200 }
201}
202
/*
 * SWAPFILE_CLUSTER: number of swap slots per cluster. With THP swap a
 * cluster is HPAGE_PMD_NR slots, otherwise 256.
 */
#ifdef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER HPAGE_PMD_NR
#else
#define SWAPFILE_CLUSTER 256
#endif
/* Loop iterations scan_swap_map_slots() runs between reschedule points. */
#define LATENCY_LIMIT 256
209
210static inline void cluster_set_flag(struct swap_cluster_info *info,
211 unsigned int flag)
212{
213 info->flags = flag;
214}
215
216static inline unsigned int cluster_count(struct swap_cluster_info *info)
217{
218 return info->data;
219}
220
221static inline void cluster_set_count(struct swap_cluster_info *info,
222 unsigned int c)
223{
224 info->data = c;
225}
226
/* Set both the flags (@f) and the usage count (@c) of cluster @info. */
static inline void cluster_set_count_flag(struct swap_cluster_info *info,
					  unsigned int c, unsigned int f)
{
	cluster_set_flag(info, f);
	cluster_set_count(info, c);
}
233
234static inline unsigned int cluster_next(struct swap_cluster_info *info)
235{
236 return info->data;
237}
238
239static inline void cluster_set_next(struct swap_cluster_info *info,
240 unsigned int n)
241{
242 info->data = n;
243}
244
/* Set both the flags (@f) and the next-cluster index (@n) of @info. */
static inline void cluster_set_next_flag(struct swap_cluster_info *info,
					 unsigned int n, unsigned int f)
{
	cluster_set_flag(info, f);
	cluster_set_next(info, n);
}
251
252static inline bool cluster_is_free(struct swap_cluster_info *info)
253{
254 return info->flags & CLUSTER_FLAG_FREE;
255}
256
257static inline bool cluster_is_null(struct swap_cluster_info *info)
258{
259 return info->flags & CLUSTER_FLAG_NEXT_NULL;
260}
261
262static inline void cluster_set_null(struct swap_cluster_info *info)
263{
264 info->flags = CLUSTER_FLAG_NEXT_NULL;
265 info->data = 0;
266}
267
268static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
269 unsigned long offset)
270{
271 struct swap_cluster_info *ci;
272
273 ci = si->cluster_info;
274 if (ci) {
275 ci += offset / SWAPFILE_CLUSTER;
276 spin_lock(&ci->lock);
277 }
278 return ci;
279}
280
281static inline void unlock_cluster(struct swap_cluster_info *ci)
282{
283 if (ci)
284 spin_unlock(&ci->lock);
285}
286
287static inline struct swap_cluster_info *lock_cluster_or_swap_info(
288 struct swap_info_struct *si,
289 unsigned long offset)
290{
291 struct swap_cluster_info *ci;
292
293 ci = lock_cluster(si, offset);
294 if (!ci)
295 spin_lock(&si->lock);
296
297 return ci;
298}
299
300static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
301 struct swap_cluster_info *ci)
302{
303 if (ci)
304 unlock_cluster(ci);
305 else
306 spin_unlock(&si->lock);
307}
308
309static inline bool cluster_list_empty(struct swap_cluster_list *list)
310{
311 return cluster_is_null(&list->head);
312}
313
314static inline unsigned int cluster_list_first(struct swap_cluster_list *list)
315{
316 return cluster_next(&list->head);
317}
318
319static void cluster_list_init(struct swap_cluster_list *list)
320{
321 cluster_set_null(&list->head);
322 cluster_set_null(&list->tail);
323}
324
325static void cluster_list_add_tail(struct swap_cluster_list *list,
326 struct swap_cluster_info *ci,
327 unsigned int idx)
328{
329 if (cluster_list_empty(list)) {
330 cluster_set_next_flag(&list->head, idx, 0);
331 cluster_set_next_flag(&list->tail, idx, 0);
332 } else {
333 struct swap_cluster_info *ci_tail;
334 unsigned int tail = cluster_next(&list->tail);
335
336
337
338
339
340 ci_tail = ci + tail;
341 spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING);
342 cluster_set_next(ci_tail, idx);
343 spin_unlock(&ci_tail->lock);
344 cluster_set_next_flag(&list->tail, idx, 0);
345 }
346}
347
348static unsigned int cluster_list_del_first(struct swap_cluster_list *list,
349 struct swap_cluster_info *ci)
350{
351 unsigned int idx;
352
353 idx = cluster_next(&list->head);
354 if (cluster_next(&list->tail) == idx) {
355 cluster_set_null(&list->head);
356 cluster_set_null(&list->tail);
357 } else
358 cluster_set_next_flag(&list->head,
359 cluster_next(&ci[idx]), 0);
360
361 return idx;
362}
363
364
365static void swap_cluster_schedule_discard(struct swap_info_struct *si,
366 unsigned int idx)
367{
368
369
370
371
372
373
374 memset(si->swap_map + idx * SWAPFILE_CLUSTER,
375 SWAP_MAP_BAD, SWAPFILE_CLUSTER);
376
377 cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx);
378
379 schedule_work(&si->discard_work);
380}
381
382static void __free_cluster(struct swap_info_struct *si, unsigned long idx)
383{
384 struct swap_cluster_info *ci = si->cluster_info;
385
386 cluster_set_flag(ci + idx, CLUSTER_FLAG_FREE);
387 cluster_list_add_tail(&si->free_clusters, ci, idx);
388}
389
390
391
392
393
394static void swap_do_scheduled_discard(struct swap_info_struct *si)
395{
396 struct swap_cluster_info *info, *ci;
397 unsigned int idx;
398
399 info = si->cluster_info;
400
401 while (!cluster_list_empty(&si->discard_clusters)) {
402 idx = cluster_list_del_first(&si->discard_clusters, info);
403 spin_unlock(&si->lock);
404
405 discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
406 SWAPFILE_CLUSTER);
407
408 spin_lock(&si->lock);
409 ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
410 __free_cluster(si, idx);
411 memset(si->swap_map + idx * SWAPFILE_CLUSTER,
412 0, SWAPFILE_CLUSTER);
413 unlock_cluster(ci);
414 }
415}
416
417static void swap_discard_work(struct work_struct *work)
418{
419 struct swap_info_struct *si;
420
421 si = container_of(work, struct swap_info_struct, discard_work);
422
423 spin_lock(&si->lock);
424 swap_do_scheduled_discard(si);
425 spin_unlock(&si->lock);
426}
427
428static void alloc_cluster(struct swap_info_struct *si, unsigned long idx)
429{
430 struct swap_cluster_info *ci = si->cluster_info;
431
432 VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx);
433 cluster_list_del_first(&si->free_clusters, ci);
434 cluster_set_count_flag(ci + idx, 0, 0);
435}
436
437static void free_cluster(struct swap_info_struct *si, unsigned long idx)
438{
439 struct swap_cluster_info *ci = si->cluster_info + idx;
440
441 VM_BUG_ON(cluster_count(ci) != 0);
442
443
444
445
446
447 if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
448 (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
449 swap_cluster_schedule_discard(si, idx);
450 return;
451 }
452
453 __free_cluster(si, idx);
454}
455
456
457
458
459
460static void inc_cluster_info_page(struct swap_info_struct *p,
461 struct swap_cluster_info *cluster_info, unsigned long page_nr)
462{
463 unsigned long idx = page_nr / SWAPFILE_CLUSTER;
464
465 if (!cluster_info)
466 return;
467 if (cluster_is_free(&cluster_info[idx]))
468 alloc_cluster(p, idx);
469
470 VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER);
471 cluster_set_count(&cluster_info[idx],
472 cluster_count(&cluster_info[idx]) + 1);
473}
474
475
476
477
478
479
480static void dec_cluster_info_page(struct swap_info_struct *p,
481 struct swap_cluster_info *cluster_info, unsigned long page_nr)
482{
483 unsigned long idx = page_nr / SWAPFILE_CLUSTER;
484
485 if (!cluster_info)
486 return;
487
488 VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0);
489 cluster_set_count(&cluster_info[idx],
490 cluster_count(&cluster_info[idx]) - 1);
491
492 if (cluster_count(&cluster_info[idx]) == 0)
493 free_cluster(p, idx);
494}
495
496
497
498
499
500static bool
501scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
502 unsigned long offset)
503{
504 struct percpu_cluster *percpu_cluster;
505 bool conflict;
506
507 offset /= SWAPFILE_CLUSTER;
508 conflict = !cluster_list_empty(&si->free_clusters) &&
509 offset != cluster_list_first(&si->free_clusters) &&
510 cluster_is_free(&si->cluster_info[offset]);
511
512 if (!conflict)
513 return false;
514
515 percpu_cluster = this_cpu_ptr(si->percpu_cluster);
516 cluster_set_null(&percpu_cluster->index);
517 return true;
518}
519
520
521
522
523
524static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
525 unsigned long *offset, unsigned long *scan_base)
526{
527 struct percpu_cluster *cluster;
528 struct swap_cluster_info *ci;
529 bool found_free;
530 unsigned long tmp, max;
531
532new_cluster:
533 cluster = this_cpu_ptr(si->percpu_cluster);
534 if (cluster_is_null(&cluster->index)) {
535 if (!cluster_list_empty(&si->free_clusters)) {
536 cluster->index = si->free_clusters.head;
537 cluster->next = cluster_next(&cluster->index) *
538 SWAPFILE_CLUSTER;
539 } else if (!cluster_list_empty(&si->discard_clusters)) {
540
541
542
543
544 swap_do_scheduled_discard(si);
545 *scan_base = *offset = si->cluster_next;
546 goto new_cluster;
547 } else
548 return false;
549 }
550
551 found_free = false;
552
553
554
555
556
557 tmp = cluster->next;
558 max = min_t(unsigned long, si->max,
559 (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
560 if (tmp >= max) {
561 cluster_set_null(&cluster->index);
562 goto new_cluster;
563 }
564 ci = lock_cluster(si, tmp);
565 while (tmp < max) {
566 if (!si->swap_map[tmp]) {
567 found_free = true;
568 break;
569 }
570 tmp++;
571 }
572 unlock_cluster(ci);
573 if (!found_free) {
574 cluster_set_null(&cluster->index);
575 goto new_cluster;
576 }
577 cluster->next = tmp + 1;
578 *offset = tmp;
579 *scan_base = tmp;
580 return found_free;
581}
582
583static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
584 unsigned int nr_entries)
585{
586 unsigned int end = offset + nr_entries - 1;
587
588 if (offset == si->lowest_bit)
589 si->lowest_bit += nr_entries;
590 if (end == si->highest_bit)
591 si->highest_bit -= nr_entries;
592 si->inuse_pages += nr_entries;
593 if (si->inuse_pages == si->pages) {
594 si->lowest_bit = si->max;
595 si->highest_bit = 0;
596 spin_lock(&swap_avail_lock);
597 plist_del(&si->avail_list, &swap_avail_head);
598 spin_unlock(&swap_avail_lock);
599 }
600}
601
602static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
603 unsigned int nr_entries)
604{
605 unsigned long end = offset + nr_entries - 1;
606 void (*swap_slot_free_notify)(struct block_device *, unsigned long);
607
608 if (offset < si->lowest_bit)
609 si->lowest_bit = offset;
610 if (end > si->highest_bit) {
611 bool was_full = !si->highest_bit;
612
613 si->highest_bit = end;
614 if (was_full && (si->flags & SWP_WRITEOK)) {
615 spin_lock(&swap_avail_lock);
616 WARN_ON(!plist_node_empty(&si->avail_list));
617 if (plist_node_empty(&si->avail_list))
618 plist_add(&si->avail_list, &swap_avail_head);
619 spin_unlock(&swap_avail_lock);
620 }
621 }
622 atomic_long_add(nr_entries, &nr_swap_pages);
623 si->inuse_pages -= nr_entries;
624 if (si->flags & SWP_BLKDEV)
625 swap_slot_free_notify =
626 si->bdev->bd_disk->fops->swap_slot_free_notify;
627 else
628 swap_slot_free_notify = NULL;
629 while (offset <= end) {
630 frontswap_invalidate_page(si->type, offset);
631 if (swap_slot_free_notify)
632 swap_slot_free_notify(si->bdev, offset);
633 offset++;
634 }
635}
636
/*
 * Allocate up to @nr (capped at SWAP_BATCH) swap slots from @si.
 *
 * Each allocated slot gets @usage written to its swap_map byte and its
 * swap entry stored in @slots[]. Returns the number of slots allocated,
 * which may be fewer than requested, including 0.
 *
 * Entered and left with si->lock held; the lock is dropped and retaken
 * around the unlocked scans, the reclaim attempt and the reschedule
 * point below. SWP_SCANNING is added to si->flags for the duration of
 * the call and removed on every exit path.
 */
static int scan_swap_map_slots(struct swap_info_struct *si,
			       unsigned char usage, int nr,
			       swp_entry_t slots[])
{
	struct swap_cluster_info *ci;
	unsigned long offset;
	unsigned long scan_base;
	unsigned long last_in_cluster = 0;
	int latency_ration = LATENCY_LIMIT;
	int n_ret = 0;

	if (nr > SWAP_BATCH)
		nr = SWAP_BATCH;

	si->flags += SWP_SCANNING;
	scan_base = offset = si->cluster_next;

	/* Devices with cluster_info allocate through the percpu cluster. */
	if (si->cluster_info) {
		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
			goto checks;
		else
			goto scan;
	}

	/* No cluster_info: cluster_nr counts slots left in the current run. */
	if (unlikely(!si->cluster_nr--)) {
		/* Too few free pages for a whole empty run; don't look. */
		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
			si->cluster_nr = SWAPFILE_CLUSTER - 1;
			goto checks;
		}

		spin_unlock(&si->lock);

		/*
		 * Unlocked search from lowest_bit for SWAPFILE_CLUSTER
		 * consecutive zero swap_map bytes; the result is only a
		 * hint and is re-validated under the lock at "checks".
		 */
		scan_base = offset = si->lowest_bit;
		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;

		for (; last_in_cluster <= si->highest_bit; offset++) {
			if (si->swap_map[offset])
				last_in_cluster = offset + SWAPFILE_CLUSTER;
			else if (offset == last_in_cluster) {
				spin_lock(&si->lock);
				/* Rewind to the first slot of the empty run. */
				offset -= SWAPFILE_CLUSTER - 1;
				si->cluster_next = offset;
				si->cluster_nr = SWAPFILE_CLUSTER - 1;
				goto checks;
			}
			if (unlikely(--latency_ration < 0)) {
				cond_resched();
				latency_ration = LATENCY_LIMIT;
			}
		}

		/* No empty run found: fall back to the starting point. */
		offset = scan_base;
		spin_lock(&si->lock);
		si->cluster_nr = SWAPFILE_CLUSTER - 1;
	}

checks:
	if (si->cluster_info) {
		while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
			/* Stop here if some slots were already obtained. */
			if (n_ret)
				goto done;
			if (!scan_swap_map_try_ssd_cluster(si, &offset,
							&scan_base))
				goto scan;
		}
	}
	if (!(si->flags & SWP_WRITEOK))
		goto no_page;
	if (!si->highest_bit)
		goto no_page;
	if (offset > si->highest_bit)
		scan_base = offset = si->lowest_bit;

	ci = lock_cluster(si, offset);

	/* When swap is full, try to reclaim a slot held only by the cache. */
	if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
		int swap_was_freed;
		unlock_cluster(ci);
		spin_unlock(&si->lock);
		swap_was_freed = __try_to_reclaim_swap(si, offset);
		spin_lock(&si->lock);

		/* Freed: re-check the same slot; otherwise scan onwards. */
		if (swap_was_freed)
			goto checks;
		goto scan;
	}

	/* Slot is in use. */
	if (si->swap_map[offset]) {
		unlock_cluster(ci);
		if (!n_ret)
			goto scan;
		else
			goto done;
	}
	si->swap_map[offset] = usage;
	inc_cluster_info_page(si, si->cluster_info, offset);
	unlock_cluster(ci);

	swap_range_alloc(si, offset, 1);
	si->cluster_next = offset + 1;
	slots[n_ret++] = swp_entry(si->type, offset);

	/* Enough slots, or the highest usable slot has been reached. */
	if ((n_ret == nr) || (offset >= si->highest_bit))
		goto done;

	/* Latency budget used up: return what we have, or yield and go on. */
	if (unlikely(--latency_ration < 0)) {
		if (n_ret)
			goto done;
		spin_unlock(&si->lock);
		cond_resched();
		spin_lock(&si->lock);
		latency_ration = LATENCY_LIMIT;
	}

	/* Cluster devices: try for another slot in the percpu cluster. */
	if (si->cluster_info) {
		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
			goto checks;
		else
			goto done;
	}

	/* No cluster_info: try the next slot of the current run. */
	++offset;

	if (si->cluster_nr && !si->swap_map[offset]) {
		--si->cluster_nr;
		goto checks;
	}

done:
	si->flags -= SWP_SCANNING;
	return n_ret;

scan:
	/*
	 * Unlocked linear scan: first from offset + 1 up to highest_bit,
	 * then from lowest_bit up to scan_base. Any candidate is
	 * re-validated under si->lock at "checks".
	 */
	spin_unlock(&si->lock);
	while (++offset <= si->highest_bit) {
		if (!si->swap_map[offset]) {
			spin_lock(&si->lock);
			goto checks;
		}
		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
			spin_lock(&si->lock);
			goto checks;
		}
		if (unlikely(--latency_ration < 0)) {
			cond_resched();
			latency_ration = LATENCY_LIMIT;
		}
	}
	offset = si->lowest_bit;
	while (offset < scan_base) {
		if (!si->swap_map[offset]) {
			spin_lock(&si->lock);
			goto checks;
		}
		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
			spin_lock(&si->lock);
			goto checks;
		}
		if (unlikely(--latency_ration < 0)) {
			cond_resched();
			latency_ration = LATENCY_LIMIT;
		}
		offset++;
	}
	spin_lock(&si->lock);

no_page:
	si->flags -= SWP_SCANNING;
	return n_ret;
}
833
834#ifdef CONFIG_THP_SWAP
835static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
836{
837 unsigned long idx;
838 struct swap_cluster_info *ci;
839 unsigned long offset, i;
840 unsigned char *map;
841
842 if (cluster_list_empty(&si->free_clusters))
843 return 0;
844
845 idx = cluster_list_first(&si->free_clusters);
846 offset = idx * SWAPFILE_CLUSTER;
847 ci = lock_cluster(si, offset);
848 alloc_cluster(si, idx);
849 cluster_set_count_flag(ci, SWAPFILE_CLUSTER, 0);
850
851 map = si->swap_map + offset;
852 for (i = 0; i < SWAPFILE_CLUSTER; i++)
853 map[i] = SWAP_HAS_CACHE;
854 unlock_cluster(ci);
855 swap_range_alloc(si, offset, SWAPFILE_CLUSTER);
856 *slot = swp_entry(si->type, offset);
857
858 return 1;
859}
860
861static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
862{
863 unsigned long offset = idx * SWAPFILE_CLUSTER;
864 struct swap_cluster_info *ci;
865
866 ci = lock_cluster(si, offset);
867 cluster_set_count_flag(ci, 0, 0);
868 free_cluster(si, idx);
869 unlock_cluster(ci);
870 swap_range_free(si, offset, SWAPFILE_CLUSTER);
871}
872#else
873static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
874{
875 VM_WARN_ON_ONCE(1);
876 return 0;
877}
878#endif
879
880static unsigned long scan_swap_map(struct swap_info_struct *si,
881 unsigned char usage)
882{
883 swp_entry_t entry;
884 int n_ret;
885
886 n_ret = scan_swap_map_slots(si, usage, 1, &entry);
887
888 if (n_ret)
889 return swp_offset(entry);
890 else
891 return 0;
892
893}
894
/*
 * Allocate swap entries from the available swap devices.
 *
 * @n_goal:      number of entries wanted (capped at SWAP_BATCH and at the
 *               number currently available)
 * @cluster:     true to allocate one whole cluster of SWAPFILE_CLUSTER
 *               slots instead of individual slots
 * @swp_entries: output array for the allocated entries
 *
 * Returns the number of entries stored in @swp_entries (0 if none).
 * nr_swap_pages is debited for the goal up front and the unallocated
 * part is credited back before returning.
 */
int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
{
	unsigned long nr_pages = cluster ? SWAPFILE_CLUSTER : 1;
	struct swap_info_struct *si, *next;
	long avail_pgs;
	int n_ret = 0;

	/* Only one cluster per call is supported. */
	WARN_ON_ONCE(n_goal > 1 && cluster);

	/*
	 * NOTE(review): nr_pages is unsigned long, so this division is done
	 * in unsigned arithmetic; if nr_swap_pages were ever negative the
	 * quotient would not be <= 0 for nr_pages > 1 - confirm whether the
	 * counter can go negative.
	 */
	avail_pgs = atomic_long_read(&nr_swap_pages) / nr_pages;
	if (avail_pgs <= 0)
		goto noswap;

	if (n_goal > SWAP_BATCH)
		n_goal = SWAP_BATCH;

	if (n_goal > avail_pgs)
		n_goal = avail_pgs;

	atomic_long_sub(n_goal * nr_pages, &nr_swap_pages);

	spin_lock(&swap_avail_lock);

start_over:
	plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
		/* Requeue si on the list before trying to allocate from it. */
		plist_requeue(&si->avail_list, &swap_avail_head);
		spin_unlock(&swap_avail_lock);
		spin_lock(&si->lock);
		if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
			spin_lock(&swap_avail_lock);
			/* Already taken off the list by someone else. */
			if (plist_node_empty(&si->avail_list)) {
				spin_unlock(&si->lock);
				goto nextsi;
			}
			/* Still listed although unusable: warn and remove. */
			WARN(!si->highest_bit,
			     "swap_info %d in list but !highest_bit\n",
			     si->type);
			WARN(!(si->flags & SWP_WRITEOK),
			     "swap_info %d in list but !SWP_WRITEOK\n",
			     si->type);
			plist_del(&si->avail_list, &swap_avail_head);
			spin_unlock(&si->lock);
			goto nextsi;
		}
		if (cluster)
			n_ret = swap_alloc_cluster(si, swp_entries);
		else
			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
						    n_goal, swp_entries);
		spin_unlock(&si->lock);
		/* A cluster request does not fall through to other devices. */
		if (n_ret || cluster)
			goto check_out;
		pr_debug("scan_swap_map of si %d failed to find offset\n",
			si->type);

		spin_lock(&swap_avail_lock);
nextsi:
		/*
		 * swap_avail_lock was dropped above, so the saved next
		 * entry may have been removed from the list meanwhile; if
		 * so, restart the walk from the head.
		 */
		if (plist_node_empty(&next->avail_list))
			goto start_over;
	}

	spin_unlock(&swap_avail_lock);

check_out:
	/* Give back whatever part of the up-front debit was not used. */
	if (n_ret < n_goal)
		atomic_long_add((long)(n_goal - n_ret) * nr_pages,
				&nr_swap_pages);
noswap:
	return n_ret;
}
978
979
980swp_entry_t get_swap_page_of_type(int type)
981{
982 struct swap_info_struct *si;
983 pgoff_t offset;
984
985 si = swap_info[type];
986 spin_lock(&si->lock);
987 if (si && (si->flags & SWP_WRITEOK)) {
988 atomic_long_dec(&nr_swap_pages);
989
990 offset = scan_swap_map(si, 1);
991 if (offset) {
992 spin_unlock(&si->lock);
993 return swp_entry(type, offset);
994 }
995 atomic_long_inc(&nr_swap_pages);
996 }
997 spin_unlock(&si->lock);
998 return (swp_entry_t) {0};
999}
1000
1001static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
1002{
1003 struct swap_info_struct *p;
1004 unsigned long offset, type;
1005
1006 if (!entry.val)
1007 goto out;
1008 type = swp_type(entry);
1009 if (type >= nr_swapfiles)
1010 goto bad_nofile;
1011 p = swap_info[type];
1012 if (!(p->flags & SWP_USED))
1013 goto bad_device;
1014 offset = swp_offset(entry);
1015 if (offset >= p->max)
1016 goto bad_offset;
1017 return p;
1018
1019bad_offset:
1020 pr_err("swap_info_get: %s%08lx\n", Bad_offset, entry.val);
1021 goto out;
1022bad_device:
1023 pr_err("swap_info_get: %s%08lx\n", Unused_file, entry.val);
1024 goto out;
1025bad_nofile:
1026 pr_err("swap_info_get: %s%08lx\n", Bad_file, entry.val);
1027out:
1028 return NULL;
1029}
1030
1031static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
1032{
1033 struct swap_info_struct *p;
1034
1035 p = __swap_info_get(entry);
1036 if (!p)
1037 goto out;
1038 if (!p->swap_map[swp_offset(entry)])
1039 goto bad_free;
1040 return p;
1041
1042bad_free:
1043 pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val);
1044 goto out;
1045out:
1046 return NULL;
1047}
1048
1049static struct swap_info_struct *swap_info_get(swp_entry_t entry)
1050{
1051 struct swap_info_struct *p;
1052
1053 p = _swap_info_get(entry);
1054 if (p)
1055 spin_lock(&p->lock);
1056 return p;
1057}
1058
1059static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
1060 struct swap_info_struct *q)
1061{
1062 struct swap_info_struct *p;
1063
1064 p = _swap_info_get(entry);
1065
1066 if (p != q) {
1067 if (q != NULL)
1068 spin_unlock(&q->lock);
1069 if (p != NULL)
1070 spin_lock(&p->lock);
1071 }
1072 return p;
1073}
1074
1075static unsigned char __swap_entry_free(struct swap_info_struct *p,
1076 swp_entry_t entry, unsigned char usage)
1077{
1078 struct swap_cluster_info *ci;
1079 unsigned long offset = swp_offset(entry);
1080 unsigned char count;
1081 unsigned char has_cache;
1082
1083 ci = lock_cluster_or_swap_info(p, offset);
1084
1085 count = p->swap_map[offset];
1086
1087 has_cache = count & SWAP_HAS_CACHE;
1088 count &= ~SWAP_HAS_CACHE;
1089
1090 if (usage == SWAP_HAS_CACHE) {
1091 VM_BUG_ON(!has_cache);
1092 has_cache = 0;
1093 } else if (count == SWAP_MAP_SHMEM) {
1094
1095
1096
1097
1098 count = 0;
1099 } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
1100 if (count == COUNT_CONTINUED) {
1101 if (swap_count_continued(p, offset, count))
1102 count = SWAP_MAP_MAX | COUNT_CONTINUED;
1103 else
1104 count = SWAP_MAP_MAX;
1105 } else
1106 count--;
1107 }
1108
1109 usage = count | has_cache;
1110 p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
1111
1112 unlock_cluster_or_swap_info(p, ci);
1113
1114 return usage;
1115}
1116
1117static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
1118{
1119 struct swap_cluster_info *ci;
1120 unsigned long offset = swp_offset(entry);
1121 unsigned char count;
1122
1123 ci = lock_cluster(p, offset);
1124 count = p->swap_map[offset];
1125 VM_BUG_ON(count != SWAP_HAS_CACHE);
1126 p->swap_map[offset] = 0;
1127 dec_cluster_info_page(p, p->cluster_info, offset);
1128 unlock_cluster(ci);
1129
1130 mem_cgroup_uncharge_swap(entry, 1);
1131 swap_range_free(p, offset, 1);
1132}
1133
1134
1135
1136
1137
1138void swap_free(swp_entry_t entry)
1139{
1140 struct swap_info_struct *p;
1141
1142 p = _swap_info_get(entry);
1143 if (p) {
1144 if (!__swap_entry_free(p, entry, 1))
1145 free_swap_slot(entry);
1146 }
1147}
1148
1149
1150
1151
1152static void swapcache_free(swp_entry_t entry)
1153{
1154 struct swap_info_struct *p;
1155
1156 p = _swap_info_get(entry);
1157 if (p) {
1158 if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE))
1159 free_swap_slot(entry);
1160 }
1161}
1162
1163#ifdef CONFIG_THP_SWAP
1164static void swapcache_free_cluster(swp_entry_t entry)
1165{
1166 unsigned long offset = swp_offset(entry);
1167 unsigned long idx = offset / SWAPFILE_CLUSTER;
1168 struct swap_cluster_info *ci;
1169 struct swap_info_struct *si;
1170 unsigned char *map;
1171 unsigned int i;
1172
1173 si = swap_info_get(entry);
1174 if (!si)
1175 return;
1176
1177 ci = lock_cluster(si, offset);
1178 map = si->swap_map + offset;
1179 for (i = 0; i < SWAPFILE_CLUSTER; i++) {
1180 VM_BUG_ON(map[i] != SWAP_HAS_CACHE);
1181 map[i] = 0;
1182 }
1183 unlock_cluster(ci);
1184 mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
1185 swap_free_cluster(si, idx);
1186 spin_unlock(&si->lock);
1187}
1188#else
/* Variant built without CONFIG_THP_SWAP: nothing to free, so a no-op. */
static inline void swapcache_free_cluster(swp_entry_t entry)
{
}
1192#endif
1193
1194void put_swap_page(struct page *page, swp_entry_t entry)
1195{
1196 if (!PageTransHuge(page))
1197 swapcache_free(entry);
1198 else
1199 swapcache_free_cluster(entry);
1200}
1201
1202static int swp_entry_cmp(const void *ent1, const void *ent2)
1203{
1204 const swp_entry_t *e1 = ent1, *e2 = ent2;
1205
1206 return (int)swp_type(*e1) - (int)swp_type(*e2);
1207}
1208
1209void swapcache_free_entries(swp_entry_t *entries, int n)
1210{
1211 struct swap_info_struct *p, *prev;
1212 int i;
1213
1214 if (n <= 0)
1215 return;
1216
1217 prev = NULL;
1218 p = NULL;
1219
1220
1221
1222
1223
1224
1225 if (nr_swapfiles > 1)
1226 sort(entries, n, sizeof(entries[0]), swp_entry_cmp, NULL);
1227 for (i = 0; i < n; ++i) {
1228 p = swap_info_get_cont(entries[i], prev);
1229 if (p)
1230 swap_entry_free(p, entries[i]);
1231 prev = p;
1232 }
1233 if (p)
1234 spin_unlock(&p->lock);
1235}
1236
1237
1238
1239
1240
1241
1242int page_swapcount(struct page *page)
1243{
1244 int count = 0;
1245 struct swap_info_struct *p;
1246 struct swap_cluster_info *ci;
1247 swp_entry_t entry;
1248 unsigned long offset;
1249
1250 entry.val = page_private(page);
1251 p = _swap_info_get(entry);
1252 if (p) {
1253 offset = swp_offset(entry);
1254 ci = lock_cluster_or_swap_info(p, offset);
1255 count = swap_count(p->swap_map[offset]);
1256 unlock_cluster_or_swap_info(p, ci);
1257 }
1258 return count;
1259}
1260
1261static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
1262{
1263 int count = 0;
1264 pgoff_t offset = swp_offset(entry);
1265 struct swap_cluster_info *ci;
1266
1267 ci = lock_cluster_or_swap_info(si, offset);
1268 count = swap_count(si->swap_map[offset]);
1269 unlock_cluster_or_swap_info(si, ci);
1270 return count;
1271}
1272
1273
1274
1275
1276
1277
1278int __swp_swapcount(swp_entry_t entry)
1279{
1280 int count = 0;
1281 struct swap_info_struct *si;
1282
1283 si = __swap_info_get(entry);
1284 if (si)
1285 count = swap_swapcount(si, entry);
1286 return count;
1287}
1288
1289
1290
1291
1292
1293int swp_swapcount(swp_entry_t entry)
1294{
1295 int count, tmp_count, n;
1296 struct swap_info_struct *p;
1297 struct swap_cluster_info *ci;
1298 struct page *page;
1299 pgoff_t offset;
1300 unsigned char *map;
1301
1302 p = _swap_info_get(entry);
1303 if (!p)
1304 return 0;
1305
1306 offset = swp_offset(entry);
1307
1308 ci = lock_cluster_or_swap_info(p, offset);
1309
1310 count = swap_count(p->swap_map[offset]);
1311 if (!(count & COUNT_CONTINUED))
1312 goto out;
1313
1314 count &= ~COUNT_CONTINUED;
1315 n = SWAP_MAP_MAX + 1;
1316
1317 page = vmalloc_to_page(p->swap_map + offset);
1318 offset &= ~PAGE_MASK;
1319 VM_BUG_ON(page_private(page) != SWP_CONTINUED);
1320
1321 do {
1322 page = list_next_entry(page, lru);
1323 map = kmap_atomic(page);
1324 tmp_count = map[offset];
1325 kunmap_atomic(map);
1326
1327 count += (tmp_count & ~COUNT_CONTINUED) * n;
1328 n *= (SWAP_CONT_MAX + 1);
1329 } while (tmp_count & COUNT_CONTINUED);
1330out:
1331 unlock_cluster_or_swap_info(p, ci);
1332 return count;
1333}
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345bool reuse_swap_page(struct page *page, int *total_mapcount)
1346{
1347 int count;
1348
1349 VM_BUG_ON_PAGE(!PageLocked(page), page);
1350 if (unlikely(PageKsm(page)))
1351 return false;
1352 count = page_trans_huge_mapcount(page, total_mapcount);
1353 if (count <= 1 && PageSwapCache(page)) {
1354 count += page_swapcount(page);
1355 if (count != 1)
1356 goto out;
1357 if (!PageWriteback(page)) {
1358 delete_from_swap_cache(page);
1359 SetPageDirty(page);
1360 } else {
1361 swp_entry_t entry;
1362 struct swap_info_struct *p;
1363
1364 entry.val = page_private(page);
1365 p = swap_info_get(entry);
1366 if (p->flags & SWP_STABLE_WRITES) {
1367 spin_unlock(&p->lock);
1368 return false;
1369 }
1370 spin_unlock(&p->lock);
1371 }
1372 }
1373out:
1374 return count <= 1;
1375}
1376
1377
1378
1379
1380
/*
 * Remove the locked @page from the swap cache if nothing else needs its
 * swap slot.
 *
 * Returns 1 when the page was removed from the swap cache (and marked
 * dirty), 0 when it is not in the swap cache, is under writeback, still
 * has swap references, or storage is suspended.
 */
int try_to_free_swap(struct page *page)
{
	VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (!PageSwapCache(page) || PageWriteback(page) ||
	    page_swapcount(page))
		return 0;

	/*
	 * Leave the slot alone while pm_suspended_storage() reports that
	 * storage is suspended.
	 * NOTE(review): presumably protects a hibernation image being
	 * written to swap - confirm.
	 */
	if (pm_suspended_storage())
		return 0;

	delete_from_swap_cache(page);
	SetPageDirty(page);
	return 1;
}
1414
1415
1416
1417
1418
1419int free_swap_and_cache(swp_entry_t entry)
1420{
1421 struct swap_info_struct *p;
1422 struct page *page = NULL;
1423 unsigned char count;
1424
1425 if (non_swap_entry(entry))
1426 return 1;
1427
1428 p = _swap_info_get(entry);
1429 if (p) {
1430 count = __swap_entry_free(p, entry, 1);
1431 if (count == SWAP_HAS_CACHE) {
1432 page = find_get_page(swap_address_space(entry),
1433 swp_offset(entry));
1434 if (page && !trylock_page(page)) {
1435 put_page(page);
1436 page = NULL;
1437 }
1438 } else if (!count)
1439 free_swap_slot(entry);
1440 }
1441 if (page) {
1442
1443
1444
1445
1446 if (PageSwapCache(page) && !PageWriteback(page) &&
1447 (!page_mapped(page) || mem_cgroup_swap_full(page)) &&
1448 !swap_swapcount(p, entry)) {
1449 delete_from_swap_cache(page);
1450 SetPageDirty(page);
1451 }
1452 unlock_page(page);
1453 put_page(page);
1454 }
1455 return p != NULL;
1456}
1457
1458#ifdef CONFIG_HIBERNATION
1459
1460
1461
1462
1463
1464
1465
1466
1467int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
1468{
1469 struct block_device *bdev = NULL;
1470 int type;
1471
1472 if (device)
1473 bdev = bdget(device);
1474
1475 spin_lock(&swap_lock);
1476 for (type = 0; type < nr_swapfiles; type++) {
1477 struct swap_info_struct *sis = swap_info[type];
1478
1479 if (!(sis->flags & SWP_WRITEOK))
1480 continue;
1481
1482 if (!bdev) {
1483 if (bdev_p)
1484 *bdev_p = bdgrab(sis->bdev);
1485
1486 spin_unlock(&swap_lock);
1487 return type;
1488 }
1489 if (bdev == sis->bdev) {
1490 struct swap_extent *se = &sis->first_swap_extent;
1491
1492 if (se->start_block == offset) {
1493 if (bdev_p)
1494 *bdev_p = bdgrab(sis->bdev);
1495
1496 spin_unlock(&swap_lock);
1497 bdput(bdev);
1498 return type;
1499 }
1500 }
1501 }
1502 spin_unlock(&swap_lock);
1503 if (bdev)
1504 bdput(bdev);
1505
1506 return -ENODEV;
1507}
1508
1509
1510
1511
1512
1513sector_t swapdev_block(int type, pgoff_t offset)
1514{
1515 struct block_device *bdev;
1516
1517 if ((unsigned int)type >= nr_swapfiles)
1518 return 0;
1519 if (!(swap_info[type]->flags & SWP_WRITEOK))
1520 return 0;
1521 return map_swap_entry(swp_entry(type, offset), &bdev);
1522}
1523
1524
1525
1526
1527
1528
1529
1530unsigned int count_swap_pages(int type, int free)
1531{
1532 unsigned int n = 0;
1533
1534 spin_lock(&swap_lock);
1535 if ((unsigned int)type < nr_swapfiles) {
1536 struct swap_info_struct *sis = swap_info[type];
1537
1538 spin_lock(&sis->lock);
1539 if (sis->flags & SWP_WRITEOK) {
1540 n = sis->pages;
1541 if (free)
1542 n -= sis->inuse_pages;
1543 }
1544 spin_unlock(&sis->lock);
1545 }
1546 spin_unlock(&swap_lock);
1547 return n;
1548}
1549#endif
1550
1551static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)
1552{
1553 return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte);
1554}
1555
1556
1557
1558
1559
1560
/*
 * Replace the swap pte for @entry at @addr in @vma with a present mapping
 * of @page.
 *
 * Returns 1 when the pte was replaced, 0 when the pte no longer holds
 * @entry once the page-table lock is taken, and -ENOMEM when the KSM copy
 * or the memcg charge fails.
 */
static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, swp_entry_t entry, struct page *page)
{
	struct page *swapcache;
	struct mem_cgroup *memcg;
	spinlock_t *ptl;
	pte_t *pte;
	int ret = 1;

	/* Remember the caller's page; KSM may hand back a different one. */
	swapcache = page;
	page = ksm_might_need_to_copy(page, vma, addr);
	if (unlikely(!page))
		return -ENOMEM;

	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
				&memcg, false)) {
		ret = -ENOMEM;
		goto out_nolock;
	}

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	/* Re-check under the pte lock: the entry may have changed meanwhile. */
	if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
		mem_cgroup_cancel_charge(page, memcg, false);
		ret = 0;
		goto out;
	}

	/* One swap entry becomes one anonymous page in this mm. */
	dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
	get_page(page);
	set_pte_at(vma->vm_mm, addr, pte,
		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
	if (page == swapcache) {
		page_add_anon_rmap(page, vma, addr, false);
		mem_cgroup_commit_charge(page, memcg, true, false);
	} else {
		/* ksm_might_need_to_copy() returned a different page. */
		page_add_new_anon_rmap(page, vma, addr, false);
		mem_cgroup_commit_charge(page, memcg, false, false);
		lru_cache_add_active_or_unevictable(page, vma);
	}
	swap_free(entry);
	/* The page was just mapped by this path; mark it active. */
	activate_page(page);
out:
	pte_unmap_unlock(pte, ptl);
out_nolock:
	/*
	 * A page that differs from the caller's is unlocked and released
	 * here; the caller's own page is left untouched.
	 */
	if (page != swapcache) {
		unlock_page(page);
		put_page(page);
	}
	return ret;
}
1616
/*
 * Scan the ptes under @pmd covering [@addr, @end) for ones equal to the
 * swap pte of @entry and hand each match to unuse_pte().
 *
 * Returns 0 if the whole range was scanned, otherwise the first non-zero
 * value returned by unuse_pte() (scanning stops there).
 */
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				swp_entry_t entry, struct page *page)
{
	pte_t swp_pte = swp_entry_to_pte(entry);
	pte_t *pte;
	int ret = 0;

	/*
	 * The scan itself runs without the page-table lock; unuse_pte()
	 * takes the lock and re-checks the pte before changing it.
	 */
	pte = pte_offset_map(pmd, addr);
	do {
		if (unlikely(pte_same_as_swp(*pte, swp_pte))) {
			/* Drop our mapping: unuse_pte() maps the pte itself. */
			pte_unmap(pte);
			ret = unuse_pte(vma, pmd, addr, entry, page);
			if (ret)
				goto out;
			/* Re-map at the same address to continue the scan. */
			pte = pte_offset_map(pmd, addr);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	/* The loop left pte one past the last entry examined. */
	pte_unmap(pte - 1);
out:
	return ret;
}
1652
1653static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
1654 unsigned long addr, unsigned long end,
1655 swp_entry_t entry, struct page *page)
1656{
1657 pmd_t *pmd;
1658 unsigned long next;
1659 int ret;
1660
1661 pmd = pmd_offset(pud, addr);
1662 do {
1663 cond_resched();
1664 next = pmd_addr_end(addr, end);
1665 if (pmd_none_or_trans_huge_or_clear_bad(pmd))
1666 continue;
1667 ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
1668 if (ret)
1669 return ret;
1670 } while (pmd++, addr = next, addr != end);
1671 return 0;
1672}
1673
1674static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
1675 unsigned long addr, unsigned long end,
1676 swp_entry_t entry, struct page *page)
1677{
1678 pud_t *pud;
1679 unsigned long next;
1680 int ret;
1681
1682 pud = pud_offset(p4d, addr);
1683 do {
1684 next = pud_addr_end(addr, end);
1685 if (pud_none_or_clear_bad(pud))
1686 continue;
1687 ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
1688 if (ret)
1689 return ret;
1690 } while (pud++, addr = next, addr != end);
1691 return 0;
1692}
1693
1694static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
1695 unsigned long addr, unsigned long end,
1696 swp_entry_t entry, struct page *page)
1697{
1698 p4d_t *p4d;
1699 unsigned long next;
1700 int ret;
1701
1702 p4d = p4d_offset(pgd, addr);
1703 do {
1704 next = p4d_addr_end(addr, end);
1705 if (p4d_none_or_clear_bad(p4d))
1706 continue;
1707 ret = unuse_pud_range(vma, p4d, addr, next, entry, page);
1708 if (ret)
1709 return ret;
1710 } while (p4d++, addr = next, addr != end);
1711 return 0;
1712}
1713
1714static int unuse_vma(struct vm_area_struct *vma,
1715 swp_entry_t entry, struct page *page)
1716{
1717 pgd_t *pgd;
1718 unsigned long addr, end, next;
1719 int ret;
1720
1721 if (page_anon_vma(page)) {
1722 addr = page_address_in_vma(page, vma);
1723 if (addr == -EFAULT)
1724 return 0;
1725 else
1726 end = addr + PAGE_SIZE;
1727 } else {
1728 addr = vma->vm_start;
1729 end = vma->vm_end;
1730 }
1731
1732 pgd = pgd_offset(vma->vm_mm, addr);
1733 do {
1734 next = pgd_addr_end(addr, end);
1735 if (pgd_none_or_clear_bad(pgd))
1736 continue;
1737 ret = unuse_p4d_range(vma, pgd, addr, next, entry, page);
1738 if (ret)
1739 return ret;
1740 } while (pgd++, addr = next, addr != end);
1741 return 0;
1742}
1743
1744static int unuse_mm(struct mm_struct *mm,
1745 swp_entry_t entry, struct page *page)
1746{
1747 struct vm_area_struct *vma;
1748 int ret = 0;
1749
1750 if (!down_read_trylock(&mm->mmap_sem)) {
1751
1752
1753
1754
1755 activate_page(page);
1756 unlock_page(page);
1757 down_read(&mm->mmap_sem);
1758 lock_page(page);
1759 }
1760 for (vma = mm->mmap; vma; vma = vma->vm_next) {
1761 if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
1762 break;
1763 cond_resched();
1764 }
1765 up_read(&mm->mmap_sem);
1766 return (ret < 0)? ret: 0;
1767}
1768
1769
1770
1771
1772
1773
/*
 * Find the next in-use swap_map slot of @si after offset @prev, wrapping
 * around once to also cover the slots from 1 up to @prev.
 *
 * @frontswap: when true, a slot only qualifies if frontswap_test() is
 *             also true for it.
 *
 * Returns the offset of the slot found, or 0 if the scan completed without
 * finding one.  No lock is taken here; the map is sampled with READ_ONCE().
 */
static unsigned int find_next_to_unuse(struct swap_info_struct *si,
					unsigned int prev, bool frontswap)
{
	unsigned int max = si->max;
	unsigned int i = prev;
	unsigned char count;

	for (;;) {
		if (++i >= max) {
			if (!prev) {
				/* Second pass finished (or started at 0): done. */
				i = 0;
				break;
			}
			/*
			 * Reached the end on the first pass: restart at slot 1
			 * and stop after re-examining @prev.  Clearing prev
			 * marks this as the final pass.
			 */
			max = prev + 1;
			prev = 0;
			i = 1;
		}
		count = READ_ONCE(si->swap_map[i]);
		/* Non-zero and not marked bad means the slot is in use. */
		if (count && swap_count(count) != SWAP_MAP_BAD)
			if (!frontswap || frontswap_test(si, i))
				break;
		/* Yield periodically during long scans. */
		if ((i % LATENCY_LIMIT) == 0)
			cond_resched();
	}
	return i;
}
1810
1811
1812
1813
1814
1815
1816
1817
1818
/*
 * Bring every in-use slot of swap area @type back into memory and detach
 * it from its users.
 *
 * @type:           index into swap_info[] of the area to drain.
 * @frontswap:      forwarded to find_next_to_unuse(); when true only slots
 *                  for which frontswap_test() succeeds are visited.
 * @pages_to_unuse: with @frontswap set, a non-zero value limits how many
 *                  pages are processed; 0 means no limit.
 *
 * Returns 0 on success, -EINTR if a signal is pending, -ENOMEM if a page
 * could not be read while its slot was still in use, and otherwise
 * whatever shmem_unuse() or unuse_mm() reported.
 */
int try_to_unuse(unsigned int type, bool frontswap,
		 unsigned long pages_to_unuse)
{
	struct swap_info_struct *si = swap_info[type];
	struct mm_struct *start_mm;
	/* volatile: the map byte is re-read each time it is dereferenced. */
	volatile unsigned char *swap_map;
	unsigned char swcount;
	struct page *page;
	swp_entry_t entry;
	unsigned int i = 0;
	int retval = 0;

	/*
	 * start_mm is where the mmlist walk for the next entry begins.  It
	 * starts out as init_mm and always holds an mm_users reference.
	 */
	start_mm = &init_mm;
	mmget(&init_mm);

	/* Visit in-use slots until find_next_to_unuse() reports none left. */
	while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
		if (signal_pending(current)) {
			retval = -EINTR;
			break;
		}

		/* Read the slot's contents into the swap cache. */
		swap_map = &si->swap_map[i];
		entry = swp_entry(type, i);
		page = read_swap_cache_async(entry,
					GFP_HIGHUSER_MOVABLE, NULL, 0, false);
		if (!page) {
			/*
			 * No page came back.  If the slot has meanwhile become
			 * free (or is a bad slot) there is nothing to do for
			 * it; otherwise treat the failure as out of memory.
			 */
			swcount = *swap_map;
			if (!swcount || swcount == SWAP_MAP_BAD)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * We hold the only remaining user reference on start_mm, so
		 * there is no point starting from it: fall back to init_mm.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			mmget(&init_mm);
		}

		/* Wait for I/O on the page to settle, then lock it. */
		wait_on_page_locked(page);
		wait_on_page_writeback(page);
		lock_page(page);
		wait_on_page_writeback(page);

		swcount = *swap_map;
		if (swap_count(swcount) == SWAP_MAP_SHMEM) {
			retval = shmem_unuse(entry, page);
			/*
			 * NOTE(review): the page is not unlocked or released on
			 * this path, so shmem_unuse() presumably does both --
			 * confirm.
			 */
			if (retval < 0)
				break;
			continue;
		}
		if (swap_count(swcount) && start_mm != &init_mm)
			retval = unuse_mm(start_mm, entry, page);

		if (swap_count(*swap_map)) {
			/*
			 * References remain: walk the mmlist from start_mm and
			 * try each mm in turn.  set_start_mm asks for a new
			 * starting point to be recorded once some mm actually
			 * reduces the count.
			 */
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *prev_mm = start_mm;
			struct mm_struct *mm;

			mmget(new_start_mm);
			mmget(prev_mm);
			spin_lock(&mmlist_lock);
			while (swap_count(*swap_map) && !retval &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				/* Skip an mm whose last user is already gone. */
				if (!mmget_not_zero(mm))
					continue;
				spin_unlock(&mmlist_lock);
				/*
				 * Keep the current mm pinned as prev_mm while
				 * the list lock is dropped; release the one
				 * pinned on the previous step.
				 */
				mmput(prev_mm);
				prev_mm = mm;

				cond_resched();

				swcount = *swap_map;
				if (!swap_count(swcount)) /* any usage ? */
					;
				else if (mm == &init_mm)
					set_start_mm = 1;
				else
					retval = unuse_mm(mm, entry, page);

				/* This mm lowered the count: start here next time. */
				if (set_start_mm && *swap_map < swcount) {
					mmput(new_start_mm);
					mmget(mm);
					new_start_mm = mm;
					set_start_mm = 0;
				}
				spin_lock(&mmlist_lock);
			}
			spin_unlock(&mmlist_lock);
			mmput(prev_mm);
			mmput(start_mm);
			start_mm = new_start_mm;
		}
		if (retval) {
			unlock_page(page);
			put_page(page);
			break;
		}

		/*
		 * The slot is still referenced although every mm has been
		 * tried.  If the page is dirty and in the swap cache, write it
		 * out and wait for the write before the cache entry is
		 * removed below.  swap_writepage() is followed by lock_page(),
		 * so it evidently returns with the page unlocked.
		 */
		if (swap_count(*swap_map) &&
		     PageDirty(page) && PageSwapCache(page)) {
			struct writeback_control wbc = {
				.sync_mode = WB_SYNC_NONE,
			};

			swap_writepage(page, &wbc);
			lock_page(page);
			wait_on_page_writeback(page);
		}

		/*
		 * Remove the page from the swap cache, but only if it still
		 * belongs to this entry: its private field may no longer match
		 * after the lock was dropped above.
		 */
		if (PageSwapCache(page) &&
		    likely(page_private(page) == entry.val))
			delete_from_swap_cache(page);

		/*
		 * The page no longer has a swap cache copy of its own (or is
		 * about to lose its swap area), so mark it dirty.
		 */
		SetPageDirty(page);
		unlock_page(page);
		put_page(page);

		/* Be polite to other tasks, then honour the frontswap limit. */
		cond_resched();
		if (frontswap && pages_to_unuse > 0) {
			if (!--pages_to_unuse)
				break;
		}
	}

	mmput(start_mm);
	return retval;
}
2041
2042
2043
2044
2045
2046
2047
2048static void drain_mmlist(void)
2049{
2050 struct list_head *p, *next;
2051 unsigned int type;
2052
2053 for (type = 0; type < nr_swapfiles; type++)
2054 if (swap_info[type]->inuse_pages)
2055 return;
2056 spin_lock(&mmlist_lock);
2057 list_for_each_safe(p, next, &init_mm.mmlist)
2058 list_del_init(p);
2059 spin_unlock(&mmlist_lock);
2060}
2061
2062
2063
2064
2065
2066
2067
2068static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
2069{
2070 struct swap_info_struct *sis;
2071 struct swap_extent *start_se;
2072 struct swap_extent *se;
2073 pgoff_t offset;
2074
2075 sis = swap_info[swp_type(entry)];
2076 *bdev = sis->bdev;
2077
2078 offset = swp_offset(entry);
2079 start_se = sis->curr_swap_extent;
2080 se = start_se;
2081
2082 for ( ; ; ) {
2083 if (se->start_page <= offset &&
2084 offset < (se->start_page + se->nr_pages)) {
2085 return se->start_block + (offset - se->start_page);
2086 }
2087 se = list_next_entry(se, list);
2088 sis->curr_swap_extent = se;
2089 BUG_ON(se == start_se);
2090 }
2091}
2092
2093
2094
2095
2096sector_t map_swap_page(struct page *page, struct block_device **bdev)
2097{
2098 swp_entry_t entry;
2099 entry.val = page_private(page);
2100 return map_swap_entry(entry, bdev);
2101}
2102
2103
2104
2105
2106static void destroy_swap_extents(struct swap_info_struct *sis)
2107{
2108 while (!list_empty(&sis->first_swap_extent.list)) {
2109 struct swap_extent *se;
2110
2111 se = list_first_entry(&sis->first_swap_extent.list,
2112 struct swap_extent, list);
2113 list_del(&se->list);
2114 kfree(se);
2115 }
2116
2117 if (sis->flags & SWP_FILE) {
2118 struct file *swap_file = sis->swap_file;
2119 struct address_space *mapping = swap_file->f_mapping;
2120
2121 sis->flags &= ~SWP_FILE;
2122 mapping->a_ops->swap_deactivate(swap_file);
2123 }
2124}
2125
2126
2127
2128
2129
2130
2131
2132int
2133add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
2134 unsigned long nr_pages, sector_t start_block)
2135{
2136 struct swap_extent *se;
2137 struct swap_extent *new_se;
2138 struct list_head *lh;
2139
2140 if (start_page == 0) {
2141 se = &sis->first_swap_extent;
2142 sis->curr_swap_extent = se;
2143 se->start_page = 0;
2144 se->nr_pages = nr_pages;
2145 se->start_block = start_block;
2146 return 1;
2147 } else {
2148 lh = sis->first_swap_extent.list.prev;
2149 se = list_entry(lh, struct swap_extent, list);
2150 BUG_ON(se->start_page + se->nr_pages != start_page);
2151 if (se->start_block + se->nr_pages == start_block) {
2152
2153 se->nr_pages += nr_pages;
2154 return 0;
2155 }
2156 }
2157
2158
2159
2160
2161 new_se = kmalloc(sizeof(*se), GFP_KERNEL);
2162 if (new_se == NULL)
2163 return -ENOMEM;
2164 new_se->start_page = start_page;
2165 new_se->nr_pages = nr_pages;
2166 new_se->start_block = start_block;
2167
2168 list_add_tail(&new_se->list, &sis->first_swap_extent.list);
2169 return 1;
2170}
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
2204{
2205 struct file *swap_file = sis->swap_file;
2206 struct address_space *mapping = swap_file->f_mapping;
2207 struct inode *inode = mapping->host;
2208 int ret;
2209
2210 if (S_ISBLK(inode->i_mode)) {
2211 ret = add_swap_extent(sis, 0, sis->max, 0);
2212 *span = sis->pages;
2213 return ret;
2214 }
2215
2216 if (mapping->a_ops->swap_activate) {
2217 ret = mapping->a_ops->swap_activate(sis, swap_file, span);
2218 if (!ret) {
2219 sis->flags |= SWP_FILE;
2220 ret = add_swap_extent(sis, 0, sis->max, 0);
2221 *span = sis->pages;
2222 }
2223 return ret;
2224 }
2225
2226 return generic_swapfile_activate(sis, swap_file, span);
2227}
2228
2229static void _enable_swap_info(struct swap_info_struct *p, int prio,
2230 unsigned char *swap_map,
2231 struct swap_cluster_info *cluster_info)
2232{
2233 if (prio >= 0)
2234 p->prio = prio;
2235 else
2236 p->prio = --least_priority;
2237
2238
2239
2240
2241 p->list.prio = -p->prio;
2242 p->avail_list.prio = -p->prio;
2243 p->swap_map = swap_map;
2244 p->cluster_info = cluster_info;
2245 p->flags |= SWP_WRITEOK;
2246 atomic_long_add(p->pages, &nr_swap_pages);
2247 total_swap_pages += p->pages;
2248
2249 assert_spin_locked(&swap_lock);
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260 plist_add(&p->list, &swap_active_head);
2261 spin_lock(&swap_avail_lock);
2262 plist_add(&p->avail_list, &swap_avail_head);
2263 spin_unlock(&swap_avail_lock);
2264}
2265
/*
 * Initialise frontswap for area @p with @frontswap_map, then enable the
 * area via _enable_swap_info() under swap_lock and p->lock (taken in that
 * order).
 */
static void enable_swap_info(struct swap_info_struct *p, int prio,
				unsigned char *swap_map,
				struct swap_cluster_info *cluster_info,
				unsigned long *frontswap_map)
{
	/* Done before any lock is taken. */
	frontswap_init(p->type, frontswap_map);
	spin_lock(&swap_lock);
	spin_lock(&p->lock);
	 _enable_swap_info(p, prio, swap_map, cluster_info);
	spin_unlock(&p->lock);
	spin_unlock(&swap_lock);
}
2278
/*
 * Re-enable area @p with the priority, swap_map and cluster_info it
 * already carries, under swap_lock and p->lock (taken in that order).
 */
static void reinsert_swap_info(struct swap_info_struct *p)
{
	spin_lock(&swap_lock);
	spin_lock(&p->lock);
	_enable_swap_info(p, p->prio, p->swap_map, p->cluster_info);
	spin_unlock(&p->lock);
	spin_unlock(&swap_lock);
}
2287
2288bool has_usable_swap(void)
2289{
2290 bool ret = true;
2291
2292 spin_lock(&swap_lock);
2293 if (plist_head_empty(&swap_active_head))
2294 ret = false;
2295 spin_unlock(&swap_lock);
2296 return ret;
2297}
2298
/*
 * swapoff(2): stop swapping to the area backed by @specialfile.
 *
 * Returns 0 on success; -EPERM without CAP_SYS_ADMIN; the error from
 * getname()/file_open_name(); -EINVAL if the file is not an active swap
 * area; -ENOMEM if security_vm_enough_memory_mm() refuses; or the error
 * from try_to_unuse(), in which case the area is put back into service.
 */
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
	struct swap_info_struct *p = NULL;
	unsigned char *swap_map;
	struct swap_cluster_info *cluster_info;
	unsigned long *frontswap_map;
	struct file *swap_file, *victim;
	struct address_space *mapping;
	struct inode *inode;
	struct filename *pathname;
	int err, found = 0;
	unsigned int old_block_size;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	BUG_ON(!current->mm);

	pathname = getname(specialfile);
	if (IS_ERR(pathname))
		return PTR_ERR(pathname);

	victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
	err = PTR_ERR(victim);
	if (IS_ERR(victim))
		goto out;

	/* Identify the area by comparing address_space pointers. */
	mapping = victim->f_mapping;
	spin_lock(&swap_lock);
	plist_for_each_entry(p, &swap_active_head, list) {
		if (p->flags & SWP_WRITEOK) {
			if (p->swap_file->f_mapping == mapping) {
				found = 1;
				break;
			}
		}
	}
	if (!found) {
		err = -EINVAL;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	/*
	 * Account p->pages and immediately unaccount them again.
	 * NOTE(review): presumably a probe that memory can absorb what is
	 * currently swapped out -- confirm.
	 */
	if (!security_vm_enough_memory_mm(current->mm, p->pages))
		vm_unacct_memory(p->pages);
	else {
		err = -ENOMEM;
		spin_unlock(&swap_lock);
		goto out_dput;
	}
	/* Stop new allocations: take the area off the available list. */
	spin_lock(&swap_avail_lock);
	plist_del(&p->avail_list, &swap_avail_head);
	spin_unlock(&swap_avail_lock);
	spin_lock(&p->lock);
	if (p->prio < 0) {
		/*
		 * An automatically assigned (negative) priority is going
		 * away: raise the priority of every area after it on the
		 * active list by one and give the slot back to
		 * least_priority.
		 */
		struct swap_info_struct *si = p;

		plist_for_each_entry_continue(si, &swap_active_head, list) {
			si->prio++;
			si->list.prio--;
			si->avail_list.prio--;
		}
		least_priority++;
	}
	plist_del(&p->list, &swap_active_head);
	atomic_long_sub(p->pages, &nr_swap_pages);
	total_swap_pages -= p->pages;
	p->flags &= ~SWP_WRITEOK;
	spin_unlock(&p->lock);
	spin_unlock(&swap_lock);

	disable_swap_slots_cache_lock();

	/* Mark this task as OOM origin while pages are pulled back in. */
	set_current_oom_origin();
	err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
	clear_current_oom_origin();

	if (err) {
		/* re-insert swap space back into swap_list */
		reinsert_swap_info(p);
		reenable_swap_slots_cache_unlock();
		goto out_dput;
	}

	reenable_swap_slots_cache_unlock();

	/* Wait for any queued discard work on this area to finish. */
	flush_work(&p->discard_work);

	destroy_swap_extents(p);
	if (p->flags & SWP_CONTINUED)
		free_swap_count_continuations(p);

	mutex_lock(&swapon_mutex);
	spin_lock(&swap_lock);
	spin_lock(&p->lock);
	drain_mmlist();

	/*
	 * Poll, dropping the locks for one tick each time, until p->flags
	 * falls below SWP_SCANNING.
	 * NOTE(review): presumably waits for scans of this area that are
	 * still in flight -- confirm against the SWP_SCANNING definition.
	 */
	p->highest_bit = 0;		/* cuts scans short */
	while (p->flags >= SWP_SCANNING) {
		spin_unlock(&p->lock);
		spin_unlock(&swap_lock);
		schedule_timeout_uninterruptible(1);
		spin_lock(&swap_lock);
		spin_lock(&p->lock);
	}

	/*
	 * Detach everything the area owns while the locks are held; the
	 * actual freeing happens after they are dropped.
	 */
	swap_file = p->swap_file;
	old_block_size = p->old_block_size;
	p->swap_file = NULL;
	p->max = 0;
	swap_map = p->swap_map;
	p->swap_map = NULL;
	cluster_info = p->cluster_info;
	p->cluster_info = NULL;
	frontswap_map = frontswap_map_get(p);
	spin_unlock(&p->lock);
	spin_unlock(&swap_lock);
	frontswap_invalidate_area(p->type);
	frontswap_map_set(p, NULL);
	mutex_unlock(&swapon_mutex);
	free_percpu(p->percpu_cluster);
	p->percpu_cluster = NULL;
	vfree(swap_map);
	kvfree(cluster_info);
	kvfree(frontswap_map);
	/* Destroy swap account information */
	swap_cgroup_swapoff(p->type);
	exit_swap_address_space(p->type);

	inode = mapping->host;
	if (S_ISBLK(inode->i_mode)) {
		/* Restore the block size saved earlier and release the device. */
		struct block_device *bdev = I_BDEV(inode);
		set_blocksize(bdev, old_block_size);
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
	} else {
		inode_lock(inode);
		inode->i_flags &= ~S_SWAPFILE;
		inode_unlock(inode);
	}
	filp_close(swap_file, NULL);

	/*
	 * Clearing the flags (including SWP_USED) is the last step: from
	 * here on alloc_swap_info() may hand this swap_info_struct out
	 * again.
	 */
	spin_lock(&swap_lock);
	p->flags = 0;
	spin_unlock(&swap_lock);

	err = 0;
	/* Notify pollers of /proc/swaps that the set of areas changed. */
	atomic_inc(&proc_poll_event);
	wake_up_interruptible(&proc_poll_wait);

out_dput:
	filp_close(victim, NULL);
out:
	putname(pathname);
	return err;
}
2459
2460#ifdef CONFIG_PROC_FS
2461static unsigned swaps_poll(struct file *file, poll_table *wait)
2462{
2463 struct seq_file *seq = file->private_data;
2464
2465 poll_wait(file, &proc_poll_wait, wait);
2466
2467 if (seq->poll_event != atomic_read(&proc_poll_event)) {
2468 seq->poll_event = atomic_read(&proc_poll_event);
2469 return POLLIN | POLLRDNORM | POLLERR | POLLPRI;
2470 }
2471
2472 return POLLIN | POLLRDNORM;
2473}
2474
2475
2476static void *swap_start(struct seq_file *swap, loff_t *pos)
2477{
2478 struct swap_info_struct *si;
2479 int type;
2480 loff_t l = *pos;
2481
2482 mutex_lock(&swapon_mutex);
2483
2484 if (!l)
2485 return SEQ_START_TOKEN;
2486
2487 for (type = 0; type < nr_swapfiles; type++) {
2488 smp_rmb();
2489 si = swap_info[type];
2490 if (!(si->flags & SWP_USED) || !si->swap_map)
2491 continue;
2492 if (!--l)
2493 return si;
2494 }
2495
2496 return NULL;
2497}
2498
2499static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
2500{
2501 struct swap_info_struct *si = v;
2502 int type;
2503
2504 if (v == SEQ_START_TOKEN)
2505 type = 0;
2506 else
2507 type = si->type + 1;
2508
2509 for (; type < nr_swapfiles; type++) {
2510 smp_rmb();
2511 si = swap_info[type];
2512 if (!(si->flags & SWP_USED) || !si->swap_map)
2513 continue;
2514 ++*pos;
2515 return si;
2516 }
2517
2518 return NULL;
2519}
2520
/* seq_file stop: release the swapon_mutex taken in swap_start(). */
static void swap_stop(struct seq_file *swap, void *v)
{
	mutex_unlock(&swapon_mutex);
}
2525
2526static int swap_show(struct seq_file *swap, void *v)
2527{
2528 struct swap_info_struct *si = v;
2529 struct file *file;
2530 int len;
2531
2532 if (si == SEQ_START_TOKEN) {
2533 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
2534 return 0;
2535 }
2536
2537 file = si->swap_file;
2538 len = seq_file_path(swap, file, " \t\n\\");
2539 seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
2540 len < 40 ? 40 - len : 1, " ",
2541 S_ISBLK(file_inode(file)->i_mode) ?
2542 "partition" : "file\t",
2543 si->pages << (PAGE_SHIFT - 10),
2544 si->inuse_pages << (PAGE_SHIFT - 10),
2545 si->prio);
2546 return 0;
2547}
2548
/* seq_file iterator used by swaps_open() for /proc/swaps. */
static const struct seq_operations swaps_op = {
	.start =	swap_start,
	.next =		swap_next,
	.stop =		swap_stop,
	.show =		swap_show
};
2555
2556static int swaps_open(struct inode *inode, struct file *file)
2557{
2558 struct seq_file *seq;
2559 int ret;
2560
2561 ret = seq_open(file, &swaps_op);
2562 if (ret)
2563 return ret;
2564
2565 seq = file->private_data;
2566 seq->poll_event = atomic_read(&proc_poll_event);
2567 return 0;
2568}
2569
/* File operations for /proc/swaps: a seq_file with an extra poll hook. */
static const struct file_operations proc_swaps_operations = {
	.open		= swaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
	.poll		= swaps_poll,
};
2577
/*
 * Register the "swaps" entry in procfs at init time.  The result of
 * proc_create() is not checked; the initcall always reports success.
 */
static int __init procswaps_init(void)
{
	proc_create("swaps", 0, NULL, &proc_swaps_operations);
	return 0;
}
__initcall(procswaps_init);
2584#endif
2585
2586#ifdef MAX_SWAPFILES_CHECK
/*
 * Late initcall that just expands MAX_SWAPFILES_CHECK(), when that macro
 * is defined.
 * NOTE(review): presumably a build-time sanity check on MAX_SWAPFILES --
 * confirm against the macro's definition.
 */
static int __init max_swapfiles_check(void)
{
	MAX_SWAPFILES_CHECK();
	return 0;
}
late_initcall(max_swapfiles_check);
2593#endif
2594
/*
 * Obtain a swap_info_struct for a new swap area: reuse the first
 * swap_info[] slot that is not marked SWP_USED, or install a freshly
 * allocated one in a new slot.
 *
 * Returns the structure with flags set to SWP_USED and its lists and lock
 * initialised, ERR_PTR(-ENOMEM) if allocation fails, or ERR_PTR(-EPERM)
 * if all MAX_SWAPFILES slots are taken.
 */
static struct swap_info_struct *alloc_swap_info(void)
{
	struct swap_info_struct *p;
	unsigned int type;

	/* Allocate up front, before swap_lock is taken. */
	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);

	spin_lock(&swap_lock);
	for (type = 0; type < nr_swapfiles; type++) {
		if (!(swap_info[type]->flags & SWP_USED))
			break;
	}
	if (type >= MAX_SWAPFILES) {
		spin_unlock(&swap_lock);
		kfree(p);
		return ERR_PTR(-EPERM);
	}
	if (type >= nr_swapfiles) {
		p->type = type;
		swap_info[type] = p;
		/*
		 * Write swap_info[type] before nr_swapfiles, so that a reader
		 * that sees the new count (and issues smp_rmb(), as
		 * swap_start()/swap_next() do) also sees the pointer.
		 */
		smp_wmb();
		nr_swapfiles++;
	} else {
		/* An unused slot already has a structure: use it instead. */
		kfree(p);
		p = swap_info[type];
		/*
		 * The reused structure keeps its previous contents except for
		 * the fields reinitialised below.
		 */
	}
	INIT_LIST_HEAD(&p->first_swap_extent.list);
	plist_node_init(&p->list, 0);
	plist_node_init(&p->avail_list, 0);
	p->flags = SWP_USED;
	spin_unlock(&swap_lock);
	spin_lock_init(&p->lock);

	return p;
}
2641
2642static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
2643{
2644 int error;
2645
2646 if (S_ISBLK(inode->i_mode)) {
2647 p->bdev = bdgrab(I_BDEV(inode));
2648 error = blkdev_get(p->bdev,
2649 FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
2650 if (error < 0) {
2651 p->bdev = NULL;
2652 return error;
2653 }
2654 p->old_block_size = block_size(p->bdev);
2655 error = set_blocksize(p->bdev, PAGE_SIZE);
2656 if (error < 0)
2657 return error;
2658 p->flags |= SWP_BLKDEV;
2659 } else if (S_ISREG(inode->i_mode)) {
2660 p->bdev = inode->i_sb->s_bdev;
2661 inode_lock(inode);
2662 if (IS_SWAPFILE(inode))
2663 return -EBUSY;
2664 } else
2665 return -EINVAL;
2666
2667 return 0;
2668}
2669
/*
 * Validate the on-disk swap header and work out how many pages of the
 * area can be used.
 *
 * Side effects: the header is byte-swapped in place when it was written
 * with the opposite endianness, and p->lowest_bit, p->cluster_next,
 * p->cluster_nr and p->highest_bit are initialised.
 *
 * Returns the number of usable page slots (including slot 0), or 0 if the
 * header is unacceptable.
 */
static unsigned long read_swap_header(struct swap_info_struct *p,
					union swap_header *swap_header,
					struct inode *inode)
{
	int i;
	unsigned long maxpages;
	unsigned long swapfilepages;
	unsigned long last_page;

	if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
		pr_err("Unable to find swap-space signature\n");
		return 0;
	}

	/* swap partition endianess hack... */
	if (swab32(swap_header->info.version) == 1) {
		/* Version reads as 1 only after swapping: convert the header. */
		swab32s(&swap_header->info.version);
		swab32s(&swap_header->info.last_page);
		swab32s(&swap_header->info.nr_badpages);
		/* Bound the count before walking the bad-page array. */
		if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
			return 0;
		for (i = 0; i < swap_header->info.nr_badpages; i++)
			swab32s(&swap_header->info.badpages[i]);
	}
	/* Check the swap header's sub-version */
	if (swap_header->info.version != 1) {
		pr_warn("Unable to handle swap header version %d\n",
			swap_header->info.version);
		return 0;
	}

	/* Slot 0 is never handed out, so scanning starts at 1. */
	p->lowest_bit  = 1;
	p->cluster_next = 1;
	p->cluster_nr = 0;

	/*
	 * Largest page count this architecture can address: encode the
	 * biggest possible offset into a swap pte, decode it again, and add
	 * one to whatever offset survives the round trip.
	 */
	maxpages = swp_offset(pte_to_swp_entry(
			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
	last_page = swap_header->info.last_page;
	if (last_page > maxpages) {
		pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
			maxpages << (PAGE_SHIFT - 10),
			last_page << (PAGE_SHIFT - 10));
	}
	if (maxpages > last_page) {
		/* The header is the tighter limit: last_page is an index. */
		maxpages = last_page + 1;
		/* p->max is an unsigned int: don't overflow it */
		if ((unsigned int)maxpages == 0)
			maxpages = UINT_MAX;
	}
	p->highest_bit = maxpages - 1;

	if (!maxpages)
		return 0;
	/* A non-zero backing size must cover everything the header claims. */
	swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
	if (swapfilepages && maxpages > swapfilepages) {
		pr_warn("Swap area shorter than signature indicates\n");
		return 0;
	}
	/* Bad-page lists are rejected for regular files. */
	if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
		return 0;
	if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
		return 0;

	return maxpages;
}
2749
/*
 * Column counts used by setup_swap_map_and_extents() when it spreads free
 * clusters over the free list.
 */
/* L1 cache line size divided by sizeof(struct swap_cluster_info), rounded up. */
#define SWAP_CLUSTER_INFO_COLS						\
	DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
/* SWAP_ADDRESS_SPACE_PAGES divided by SWAPFILE_CLUSTER, rounded up. */
#define SWAP_CLUSTER_SPACE_COLS						\
	DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
/* The larger of the two column counts above. */
#define SWAP_CLUSTER_COLS						\
	max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
2756
/*
 * Initialise @swap_map and, when present, @cluster_info for a new swap
 * area, then build its extent list.
 *
 * Bad pages from the header and the header slot itself are marked
 * SWAP_MAP_BAD.  p->max and p->pages are set, and the free-cluster list
 * is populated when @cluster_info is non-NULL.
 *
 * Returns the value of setup_swap_extents() on success (negative values
 * are errors), or -EINVAL for an invalid bad-page entry or an area with
 * no good pages.
 */
static int setup_swap_map_and_extents(struct swap_info_struct *p,
					union swap_header *swap_header,
					unsigned char *swap_map,
					struct swap_cluster_info *cluster_info,
					unsigned long maxpages,
					sector_t *span)
{
	unsigned int j, k;
	unsigned int nr_good_pages;
	int nr_extents;
	unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
	/* Column of the cluster containing p->cluster_next. */
	unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS;
	unsigned long i, idx;

	nr_good_pages = maxpages - 1;	/* omit header page */

	cluster_list_init(&p->free_clusters);
	cluster_list_init(&p->discard_clusters);

	for (i = 0; i < swap_header->info.nr_badpages; i++) {
		unsigned int page_nr = swap_header->info.badpages[i];
		/* Page 0 and anything beyond last_page are invalid entries. */
		if (page_nr == 0 || page_nr > swap_header->info.last_page)
			return -EINVAL;
		if (page_nr < maxpages) {
			swap_map[page_nr] = SWAP_MAP_BAD;
			nr_good_pages--;
			/*
			 * Count the bad page against its cluster, so that the
			 * cluster is not put on the free list below.
			 */
			inc_cluster_info_page(p, cluster_info, page_nr);
		}
	}

	/* Haven't marked the cluster free yet, no list operation involved */
	for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
		inc_cluster_info_page(p, cluster_info, i);

	if (nr_good_pages) {
		/* Slot 0 holds the header and is never allocated. */
		swap_map[0] = SWAP_MAP_BAD;
		/*
		 * Count the header page against its cluster as well, for the
		 * same reason as the bad pages above.
		 */
		inc_cluster_info_page(p, cluster_info, 0);
		p->max = maxpages;
		p->pages = nr_good_pages;
		nr_extents = setup_swap_extents(p, span);
		if (nr_extents < 0)
			return nr_extents;
		/* Re-read p->pages after extent setup. */
		nr_good_pages = p->pages;
	}
	if (!nr_good_pages) {
		pr_warn("Empty swap-file\n");
		return -EINVAL;
	}

	if (!cluster_info)
		return nr_extents;

	/*
	 * Add unused clusters to the free list column by column, starting
	 * with the column that contains p->cluster_next, so that consecutive
	 * free-list entries are SWAP_CLUSTER_COLS clusters apart.
	 */
	for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
		j = (k + col) % SWAP_CLUSTER_COLS;
		for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
			idx = i * SWAP_CLUSTER_COLS + j;
			if (idx >= nr_clusters)
				continue;
			/* A non-zero count means the cluster is partly taken. */
			if (cluster_count(&cluster_info[idx]))
				continue;
			cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
			cluster_list_add_tail(&p->free_clusters, cluster_info,
					      idx);
		}
	}
	return nr_extents;
}
2837
2838
2839
2840
2841
2842static bool swap_discardable(struct swap_info_struct *si)
2843{
2844 struct request_queue *q = bdev_get_queue(si->bdev);
2845
2846 if (!q || !blk_queue_discard(q))
2847 return false;
2848
2849 return true;
2850}
2851
2852SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2853{
2854 struct swap_info_struct *p;
2855 struct filename *name;
2856 struct file *swap_file = NULL;
2857 struct address_space *mapping;
2858 int prio;
2859 int error;
2860 union swap_header *swap_header;
2861 int nr_extents;
2862 sector_t span;
2863 unsigned long maxpages;
2864 unsigned char *swap_map = NULL;
2865 struct swap_cluster_info *cluster_info = NULL;
2866 unsigned long *frontswap_map = NULL;
2867 struct page *page = NULL;
2868 struct inode *inode = NULL;
2869
2870 if (swap_flags & ~SWAP_FLAGS_VALID)
2871 return -EINVAL;
2872
2873 if (!capable(CAP_SYS_ADMIN))
2874 return -EPERM;
2875
2876 p = alloc_swap_info();
2877 if (IS_ERR(p))
2878 return PTR_ERR(p);
2879
2880 INIT_WORK(&p->discard_work, swap_discard_work);
2881
2882 name = getname(specialfile);
2883 if (IS_ERR(name)) {
2884 error = PTR_ERR(name);
2885 name = NULL;
2886 goto bad_swap;
2887 }
2888 swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
2889 if (IS_ERR(swap_file)) {
2890 error = PTR_ERR(swap_file);
2891 swap_file = NULL;
2892 goto bad_swap;
2893 }
2894
2895 p->swap_file = swap_file;
2896 mapping = swap_file->f_mapping;
2897 inode = mapping->host;
2898
2899
2900 error = claim_swapfile(p, inode);
2901 if (unlikely(error))
2902 goto bad_swap;
2903
2904
2905
2906
2907 if (!mapping->a_ops->readpage) {
2908 error = -EINVAL;
2909 goto bad_swap;
2910 }
2911 page = read_mapping_page(mapping, 0, swap_file);
2912 if (IS_ERR(page)) {
2913 error = PTR_ERR(page);
2914 goto bad_swap;
2915 }
2916 swap_header = kmap(page);
2917
2918 maxpages = read_swap_header(p, swap_header, inode);
2919 if (unlikely(!maxpages)) {
2920 error = -EINVAL;
2921 goto bad_swap;
2922 }
2923
2924
2925 swap_map = vzalloc(maxpages);
2926 if (!swap_map) {
2927 error = -ENOMEM;
2928 goto bad_swap;
2929 }
2930
2931 if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
2932 p->flags |= SWP_STABLE_WRITES;
2933
2934 if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
2935 int cpu;
2936 unsigned long ci, nr_cluster;
2937
2938 p->flags |= SWP_SOLIDSTATE;
2939
2940
2941
2942
2943 p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
2944 nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
2945
2946 cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info),
2947 GFP_KERNEL);
2948 if (!cluster_info) {
2949 error = -ENOMEM;
2950 goto bad_swap;
2951 }
2952
2953 for (ci = 0; ci < nr_cluster; ci++)
2954 spin_lock_init(&((cluster_info + ci)->lock));
2955
2956 p->percpu_cluster = alloc_percpu(struct percpu_cluster);
2957 if (!p->percpu_cluster) {
2958 error = -ENOMEM;
2959 goto bad_swap;
2960 }
2961 for_each_possible_cpu(cpu) {
2962 struct percpu_cluster *cluster;
2963 cluster = per_cpu_ptr(p->percpu_cluster, cpu);
2964 cluster_set_null(&cluster->index);
2965 }
2966 }
2967
2968 error = swap_cgroup_swapon(p->type, maxpages);
2969 if (error)
2970 goto bad_swap;
2971
2972 nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
2973 cluster_info, maxpages, &span);
2974 if (unlikely(nr_extents < 0)) {
2975 error = nr_extents;
2976 goto bad_swap;
2977 }
2978
2979 if (IS_ENABLED(CONFIG_FRONTSWAP))
2980 frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long),
2981 GFP_KERNEL);
2982
2983 if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
2984
2985
2986
2987
2988
2989
2990 p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |
2991 SWP_PAGE_DISCARD);
2992
2993
2994
2995
2996
2997
2998
2999 if (swap_flags & SWAP_FLAG_DISCARD_ONCE)
3000 p->flags &= ~SWP_PAGE_DISCARD;
3001 else if (swap_flags & SWAP_FLAG_DISCARD_PAGES)
3002 p->flags &= ~SWP_AREA_DISCARD;
3003
3004
3005 if (p->flags & SWP_AREA_DISCARD) {
3006 int err = discard_swap(p);
3007 if (unlikely(err))
3008 pr_err("swapon: discard_swap(%p): %d\n",
3009 p, err);
3010 }
3011 }
3012
3013 error = init_swap_address_space(p->type, maxpages);
3014 if (error)
3015 goto bad_swap;
3016
3017 mutex_lock(&swapon_mutex);
3018 prio = -1;
3019 if (swap_flags & SWAP_FLAG_PREFER)
3020 prio =
3021 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
3022 enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map);
3023
3024 pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
3025 p->pages<<(PAGE_SHIFT-10), name->name, p->prio,
3026 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
3027 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
3028 (p->flags & SWP_DISCARDABLE) ? "D" : "",
3029 (p->flags & SWP_AREA_DISCARD) ? "s" : "",
3030 (p->flags & SWP_PAGE_DISCARD) ? "c" : "",
3031 (frontswap_map) ? "FS" : "");
3032
3033 mutex_unlock(&swapon_mutex);
3034 atomic_inc(&proc_poll_event);
3035 wake_up_interruptible(&proc_poll_wait);
3036
3037 if (S_ISREG(inode->i_mode))
3038 inode->i_flags |= S_SWAPFILE;
3039 error = 0;
3040 goto out;
3041bad_swap:
3042 free_percpu(p->percpu_cluster);
3043 p->percpu_cluster = NULL;
3044 if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
3045 set_blocksize(p->bdev, p->old_block_size);
3046 blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3047 }
3048 destroy_swap_extents(p);
3049 swap_cgroup_swapoff(p->type);
3050 spin_lock(&swap_lock);
3051 p->swap_file = NULL;
3052 p->flags = 0;
3053 spin_unlock(&swap_lock);
3054 vfree(swap_map);
3055 vfree(cluster_info);
3056 if (swap_file) {
3057 if (inode && S_ISREG(inode->i_mode)) {
3058 inode_unlock(inode);
3059 inode = NULL;
3060 }
3061 filp_close(swap_file, NULL);
3062 }
3063out:
3064 if (page && !IS_ERR(page)) {
3065 kunmap(page);
3066 put_page(page);
3067 }
3068 if (name)
3069 putname(name);
3070 if (inode && S_ISREG(inode->i_mode))
3071 inode_unlock(inode);
3072 if (!error)
3073 enable_swap_slots_cache();
3074 return error;
3075}
3076
3077void si_swapinfo(struct sysinfo *val)
3078{
3079 unsigned int type;
3080 unsigned long nr_to_be_unused = 0;
3081
3082 spin_lock(&swap_lock);
3083 for (type = 0; type < nr_swapfiles; type++) {
3084 struct swap_info_struct *si = swap_info[type];
3085
3086 if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK))
3087 nr_to_be_unused += si->inuse_pages;
3088 }
3089 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
3090 val->totalswap = total_swap_pages + nr_to_be_unused;
3091 spin_unlock(&swap_lock);
3092}
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
3106{
3107 struct swap_info_struct *p;
3108 struct swap_cluster_info *ci;
3109 unsigned long offset, type;
3110 unsigned char count;
3111 unsigned char has_cache;
3112 int err = -EINVAL;
3113
3114 if (non_swap_entry(entry))
3115 goto out;
3116
3117 type = swp_type(entry);
3118 if (type >= nr_swapfiles)
3119 goto bad_file;
3120 p = swap_info[type];
3121 offset = swp_offset(entry);
3122 if (unlikely(offset >= p->max))
3123 goto out;
3124
3125 ci = lock_cluster_or_swap_info(p, offset);
3126
3127 count = p->swap_map[offset];
3128
3129
3130
3131
3132
3133 if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {
3134 err = -ENOENT;
3135 goto unlock_out;
3136 }
3137
3138 has_cache = count & SWAP_HAS_CACHE;
3139 count &= ~SWAP_HAS_CACHE;
3140 err = 0;
3141
3142 if (usage == SWAP_HAS_CACHE) {
3143
3144
3145 if (!has_cache && count)
3146 has_cache = SWAP_HAS_CACHE;
3147 else if (has_cache)
3148 err = -EEXIST;
3149 else
3150 err = -ENOENT;
3151
3152 } else if (count || has_cache) {
3153
3154 if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
3155 count += usage;
3156 else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
3157 err = -EINVAL;
3158 else if (swap_count_continued(p, offset, count))
3159 count = COUNT_CONTINUED;
3160 else
3161 err = -ENOMEM;
3162 } else
3163 err = -ENOENT;
3164
3165 p->swap_map[offset] = count | has_cache;
3166
3167unlock_out:
3168 unlock_cluster_or_swap_info(p, ci);
3169out:
3170 return err;
3171
3172bad_file:
3173 pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val);
3174 goto out;
3175}
3176
3177
3178
3179
3180
3181void swap_shmem_alloc(swp_entry_t entry)
3182{
3183 __swap_duplicate(entry, SWAP_MAP_SHMEM);
3184}
3185
3186
3187
3188
3189
3190
3191
3192
3193int swap_duplicate(swp_entry_t entry)
3194{
3195 int err = 0;
3196
3197 while (!err && __swap_duplicate(entry, 1) == -ENOMEM)
3198 err = add_swap_count_continuation(entry, GFP_ATOMIC);
3199 return err;
3200}
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210int swapcache_prepare(swp_entry_t entry)
3211{
3212 return __swap_duplicate(entry, SWAP_HAS_CACHE);
3213}
3214
3215struct swap_info_struct *page_swap_info(struct page *page)
3216{
3217 swp_entry_t swap = { .val = page_private(page) };
3218 return swap_info[swp_type(swap)];
3219}
3220
3221
3222
3223
3224struct address_space *__page_file_mapping(struct page *page)
3225{
3226 VM_BUG_ON_PAGE(!PageSwapCache(page), page);
3227 return page_swap_info(page)->swap_file->f_mapping;
3228}
3229EXPORT_SYMBOL_GPL(__page_file_mapping);
3230
3231pgoff_t __page_file_index(struct page *page)
3232{
3233 swp_entry_t swap = { .val = page_private(page) };
3234 VM_BUG_ON_PAGE(!PageSwapCache(page), page);
3235 return swp_offset(swap);
3236}
3237EXPORT_SYMBOL_GPL(__page_file_index);
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
3255{
3256 struct swap_info_struct *si;
3257 struct swap_cluster_info *ci;
3258 struct page *head;
3259 struct page *page;
3260 struct page *list_page;
3261 pgoff_t offset;
3262 unsigned char count;
3263
3264
3265
3266
3267
3268 page = alloc_page(gfp_mask | __GFP_HIGHMEM);
3269
3270 si = swap_info_get(entry);
3271 if (!si) {
3272
3273
3274
3275
3276
3277 goto outer;
3278 }
3279
3280 offset = swp_offset(entry);
3281
3282 ci = lock_cluster(si, offset);
3283
3284 count = si->swap_map[offset] & ~SWAP_HAS_CACHE;
3285
3286 if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
3287
3288
3289
3290
3291
3292 goto out;
3293 }
3294
3295 if (!page) {
3296 unlock_cluster(ci);
3297 spin_unlock(&si->lock);
3298 return -ENOMEM;
3299 }
3300
3301
3302
3303
3304
3305
3306 head = vmalloc_to_page(si->swap_map + offset);
3307 offset &= ~PAGE_MASK;
3308
3309
3310
3311
3312
3313 if (!page_private(head)) {
3314 BUG_ON(count & COUNT_CONTINUED);
3315 INIT_LIST_HEAD(&head->lru);
3316 set_page_private(head, SWP_CONTINUED);
3317 si->flags |= SWP_CONTINUED;
3318 }
3319
3320 list_for_each_entry(list_page, &head->lru, lru) {
3321 unsigned char *map;
3322
3323
3324
3325
3326
3327 if (!(count & COUNT_CONTINUED))
3328 goto out;
3329
3330 map = kmap_atomic(list_page) + offset;
3331 count = *map;
3332 kunmap_atomic(map);
3333
3334
3335
3336
3337
3338 if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
3339 goto out;
3340 }
3341
3342 list_add_tail(&page->lru, &head->lru);
3343 page = NULL;
3344out:
3345 unlock_cluster(ci);
3346 spin_unlock(&si->lock);
3347outer:
3348 if (page)
3349 __free_page(page);
3350 return 0;
3351}
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362static bool swap_count_continued(struct swap_info_struct *si,
3363 pgoff_t offset, unsigned char count)
3364{
3365 struct page *head;
3366 struct page *page;
3367 unsigned char *map;
3368
3369 head = vmalloc_to_page(si->swap_map + offset);
3370 if (page_private(head) != SWP_CONTINUED) {
3371 BUG_ON(count & COUNT_CONTINUED);
3372 return false;
3373 }
3374
3375 offset &= ~PAGE_MASK;
3376 page = list_entry(head->lru.next, struct page, lru);
3377 map = kmap_atomic(page) + offset;
3378
3379 if (count == SWAP_MAP_MAX)
3380 goto init_map;
3381
3382 if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) {
3383
3384
3385
3386 while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {
3387 kunmap_atomic(map);
3388 page = list_entry(page->lru.next, struct page, lru);
3389 BUG_ON(page == head);
3390 map = kmap_atomic(page) + offset;
3391 }
3392 if (*map == SWAP_CONT_MAX) {
3393 kunmap_atomic(map);
3394 page = list_entry(page->lru.next, struct page, lru);
3395 if (page == head)
3396 return false;
3397 map = kmap_atomic(page) + offset;
3398init_map: *map = 0;
3399 }
3400 *map += 1;
3401 kunmap_atomic(map);
3402 page = list_entry(page->lru.prev, struct page, lru);
3403 while (page != head) {
3404 map = kmap_atomic(page) + offset;
3405 *map = COUNT_CONTINUED;
3406 kunmap_atomic(map);
3407 page = list_entry(page->lru.prev, struct page, lru);
3408 }
3409 return true;
3410
3411 } else {
3412
3413
3414
3415 BUG_ON(count != COUNT_CONTINUED);
3416 while (*map == COUNT_CONTINUED) {
3417 kunmap_atomic(map);
3418 page = list_entry(page->lru.next, struct page, lru);
3419 BUG_ON(page == head);
3420 map = kmap_atomic(page) + offset;
3421 }
3422 BUG_ON(*map == 0);
3423 *map -= 1;
3424 if (*map == 0)
3425 count = 0;
3426 kunmap_atomic(map);
3427 page = list_entry(page->lru.prev, struct page, lru);
3428 while (page != head) {
3429 map = kmap_atomic(page) + offset;
3430 *map = SWAP_CONT_MAX | count;
3431 count = COUNT_CONTINUED;
3432 kunmap_atomic(map);
3433 page = list_entry(page->lru.prev, struct page, lru);
3434 }
3435 return count == COUNT_CONTINUED;
3436 }
3437}
3438
3439
3440
3441
3442
3443static void free_swap_count_continuations(struct swap_info_struct *si)
3444{
3445 pgoff_t offset;
3446
3447 for (offset = 0; offset < si->max; offset += PAGE_SIZE) {
3448 struct page *head;
3449 head = vmalloc_to_page(si->swap_map + offset);
3450 if (page_private(head)) {
3451 struct page *page, *next;
3452
3453 list_for_each_entry_safe(page, next, &head->lru, lru) {
3454 list_del(&page->lru);
3455 __free_page(page);
3456 }
3457 }
3458 }
3459}
3460