1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
25
26#include <linux/atomic.h>
27#include <linux/sched.h>
28#include <linux/cpumask.h>
29#include <linux/list.h>
30#include <linux/mm.h>
31#include <linux/module.h>
32#include <linux/page-flags.h>
33#include <linux/migrate.h>
34#include <linux/node.h>
35#include <linux/compaction.h>
36#include <linux/percpu.h>
37#include <linux/mount.h>
38#include <linux/pseudo_fs.h>
39#include <linux/fs.h>
40#include <linux/preempt.h>
41#include <linux/workqueue.h>
42#include <linux/slab.h>
43#include <linux/spinlock.h>
44#include <linux/wait.h>
45#include <linux/zpool.h>
46#include <linux/magic.h>
47
48
49
50
51
52
53
54
55
56
57
58#define NCHUNKS_ORDER 6
59
60#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER)
61#define CHUNK_SIZE (1 << CHUNK_SHIFT)
62#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
63#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
64#define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT)
65#define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
66
67#define BUDDY_MASK (0x3)
68#define BUDDY_SHIFT 2
69#define SLOTS_ALIGN (0x40)
70
71
72
73
struct z3fold_pool;
/**
 * struct z3fold_ops - callbacks supplied by the pool user
 * @evict:	called by z3fold_reclaim_page() to evict the allocation
 *		identified by @handle from @pool; must return 0 on success
 *		(and have freed the handle), non-zero otherwise.
 */
struct z3fold_ops {
	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
};
78
/*
 * Buddy identifiers within a z3fold page.  HEADLESS denotes a page with
 * no z3fold header (one big allocation occupying the whole page).
 */
enum buddy {
	HEADLESS = 0,
	FIRST,
	MIDDLE,
	LAST,
	BUDDIES_MAX = LAST
};
86
/**
 * struct z3fold_buddy_slots - handle slots for one z3fold page
 * @slot:	one slot per possible buddy; a slot holds the encoded page
 *		address for the corresponding allocation, or 0 when free
 *		(see encode_handle() / free_handle())
 * @pool:	pointer to the owning z3fold pool; the structure is
 *		SLOTS_ALIGN-aligned, so the low bits are usable for flags
 */
struct z3fold_buddy_slots {
	unsigned long slot[BUDDY_MASK + 1];
	unsigned long pool;
};
/* mask for the flag bits stashed in z3fold_buddy_slots::pool */
#define HANDLE_FLAG_MASK (0x03)
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/**
 * struct z3fold_header - z3fold page metadata occupying first chunks of each
 *			z3fold page, except for HEADLESS pages
 * @buddy:		links the z3fold page into the relevant list in the
 *			pool (unbuddied or stale)
 * @page_lock:		per-page lock
 * @refcount:		reference count for the z3fold page
 * @work:		work_struct for page layout optimization
 * @slots:		pointer to the structure holding buddy slots
 * @pool:		pointer to the containing pool
 * @cpu:		CPU which this page "belongs" to (-1 when detached)
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	the chunk index where the middle buddy starts
 * @first_num:		the starting number (for the first handle)
 * @mapped_count:	the number of objects currently mapped
 */
struct z3fold_header {
	struct list_head buddy;
	spinlock_t page_lock;
	struct kref refcount;
	struct work_struct work;
	struct z3fold_buddy_slots *slots;
	struct z3fold_pool *pool;
	short cpu;
	unsigned short first_chunks;
	unsigned short middle_chunks;
	unsigned short last_chunks;
	unsigned short start_middle;
	unsigned short first_num:2;
	unsigned short mapped_count:2;
};
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/**
 * struct z3fold_pool - stores metadata for each z3fold pool
 * @name:	pool name
 * @lock:	protects pool unbuddied/lru lists
 * @stale_lock:	protects pool stale page list
 * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
 *		buddies; the list each z3fold page is added to depends on
 *		the size of its free region.
 * @lru:	list tracking the z3fold pages in LRU order by most recently
 *		added buddy.
 * @stale:	list of pages marked for freeing
 * @pages_nr:	number of z3fold pages in the pool.
 * @c_handle:	cache for z3fold_buddy_slots allocation
 * @ops:	pointer to a structure of user defined operations specified at
 *		pool creation time.
 * @zpool:	zpool driver back-pointer (set by the zpool glue)
 * @zpool_ops:	zpool driver callbacks (set by the zpool glue)
 * @compact_wq:	workqueue for page layout background optimization
 * @release_wq:	workqueue for safe page release
 * @isolate_wait: waitqueue signalled when the last isolated page is gone
 * @work:	work_struct for safe page release
 * @inode:	inode for z3fold pseudo filesystem (page migration)
 * @destroying: set once destruction has started, to stop page isolation
 * @isolated:	number of pages currently isolated for migration
 *
 * This structure is allocated at pool creation time and maintains metadata
 * pertaining to a particular z3fold pool.
 */
struct z3fold_pool {
	const char *name;
	spinlock_t lock;
	spinlock_t stale_lock;
	struct list_head *unbuddied;
	struct list_head lru;
	struct list_head stale;
	atomic64_t pages_nr;
	struct kmem_cache *c_handle;
	const struct z3fold_ops *ops;
	struct zpool *zpool;
	const struct zpool_ops *zpool_ops;
	struct workqueue_struct *compact_wq;
	struct workqueue_struct *release_wq;
	struct wait_queue_head isolate_wait;
	struct work_struct work;
	struct inode *inode;
	bool destroying;
	int isolated;
};
175
176
177
178
/*
 * Internal z3fold page flags, kept as bits in page->private.
 */
enum z3fold_page_flags {
	PAGE_HEADLESS = 0,
	MIDDLE_CHUNK_MAPPED,
	NEEDS_COMPACTING,
	PAGE_STALE,
	PAGE_CLAIMED,
};
186
187
188
189
190
191
192static int size_to_chunks(size_t size)
193{
194 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
195}
196
197#define for_each_unbuddied_list(_iter, _begin) \
198 for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
199
200static void compact_page_work(struct work_struct *w);
201
202static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
203 gfp_t gfp)
204{
205 struct z3fold_buddy_slots *slots;
206
207 slots = kmem_cache_alloc(pool->c_handle,
208 (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE)));
209
210 if (slots) {
211 memset(slots->slot, 0, sizeof(slots->slot));
212 slots->pool = (unsigned long)pool;
213 }
214
215 return slots;
216}
217
218static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s)
219{
220 return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK);
221}
222
223static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle)
224{
225 return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1));
226}
227
228static inline void free_handle(unsigned long handle)
229{
230 struct z3fold_buddy_slots *slots;
231 int i;
232 bool is_free;
233
234 if (handle & (1 << PAGE_HEADLESS))
235 return;
236
237 WARN_ON(*(unsigned long *)handle == 0);
238 *(unsigned long *)handle = 0;
239 slots = handle_to_slots(handle);
240 is_free = true;
241 for (i = 0; i <= BUDDY_MASK; i++) {
242 if (slots->slot[i]) {
243 is_free = false;
244 break;
245 }
246 }
247
248 if (is_free) {
249 struct z3fold_pool *pool = slots_to_pool(slots);
250
251 kmem_cache_free(pool->c_handle, slots);
252 }
253}
254
255static int z3fold_init_fs_context(struct fs_context *fc)
256{
257 return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM;
258}
259
/* Pseudo filesystem whose anonymous inodes back movable z3fold pages */
static struct file_system_type z3fold_fs = {
	.name = "z3fold",
	.init_fs_context = z3fold_init_fs_context,
	.kill_sb = kill_anon_super,
};
265
266static struct vfsmount *z3fold_mnt;
267static int z3fold_mount(void)
268{
269 int ret = 0;
270
271 z3fold_mnt = kern_mount(&z3fold_fs);
272 if (IS_ERR(z3fold_mnt))
273 ret = PTR_ERR(z3fold_mnt);
274
275 return ret;
276}
277
/* Counterpart of z3fold_mount(): tears down the pseudo-fs mount */
static void z3fold_unmount(void)
{
	kern_unmount(z3fold_mnt);
}
282
283static const struct address_space_operations z3fold_aops;
284static int z3fold_register_migration(struct z3fold_pool *pool)
285{
286 pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb);
287 if (IS_ERR(pool->inode)) {
288 pool->inode = NULL;
289 return 1;
290 }
291
292 pool->inode->i_mapping->private_data = pool;
293 pool->inode->i_mapping->a_ops = &z3fold_aops;
294 return 0;
295}
296
297static void z3fold_unregister_migration(struct z3fold_pool *pool)
298{
299 if (pool->inode)
300 iput(pool->inode);
301 }
302
303
/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page,
					struct z3fold_pool *pool, gfp_t gfp)
{
	struct z3fold_header *zhdr = page_address(page);
	struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);

	if (!slots)
		return NULL;

	INIT_LIST_HEAD(&page->lru);
	/* reset all private flags the page may carry from a previous life */
	clear_bit(PAGE_HEADLESS, &page->private);
	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	clear_bit(PAGE_STALE, &page->private);
	clear_bit(PAGE_CLAIMED, &page->private);

	spin_lock_init(&zhdr->page_lock);
	kref_init(&zhdr->refcount);
	zhdr->first_chunks = 0;
	zhdr->middle_chunks = 0;
	zhdr->last_chunks = 0;
	zhdr->first_num = 0;
	zhdr->start_middle = 0;
	zhdr->cpu = -1;	/* not on any per-cpu unbuddied list yet */
	zhdr->slots = slots;
	zhdr->pool = pool;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_WORK(&zhdr->work, compact_page_work);
	return zhdr;
}
334
335
/* Resets the struct page fields and frees the page */
static void free_z3fold_page(struct page *page, bool headless)
{
	if (!headless) {
		/* __ClearPageMovable() must be called under the page lock */
		lock_page(page);
		__ClearPageMovable(page);
		unlock_page(page);
	}
	ClearPagePrivate(page);
	__free_page(page);
}
346
347
/* Lock a z3fold page */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}
352
353
/* Try to lock a z3fold page; returns non-zero on success */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}
358
359
/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}
364
365
/* Helper function to build the index of the handle slot for buddy @bud */
static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
{
	return (bud + zhdr->first_num) & BUDDY_MASK;
}
370
371
372
373
374
/*
 * Encodes the handle of a particular buddy within a z3fold page.
 * The page lock should be held as this function accesses first_num.
 */
static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
{
	struct z3fold_buddy_slots *slots;
	unsigned long h = (unsigned long)zhdr;
	int idx = 0;

	/*
	 * For a headless page, its handle is its pointer with the extra
	 * PAGE_HEADLESS bit set
	 */
	if (bud == HEADLESS)
		return h | (1 << PAGE_HEADLESS);

	/* otherwise, return pointer to encoded handle */
	idx = __idx(zhdr, bud);
	h += idx;
	if (bud == LAST)
		/* the LAST buddy's size is stashed in the low handle bits */
		h |= (zhdr->last_chunks << BUDDY_SHIFT);

	slots = zhdr->slots;
	slots->slot[idx] = h;
	return (unsigned long)&slots->slot[idx];
}
398
399
400static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h)
401{
402 unsigned long addr = h;
403
404 if (!(addr & (1 << PAGE_HEADLESS)))
405 addr = *(unsigned long *)h;
406
407 return (struct z3fold_header *)(addr & PAGE_MASK);
408}
409
410
/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
	unsigned long addr = *(unsigned long *)handle;

	return (addr & ~PAGE_MASK) >> BUDDY_SHIFT;
}
417
418
419
420
421
422
/*
 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
 *  but that doesn't matter, because the masking will result in the
 *  correct buddy number.
 */
static enum buddy handle_to_buddy(unsigned long handle)
{
	struct z3fold_header *zhdr;
	unsigned long addr;

	/* headless handles carry no buddy information */
	WARN_ON(handle & (1 << PAGE_HEADLESS));
	addr = *(unsigned long *)handle;
	zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
	return (addr - zhdr->first_num) & BUDDY_MASK;
}
433
/* Returns the pool the given z3fold page belongs to */
static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return zhdr->pool;
}
438
/*
 * Marks the page stale, detaches it from the LRU and queues it on the
 * pool's stale list for the release worker to free.  @locked tells
 * whether the caller holds the page lock (it is dropped here if so).
 */
static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	spin_lock(&pool->lock);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);
	/* drop the page lock before taking stale_lock */
	if (locked)
		z3fold_page_unlock(zhdr);
	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);
}
458
459static void __attribute__((__unused__))
460 release_z3fold_page(struct kref *ref)
461{
462 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
463 refcount);
464 __release_z3fold_page(zhdr, false);
465}
466
/* kref release callback; the caller must hold the page lock */
static void release_z3fold_page_locked(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}
474
/*
 * kref release callback like release_z3fold_page_locked(), but also
 * removes the page from its unbuddied list under the pool lock first.
 */
static void release_z3fold_page_locked_list(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
					       refcount);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}
487
/* Release worker: frees every page queued on the pool's stale list */
static void free_pages_work(struct work_struct *w)
{
	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);

	spin_lock(&pool->stale_lock);
	while (!list_empty(&pool->stale)) {
		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
						struct z3fold_header, buddy);
		struct page *page = virt_to_page(zhdr);

		list_del(&zhdr->buddy);
		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
			continue;
		/* cancel_work_sync() may sleep: drop the lock around it */
		spin_unlock(&pool->stale_lock);
		cancel_work_sync(&zhdr->work);
		free_z3fold_page(page, false);
		cond_resched();
		spin_lock(&pool->stale_lock);
	}
	spin_unlock(&pool->stale_lock);
}
509
510
511
512
513
514static int num_free_chunks(struct z3fold_header *zhdr)
515{
516 int nfree;
517
518
519
520
521
522 if (zhdr->middle_chunks != 0) {
523 int nfree_before = zhdr->first_chunks ?
524 0 : zhdr->start_middle - ZHDR_CHUNKS;
525 int nfree_after = zhdr->last_chunks ?
526 0 : TOTAL_CHUNKS -
527 (zhdr->start_middle + zhdr->middle_chunks);
528 nfree = max(nfree_before, nfree_after);
529 } else
530 nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
531 return nfree;
532}
533
534
/*
 * Adds the page to this CPU's unbuddied list, indexed by the number of
 * free chunks, if at least one buddy is still free.  Called with the
 * page lock held.
 */
static inline void add_to_unbuddied(struct z3fold_pool *pool,
				struct z3fold_header *zhdr)
{
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
			zhdr->middle_chunks == 0) {
		/* get_cpu_ptr() also disables preemption until the put */
		struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied);

		int freechunks = num_free_chunks(zhdr);
		spin_lock(&pool->lock);
		list_add(&zhdr->buddy, &unbuddied[freechunks]);
		spin_unlock(&pool->lock);
		zhdr->cpu = smp_processor_id();
		put_cpu_ptr(pool->unbuddied);
	}
}
550
551static inline void *mchunk_memmove(struct z3fold_header *zhdr,
552 unsigned short dst_chunk)
553{
554 void *beg = zhdr;
555 return memmove(beg + (dst_chunk << CHUNK_SHIFT),
556 beg + (zhdr->start_middle << CHUNK_SHIFT),
557 zhdr->middle_chunks << CHUNK_SHIFT);
558}
559
560#define BIG_CHUNK_GAP 3
561
/*
 * Tries to reduce fragmentation within the page by moving the middle
 * buddy.  Has to be called with the page lock held.  Returns 1 if the
 * layout changed, 0 otherwise.
 */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
		return 0; /* can't move middle chunk, it's used */

	if (unlikely(PageIsolated(page)))
		return 0; /* page is under migration */

	if (zhdr->middle_chunks == 0)
		return 0; /* nothing to compact */

	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* move to the beginning */
		mchunk_memmove(zhdr, ZHDR_CHUNKS);
		zhdr->first_chunks = zhdr->middle_chunks;
		zhdr->middle_chunks = 0;
		zhdr->start_middle = 0;
		zhdr->first_num++;
		return 1;
	}

	/*
	 * moving data is expensive, so let's only do that if
	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
	 */
	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
			BIG_CHUNK_GAP) {
		/* new_start: right after the first buddy */
		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
		return 1;
	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
					+ zhdr->middle_chunks) >=
			BIG_CHUNK_GAP) {
		/* new_start: right before the last buddy */
		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
			zhdr->middle_chunks;
		mchunk_memmove(zhdr, new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;
}
608
/*
 * Compacts the page's layout and re-adds it to the proper unbuddied list,
 * dropping the extra reference taken when the compaction was scheduled.
 * @locked tells whether the caller already holds the page lock.
 */
static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	/* NEEDS_COMPACTING must have been set by whoever queued us */
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	/* drop the reference taken when the compaction was queued */
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}

	/* leave isolated or stale pages alone */
	if (unlikely(PageIsolated(page) ||
		     test_bit(PAGE_STALE, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	z3fold_page_unlock(zhdr);
}
642
643static void compact_page_work(struct work_struct *w)
644{
645 struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
646 work);
647
648 do_compact_page(zhdr, false);
649}
650
651
/*
 * __z3fold_alloc() - find an existing page with a buddy large enough
 * @pool:	pool to search
 * @size:	size of the desired allocation, in bytes
 * @can_sleep:	whether the caller may sleep (enables cond_resched())
 *
 * Scans this CPU's per-cpu unbuddied lists for a z3fold page with at least
 * size_to_chunks(size) free chunks, falling back to the other CPUs' lists.
 * Returns the header of a locked page with an elevated refcount, or NULL.
 */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	/* First, try to find an unbuddied z3fold page on this CPU. */
	unbuddied = get_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		/* lockless peek; re-checked under pool->lock below */
		zhdr = list_first_entry_or_null(READ_ONCE(l),
				struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		l = &unbuddied[i];
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
				struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			/* somebody raced with us: start over */
			spin_unlock(&pool->lock);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		if (test_bit(NEEDS_COMPACTING, &page->private)) {
			/* the page is about to be compacted: skip it */
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	put_cpu_ptr(pool->unbuddied);

	if (!zhdr) {
		int cpu;

		/* look for a page with an exact fit on the other CPUs */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
					struct z3fold_header, buddy);

			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			kref_get(&zhdr->refcount);
			break;
		}
	}

	return zhdr;
}
748
749
750
751
752
753
754
755
756
757
758
759
760
761
/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name:	pool name
 * @gfp:	gfp flags when allocating the z3fold pool structure
 * @ops:	user-defined operations for the z3fold pool
 *
 * Return: pointer to the new z3fold pool or NULL if any allocation or
 * registration step failed (resources are unwound via the goto chain).
 */
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
		const struct z3fold_ops *ops)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	/* SLOTS_ALIGN-aligned cache so low handle bits are always zero */
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	init_waitqueue_head(&pool->isolate_wait);
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->lru);
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	if (z3fold_register_migration(pool))
		goto out_rwq;
	INIT_WORK(&pool->work, free_pages_work);
	pool->ops = ops;
	return pool;

out_rwq:
	destroy_workqueue(pool->release_wq);
out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}
817
818static bool pool_isolated_are_drained(struct z3fold_pool *pool)
819{
820 bool ret;
821
822 spin_lock(&pool->lock);
823 ret = pool->isolated == 0;
824 spin_unlock(&pool->lock);
825 return ret;
826}
827
828
829
830
831
832
/**
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 * @pool:	the z3fold pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
	kmem_cache_destroy(pool->c_handle);

	/*
	 * We set pool->destroying under lock to ensure that
	 * z3fold_page_isolate() sees any changes to destroying. This way we
	 * avoid the need for any memory barriers.
	 */
	spin_lock(&pool->lock);
	pool->destroying = true;
	spin_unlock(&pool->lock);

	/*
	 * We need to ensure that no pages are being migrated while we destroy
	 * these workqueues, as migration can queue work on either of the
	 * workqueues.
	 */
	wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));

	/*
	 * We need to destroy pool->compact_wq before pool->release_wq,
	 * as any pending work on pool->compact_wq will call
	 * queue_work(pool->release_wq, &pool->work).
	 *
	 * There are still outstanding pages until both workqueues are drained,
	 * so we cannot unregister migration until then.
	 */
	destroy_workqueue(pool->compact_wq);
	destroy_workqueue(pool->release_wq);
	z3fold_unregister_migration(pool);
	kfree(pool);
}
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool:	z3fold pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough
 * to satisfy the allocation request.  A suitable region is first searched
 * for in the already existing unbuddied pages; if none is found, a new page
 * is allocated (taken from the stale list when possible).
 *
 * Return: 0 if success and @handle is set, otherwise -EINVAL if size is 0,
 * -ENOSPC if size is larger than a page, or -ENOMEM if the pool was unable
 * to allocate a new page.
 */
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size)
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	/* too big to share a page with the header: use a headless page */
	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		bud = HEADLESS;
	else {
retry:
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			/* pick whichever buddy of the found page is free */
			if (zhdr->first_chunks == 0) {
				if (zhdr->middle_chunks != 0 &&
				    chunks >= zhdr->start_middle)
					bud = LAST;
				else
					bud = FIRST;
			} else if (zhdr->last_chunks == 0)
				bud = LAST;
			else if (zhdr->middle_chunks == 0)
				bud = MIDDLE;
			else {
				/* should not happen: page was on unbuddied */
				if (kref_put(&zhdr->refcount,
					     release_z3fold_page_locked))
					atomic64_dec(&pool->pages_nr);
				else
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = NULL;
	if (can_sleep) {
		spin_lock(&pool->stale_lock);
		zhdr = list_first_entry_or_null(&pool->stale,
						struct z3fold_header, buddy);
		/*
		 * Before allocating a page, let's see if we can take one from
		 * the stale pages list. cancel_work_sync() can sleep so we
		 * limit this case to the contexts where we can sleep
		 */
		if (zhdr) {
			list_del(&zhdr->buddy);
			spin_unlock(&pool->stale_lock);
			cancel_work_sync(&zhdr->work);
			page = virt_to_page(zhdr);
		} else {
			spin_unlock(&pool->stale_lock);
		}
	}
	if (!page)
		page = alloc_page(gfp);

	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	/* mark the page movable so the migration core can isolate it */
	if (can_sleep) {
		lock_page(page);
		__SetPageMovable(page, pool->inode->i_mapping);
		unlock_page(page);
	} else {
		if (trylock_page(page)) {
			__SetPageMovable(page, pool->inode->i_mapping);
			unlock_page(page);
		}
	}
	z3fold_page_lock(zhdr);

found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	/* Add/move z3fold page to beginning of LRU */
	if (!list_empty(&page->lru))
		list_del(&page->lru);

	list_add(&page->lru, &pool->lru);

	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by z3fold_alloc()
 *
 * In the case that the z3fold page in which the allocation resides is under
 * reclaim, as indicated by the PAGE_CLAIMED flag being set, this function
 * only sets the first|middle|last_chunks to 0.  The page is actually freed
 * once both buddies are evicted (see z3fold_reclaim_page() below).
 */
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;

	zhdr = handle_to_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is under reclaim, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
			spin_lock(&pool->lock);
			list_del(&page->lru);
			spin_unlock(&pool->lock);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	z3fold_page_lock(zhdr);
	bud = handle_to_buddy(handle);

	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		z3fold_page_unlock(zhdr);
		return;
	}

	free_handle(handle);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	/* the page is claimed by reclaim: leave the rest to it */
	if (test_bit(PAGE_CLAIMED, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	/* don't schedule compaction for isolated or already queued pages */
	if (unlikely(PageIsolated(page)) ||
	    test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		/* no live CPU to queue on: compact synchronously */
		spin_lock(&pool->lock);
		list_del_init(&zhdr->buddy);
		spin_unlock(&pool->lock);
		zhdr->cpu = -1;
		kref_get(&zhdr->refcount);
		do_compact_page(zhdr, true);
		return;
	}
	kref_get(&zhdr->refcount);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	z3fold_page_unlock(zhdr);
}
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
/**
 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
 * @pool:	pool from which a page will attempt to be evicted
 * @retries:	number of pages on the LRU list for which eviction will
 *		be attempted before failing
 *
 * z3fold reclaim is different from normal system reclaim in that it is done
 * from the bottom, up.  This is because only the bottom layer, z3fold, has
 * information on how the allocations are organized within each z3fold page.
 *
 * z3fold_reclaim_page() removes a z3fold page from the pool LRU list and
 * calls the user-defined eviction handler with the pool and each live
 * handle as arguments.
 *
 * If a handle can not be evicted, the eviction handler should return
 * non-zero.  z3fold_reclaim_page() will then add the z3fold page back to
 * the appropriate list and try the next page on the LRU, up to @retries
 * times.
 *
 * If the handle is successfully evicted, the eviction handler should have
 * called z3fold_free() on the handle; z3fold_free() detects reclaim via
 * the PAGE_CLAIMED bit and defers the actual page free to this function.
 *
 * Returns: 0 if the page is successfully freed, otherwise -EINVAL if there
 * are no pages to evict or an eviction handler is not registered, -EAGAIN
 * if the retry limit was hit.
 */
static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
{
	int i, ret = 0;
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	struct list_head *pos;
	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;

	spin_lock(&pool->lock);
	if (!pool->ops || !pool->ops->evict || retries == 0) {
		spin_unlock(&pool->lock);
		return -EINVAL;
	}
	for (i = 0; i < retries; i++) {
		if (list_empty(&pool->lru)) {
			spin_unlock(&pool->lock);
			return -EINVAL;
		}
		list_for_each_prev(pos, &pool->lru) {
			page = list_entry(pos, struct page, lru);

			/* this bit could have been set by free, in which case
			 * we pass over to the next page in the pool.
			 */
			if (test_and_set_bit(PAGE_CLAIMED, &page->private))
				continue;

			if (unlikely(PageIsolated(page)))
				continue;
			if (test_bit(PAGE_HEADLESS, &page->private))
				break;

			zhdr = page_address(page);
			if (!z3fold_page_trylock(zhdr)) {
				zhdr = NULL;
				continue; /* can't evict at this point */
			}
			kref_get(&zhdr->refcount);
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			break;
		}

		if (!zhdr)
			break;

		list_del_init(&page->lru);
		spin_unlock(&pool->lock);

		if (!test_bit(PAGE_HEADLESS, &page->private)) {
			/*
			 * We need encode the handles before unlocking, since
			 * we can race with free that will set
			 * (first|last)_chunks to 0
			 */
			first_handle = 0;
			last_handle = 0;
			middle_handle = 0;
			if (zhdr->first_chunks)
				first_handle = encode_handle(zhdr, FIRST);
			if (zhdr->middle_chunks)
				middle_handle = encode_handle(zhdr, MIDDLE);
			if (zhdr->last_chunks)
				last_handle = encode_handle(zhdr, LAST);
			/*
			 * it's safe to unlock here because we hold a
			 * reference to this page
			 */
			z3fold_page_unlock(zhdr);
		} else {
			first_handle = encode_handle(zhdr, HEADLESS);
			last_handle = middle_handle = 0;
		}

		/* Issue the eviction handler for all the handles */
		if (middle_handle) {
			ret = pool->ops->evict(pool, middle_handle);
			if (ret)
				goto next;
		}
		if (first_handle) {
			ret = pool->ops->evict(pool, first_handle);
			if (ret)
				goto next;
		}
		if (last_handle) {
			ret = pool->ops->evict(pool, last_handle);
			if (ret)
				goto next;
		}
next:
		if (test_bit(PAGE_HEADLESS, &page->private)) {
			if (ret == 0) {
				free_z3fold_page(page, true);
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
		} else {
			z3fold_page_lock(zhdr);
			clear_bit(PAGE_CLAIMED, &page->private);
			if (kref_put(&zhdr->refcount,
					release_z3fold_page_locked)) {
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			/*
			 * if we are here, the page is still not completely
			 * free. Take the global pool lock then to be able
			 * to add it back to the lru list
			 */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			z3fold_page_unlock(zhdr);
		}

		/* We started off locked to we need to lock the pool back */
		spin_lock(&pool->lock);
	}
	spin_unlock(&pool->lock);
	return -EAGAIN;
}
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from the handle and constructs the pointer
 * to the correct starting chunk within the page.
 *
 * Returns: a pointer to the mapped allocation
 */
static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	void *addr;
	enum buddy buddy;

	zhdr = handle_to_z3fold_header(handle);
	addr = zhdr;
	page = virt_to_page(zhdr);

	/* a headless page IS the allocation: nothing to add */
	if (test_bit(PAGE_HEADLESS, &page->private))
		goto out;

	z3fold_page_lock(zhdr);
	buddy = handle_to_buddy(handle);
	switch (buddy) {
	case FIRST:
		addr += ZHDR_SIZE_ALIGNED;
		break;
	case MIDDLE:
		addr += zhdr->start_middle << CHUNK_SHIFT;
		/* keep z3fold_compact_page() from moving the middle buddy */
		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
		break;
	case LAST:
		/* the LAST buddy ends at the page boundary */
		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
		break;
	default:
		pr_err("unknown buddy id %d\n", buddy);
		WARN_ON(1);
		addr = NULL;
		break;
	}

	if (addr)
		zhdr->mapped_count++;
	z3fold_page_unlock(zhdr);
out:
	return addr;
}
1305
1306
1307
1308
1309
1310
/**
 * z3fold_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 */
static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy buddy;

	zhdr = handle_to_z3fold_header(handle);
	page = virt_to_page(zhdr);

	/* headless pages are never flagged as mapped */
	if (test_bit(PAGE_HEADLESS, &page->private))
		return;

	z3fold_page_lock(zhdr);
	buddy = handle_to_buddy(handle);
	if (buddy == MIDDLE)
		/* the middle buddy may be moved by compaction again */
		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	zhdr->mapped_count--;
	z3fold_page_unlock(zhdr);
}
1330
1331
1332
1333
1334
1335
1336
/**
 * z3fold_get_pool_size() - gets the z3fold pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Returns: size in pages of the given pool.
 */
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}
1341
1342
1343
1344
/* Drops the isolated-page count; caller must hold pool->lock */
static void z3fold_dec_isolated(struct z3fold_pool *pool)
{
	assert_spin_locked(&pool->lock);
	VM_BUG_ON(pool->isolated <= 0);
	pool->isolated--;

	/*
	 * If we have no more isolated pages, we have to see if
	 * z3fold_destroy_pool() is waiting for a signal.
	 */
	if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait))
		wake_up_all(&pool->isolate_wait);
}
1358
/* Bumps the isolated-page count; called with pool->lock held */
static void z3fold_inc_isolated(struct z3fold_pool *pool)
{
	pool->isolated++;
}
1363
/*
 * Migration callback: detaches an unmapped z3fold page from the pool lists
 * and takes a reference on it.  Returns true if the page was isolated.
 */
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(PageIsolated(page), page);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	pool = zhdr_to_pool(zhdr);

	/* only pages with no object currently mapped can be migrated */
	if (zhdr->mapped_count == 0) {
		kref_get(&zhdr->refcount);
		if (!list_empty(&zhdr->buddy))
			list_del_init(&zhdr->buddy);
		spin_lock(&pool->lock);
		if (!list_empty(&page->lru))
			list_del(&page->lru);
		/*
		 * We need to check for destruction while holding pool->lock,
		 * as otherwise destruction could see 0 isolated pages and
		 * proceed.
		 */
		if (unlikely(pool->destroying)) {
			spin_unlock(&pool->lock);
			/*
			 * If this page isn't stale, somebody else holds a
			 * reference to it. Let's drop our refcount so that
			 * they can call the release logic.
			 */
			if (unlikely(kref_put(&zhdr->refcount,
					      release_z3fold_page_locked))) {
				/*
				 * If we get here we have kref problems, so we
				 * should freak out.
				 */
				WARN(1, "Z3fold is experiencing kref problems\n");
				z3fold_page_unlock(zhdr);
				return false;
			}
			z3fold_page_unlock(zhdr);
			return false;
		}


		z3fold_inc_isolated(pool);
		spin_unlock(&pool->lock);
		z3fold_page_unlock(zhdr);
		return true;
	}
out:
	z3fold_page_unlock(zhdr);
	return false;
}
1426
/*
 * Migration callback: copies the z3fold page contents and metadata to
 * @newpage, re-registers the new page with the pool and releases the old
 * one.  Returns 0 on success, -EAGAIN/-EBUSY if the page cannot be moved
 * right now.
 */
static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
			       struct page *page, enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;
	struct address_space *new_mapping;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!z3fold_page_trylock(zhdr)) {
		return -EAGAIN;
	}
	/* a mapped object pins the page contents in place */
	if (zhdr->mapped_count != 0) {
		z3fold_page_unlock(zhdr);
		return -EBUSY;
	}
	/* pending compaction work still references the old header */
	if (work_pending(&zhdr->work)) {
		z3fold_page_unlock(zhdr);
		return -EAGAIN;
	}
	new_zhdr = page_address(newpage);
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	page->private = 0;
	z3fold_page_unlock(zhdr);
	spin_lock_init(&new_zhdr->page_lock);
	INIT_WORK(&new_zhdr->work, compact_page_work);
	/*
	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
	 * so we only have to reinitialize it.
	 */
	INIT_LIST_HEAD(&new_zhdr->buddy);
	new_mapping = page_mapping(page);
	__ClearPageMovable(page);
	ClearPagePrivate(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	/* refresh the handle slots to point into the new page */
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	spin_lock(&pool->lock);
	list_add(&newpage->lru, &pool->lru);
	spin_unlock(&pool->lock);
	__SetPageMovable(newpage, new_mapping);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	spin_lock(&pool->lock);
	z3fold_dec_isolated(pool);
	spin_unlock(&pool->lock);

	page_mapcount_reset(page);
	put_page(page);
	return 0;
}
1494
/*
 * Migration callback: returns an isolated page to the pool lists after a
 * failed migration attempt, dropping the reference taken at isolation.
 */
static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		spin_lock(&pool->lock);
		z3fold_dec_isolated(pool);
		spin_unlock(&pool->lock);
		return;
	}
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	z3fold_dec_isolated(pool);
	spin_unlock(&pool->lock);
	z3fold_page_unlock(zhdr);
}
1520
/* Callbacks exposed to the page-migration core via the pool's inode mapping */
static const struct address_space_operations z3fold_aops = {
	.isolate_page = z3fold_page_isolate,
	.migratepage = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};
1526
1527
1528
1529
1530
1531static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
1532{
1533 if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
1534 return pool->zpool_ops->evict(pool->zpool, handle);
1535 else
1536 return -ENOENT;
1537}
1538
/* z3fold_ops instance bridging reclaim to the zpool eviction callback */
static const struct z3fold_ops z3fold_zpool_ops = {
	.evict = z3fold_zpool_evict
};
1542
1543static void *z3fold_zpool_create(const char *name, gfp_t gfp,
1544 const struct zpool_ops *zpool_ops,
1545 struct zpool *zpool)
1546{
1547 struct z3fold_pool *pool;
1548
1549 pool = z3fold_create_pool(name, gfp,
1550 zpool_ops ? &z3fold_zpool_ops : NULL);
1551 if (pool) {
1552 pool->zpool = zpool;
1553 pool->zpool_ops = zpool_ops;
1554 }
1555 return pool;
1556}
1557
/* zpool glue: destroys the underlying z3fold pool */
static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}
1562
/* zpool glue: thin wrapper around z3fold_alloc() */
static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
/* zpool glue: thin wrapper around z3fold_free() */
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}
1572
1573static int z3fold_zpool_shrink(void *pool, unsigned int pages,
1574 unsigned int *reclaimed)
1575{
1576 unsigned int total = 0;
1577 int ret = -EINVAL;
1578
1579 while (total < pages) {
1580 ret = z3fold_reclaim_page(pool, 8);
1581 if (ret < 0)
1582 break;
1583 total++;
1584 }
1585
1586 if (reclaimed)
1587 *reclaimed = total;
1588
1589 return ret;
1590}
1591
/* zpool glue: maps a handle; the zpool map mode is ignored by z3fold */
static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
/* zpool glue: thin wrapper around z3fold_unmap() */
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}
1601
/* zpool glue: reports the pool size in bytes rather than pages */
static u64 z3fold_zpool_total_size(void *pool)
{
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}
1606
/* Registers z3fold as the "z3fold" zpool backend */
static struct zpool_driver z3fold_zpool_driver = {
	.type = "z3fold",
	.owner = THIS_MODULE,
	.create = z3fold_zpool_create,
	.destroy = z3fold_zpool_destroy,
	.malloc = z3fold_zpool_malloc,
	.free = z3fold_zpool_free,
	.shrink = z3fold_zpool_shrink,
	.map = z3fold_zpool_map,
	.unmap = z3fold_zpool_unmap,
	.total_size = z3fold_zpool_total_size,
};
1619
1620MODULE_ALIAS("zpool-z3fold");
1621
/* Module init: mounts the pseudo-fs and registers the zpool driver */
static int __init init_z3fold(void)
{
	int ret;

	/* Make sure the z3fold header is not larger than the page size */
	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
	ret = z3fold_mount();
	if (ret)
		return ret;

	zpool_register_driver(&z3fold_zpool_driver);

	return 0;
}
1636
/* Module exit: unmounts the pseudo-fs and unregisters the zpool driver */
static void __exit exit_z3fold(void)
{
	z3fold_unmount();
	zpool_unregister_driver(&z3fold_zpool_driver);
}
1642
1643module_init(init_z3fold);
1644module_exit(exit_z3fold);
1645
1646MODULE_LICENSE("GPL");
1647MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
1648MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");
1649