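/*
 * z3fold.c
 *
 * Author: Vitaly Wool <vitalywool@gmail.com>
 *
 * z3fold is a special purpose allocator for storing compressed pages. It
 * can store up to three compressed pages per physical page, improving on
 * zbud's compression ratio while retaining its simplicity and determinism.
 */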
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/dcache.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/migrate.h>
#include <linux/node.h>
#include <linux/compaction.h>
#include <linux/percpu.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
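
/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation. It also determines the number of
 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
 * at the beginning of an allocated page are occupied by the z3fold header;
 * NCHUNKS is the maximum number of free chunks in a z3fold page, and there
 * is one unbuddied freelist per possible free-chunk count.
 */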
#define NCHUNKS_ORDER	6

#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
#define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
#define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
#define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)

#define BUDDY_MASK	(0x3)
#define BUDDY_SHIFT	2
#define SLOTS_ALIGN	(0x40)
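
/*
 * Worked example (an illustration only, assuming 4K pages, i.e.
 * PAGE_SHIFT == 12): CHUNK_SHIFT == 6, so CHUNK_SIZE == 64 bytes and
 * TOTAL_CHUNKS == 64. The header occupies the first ZHDR_CHUNKS chunks of
 * each page and the remaining NCHUNKS chunks hold up to three buddies
 * (FIRST, MIDDLE, LAST).
 */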
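
/*****************
 * Structures
 *****************/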
struct z3fold_pool;
struct z3fold_ops {
	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
};

enum buddy {
	HEADLESS = 0,
	FIRST,
	MIDDLE,
	LAST,
	BUDDIES_MAX = LAST
};

struct z3fold_buddy_slots {
	/*
	 * we are using BUDDY_MASK in handle_to_buddy etc. so there should
	 * be enough slots to hold all possible variants
	 */
	unsigned long slot[BUDDY_MASK + 1];
	unsigned long pool; /* back link + flags */
};
#define HANDLE_FLAG_MASK	(0x03)
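
/**
 * struct z3fold_header - z3fold page metadata occupying first chunks of each
 *			z3fold page
 * @buddy:	links the z3fold page into the relevant list in the pool
 * @page_lock:		per-page lock
 * @refcount:		reference count for the z3fold page
 * @work:	work_struct for page layout optimization
 * @slots:	pointer to the structure holding buddy slots
 * @cpu:	CPU which this page "belongs" to
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	the location of the middle buddy in chunks
 * @first_num:	the starting number (for the first handle)
 * @mapped_count:	the number of objects currently mapped
 */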
struct z3fold_header {
	struct list_head buddy;
	spinlock_t page_lock;
	struct kref refcount;
	struct work_struct work;
	struct z3fold_buddy_slots *slots;
	short cpu;
	unsigned short first_chunks;
	unsigned short middle_chunks;
	unsigned short last_chunks;
	unsigned short start_middle;
	unsigned short first_num:2;
	unsigned short mapped_count:2;
};
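
/**
 * struct z3fold_pool - stores metadata for each z3fold pool
 * @name:	pool name
 * @lock:	protects pool unbuddied/lru lists
 * @stale_lock:	protects pool stale page list
 * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
 *		buddies; the respective index is the number of free chunks in
 *		a page
 * @lru:	list tracking the z3fold pages in LRU order by most recently
 *		added buddy
 * @stale:	list of pages marked for freeing
 * @pages_nr:	number of z3fold pages in the pool
 * @c_handle:	cache for z3fold_buddy_slots allocation
 * @ops:	pointer to a structure of user defined operations specified at
 *		pool creation time
 * @zpool:	zpool driver
 * @zpool_ops:	zpool operations structure with an evict callback
 * @compact_wq:	workqueue for page layout background optimization
 * @release_wq:	workqueue for safe page release
 * @work:	work_struct for safe page release
 * @inode:	inode for the z3fold pseudo filesystem
 *
 * This structure is allocated at pool creation time and maintains metadata
 * for a particular z3fold pool.
 */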
struct z3fold_pool {
	const char *name;
	spinlock_t lock;
	spinlock_t stale_lock;
	struct list_head *unbuddied;
	struct list_head lru;
	struct list_head stale;
	atomic64_t pages_nr;
	struct kmem_cache *c_handle;
	const struct z3fold_ops *ops;
	struct zpool *zpool;
	const struct zpool_ops *zpool_ops;
	struct workqueue_struct *compact_wq;
	struct workqueue_struct *release_wq;
	struct work_struct work;
	struct inode *inode;
};
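
/*
 * Internal z3fold page flags, stored in page->private
 */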
enum z3fold_page_flags {
	PAGE_HEADLESS = 0,
	MIDDLE_CHUNK_MAPPED,
	NEEDS_COMPACTING,
	PAGE_STALE,
	PAGE_CLAIMED,
};
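
/*****************
 * Helpers
 *****************/

/* Converts an allocation size in bytes to size in z3fold chunks */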
static int size_to_chunks(size_t size)
{
	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}

#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)

static void compact_page_work(struct work_struct *w);

static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
							gfp_t gfp)
{
	struct z3fold_buddy_slots *slots = kmem_cache_alloc(pool->c_handle,
							    gfp);

	if (slots) {
		memset(slots->slot, 0, sizeof(slots->slot));
		slots->pool = (unsigned long)pool;
	}

	return slots;
}

static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s)
{
	return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK);
}

static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle)
{
	return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1));
}

static inline void free_handle(unsigned long handle)
{
	struct z3fold_buddy_slots *slots;
	int i;
	bool is_free;

	if (handle & (1 << PAGE_HEADLESS))
		return;

	WARN_ON(*(unsigned long *)handle == 0);
	*(unsigned long *)handle = 0;
	slots = handle_to_slots(handle);
	is_free = true;
	for (i = 0; i <= BUDDY_MASK; i++) {
		if (slots->slot[i]) {
			is_free = false;
			break;
		}
	}

	/* free the slots structure once no buddy of this page uses it */
	if (is_free) {
		struct z3fold_pool *pool = slots_to_pool(slots);

		kmem_cache_free(pool->c_handle, slots);
	}
}

static struct dentry *z3fold_do_mount(struct file_system_type *fs_type,
				int flags, const char *dev_name, void *data)
{
	static const struct dentry_operations ops = {
		.d_dname = simple_dname,
	};

	return mount_pseudo(fs_type, "z3fold:", NULL, &ops, 0x33);
}

static struct file_system_type z3fold_fs = {
	.name		= "z3fold",
	.mount		= z3fold_do_mount,
	.kill_sb	= kill_anon_super,
};

static struct vfsmount *z3fold_mnt;
static int z3fold_mount(void)
{
	int ret = 0;

	z3fold_mnt = kern_mount(&z3fold_fs);
	if (IS_ERR(z3fold_mnt))
		ret = PTR_ERR(z3fold_mnt);

	return ret;
}

static void z3fold_unmount(void)
{
	kern_unmount(z3fold_mnt);
}

static const struct address_space_operations z3fold_aops;
static int z3fold_register_migration(struct z3fold_pool *pool)
{
	pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb);
	if (IS_ERR(pool->inode)) {
		pool->inode = NULL;
		return 1;
	}

	pool->inode->i_mapping->private_data = pool;
	pool->inode->i_mapping->a_ops = &z3fold_aops;
	return 0;
}

static void z3fold_unregister_migration(struct z3fold_pool *pool)
{
	if (pool->inode)
		iput(pool->inode);
}

/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page,
					struct z3fold_pool *pool, gfp_t gfp)
{
	struct z3fold_header *zhdr = page_address(page);
	struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);

	if (!slots)
		return NULL;

	INIT_LIST_HEAD(&page->lru);
	clear_bit(PAGE_HEADLESS, &page->private);
	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	clear_bit(PAGE_STALE, &page->private);
	clear_bit(PAGE_CLAIMED, &page->private);

	spin_lock_init(&zhdr->page_lock);
	kref_init(&zhdr->refcount);
	zhdr->first_chunks = 0;
	zhdr->middle_chunks = 0;
	zhdr->last_chunks = 0;
	zhdr->first_num = 0;
	zhdr->start_middle = 0;
	zhdr->cpu = -1;
	zhdr->slots = slots;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_WORK(&zhdr->work, compact_page_work);
	return zhdr;
}

/* Resets the struct page fields and frees the page */
static void free_z3fold_page(struct page *page, bool headless)
{
	if (!headless) {
		lock_page(page);
		__ClearPageMovable(page);
		unlock_page(page);
	}
	ClearPagePrivate(page);
	__free_page(page);
}

/* Lock a z3fold page */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}

/* Try to lock a z3fold page */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}

/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}

/* Helper function to build the index */
static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
{
	return (bud + zhdr->first_num) & BUDDY_MASK;
}

/*
 * Encodes the handle of a particular buddy within a z3fold page
 * Pool lock should be held as this function accesses first_num
 */
static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
{
	struct z3fold_buddy_slots *slots;
	unsigned long h = (unsigned long)zhdr;
	int idx = 0;

	/*
	 * For a headless page, its handle is its pointer with the extra
	 * PAGE_HEADLESS bit set
	 */
	if (bud == HEADLESS)
		return h | (1 << PAGE_HEADLESS);

	/* otherwise, return pointer to encoded handle */
	idx = __idx(zhdr, bud);
	h += idx;
	if (bud == LAST)
		h |= (zhdr->last_chunks << BUDDY_SHIFT);

	slots = zhdr->slots;
	slots->slot[idx] = h;
	return (unsigned long)&slots->slot[idx];
}

/* Returns the z3fold page where a given handle is stored */
static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h)
{
	unsigned long addr = h;

	if (!(addr & (1 << PAGE_HEADLESS)))
		addr = *(unsigned long *)h;

	return (struct z3fold_header *)(addr & PAGE_MASK);
}

/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
	unsigned long addr = *(unsigned long *)handle;

	return (addr & ~PAGE_MASK) >> BUDDY_SHIFT;
}

/*
 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
 *  but that doesn't matter, because the masking will result in the
 *  correct buddy number.
 */
static enum buddy handle_to_buddy(unsigned long handle)
{
	struct z3fold_header *zhdr;
	unsigned long addr;

	WARN_ON(handle & (1 << PAGE_HEADLESS));
	addr = *(unsigned long *)handle;
	zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
	return (addr - zhdr->first_num) & BUDDY_MASK;
}

static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return slots_to_pool(zhdr->slots);
}
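
/*
 * Illustration of the handle encoding above (not authoritative, derived from
 * encode_handle()/handle_to_buddy()): a non-headless handle is the address
 * of a slot in the page's z3fold_buddy_slots; that slot stores the header
 * address plus the buddy index and, for the LAST buddy, the allocation size
 * in chunks shifted left by BUDDY_SHIFT. E.g. with first_num == 0, the
 * MIDDLE buddy of the page at 0x...1000 is stored in slot 2 as 0x...1002.
 */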
static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	spin_lock(&pool->lock);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);
	if (locked)
		z3fold_page_unlock(zhdr);
	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);
}

static void __attribute__((__unused__))
			release_z3fold_page(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	__release_z3fold_page(zhdr, false);
}

static void release_z3fold_page_locked(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static void release_z3fold_page_locked_list(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static void free_pages_work(struct work_struct *w)
{
	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);

	spin_lock(&pool->stale_lock);
	while (!list_empty(&pool->stale)) {
		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
						struct z3fold_header, buddy);
		struct page *page = virt_to_page(zhdr);

		list_del(&zhdr->buddy);
		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
			continue;
		spin_unlock(&pool->stale_lock);
		cancel_work_sync(&zhdr->work);
		free_z3fold_page(page, false);
		cond_resched();
		spin_lock(&pool->stale_lock);
	}
	spin_unlock(&pool->stale_lock);
}

/*
 * Returns the number of free chunks in a z3fold page.
 * NB: can't be used with HEADLESS pages.
 */
static int num_free_chunks(struct z3fold_header *zhdr)
{
	int nfree;

	/*
	 * If there is a middle object, pick up the bigger free space
	 * either before or after it. Otherwise just subtract the number
	 * of chunks occupied by the first and the last objects.
	 */
	if (zhdr->middle_chunks != 0) {
		int nfree_before = zhdr->first_chunks ?
			0 : zhdr->start_middle - ZHDR_CHUNKS;
		int nfree_after = zhdr->last_chunks ?
			0 : TOTAL_CHUNKS -
				(zhdr->start_middle + zhdr->middle_chunks);
		nfree = max(nfree_before, nfree_after);
	} else
		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
	return nfree;
}

/* Add to the appropriate unbuddied list */
static inline void add_to_unbuddied(struct z3fold_pool *pool,
				struct z3fold_header *zhdr)
{
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
			zhdr->middle_chunks == 0) {
		struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied);

		int freechunks = num_free_chunks(zhdr);
		spin_lock(&pool->lock);
		list_add(&zhdr->buddy, &unbuddied[freechunks]);
		spin_unlock(&pool->lock);
		zhdr->cpu = smp_processor_id();
		put_cpu_ptr(pool->unbuddied);
	}
}

static inline void *mchunk_memmove(struct z3fold_header *zhdr,
				unsigned short dst_chunk)
{
	void *beg = zhdr;
	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
		       beg + (zhdr->start_middle << CHUNK_SHIFT),
		       zhdr->middle_chunks << CHUNK_SHIFT);
}

#define BIG_CHUNK_GAP	3
/* Has to be called with lock held */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
		return 0; /* can't move middle chunk, it's used */

	if (unlikely(PageIsolated(page)))
		return 0;

	if (zhdr->middle_chunks == 0)
		return 0; /* nothing to compact */

	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* move to the beginning */
		mchunk_memmove(zhdr, ZHDR_CHUNKS);
		zhdr->first_chunks = zhdr->middle_chunks;
		zhdr->middle_chunks = 0;
		zhdr->start_middle = 0;
		zhdr->first_num++;
		return 1;
	}

	/*
	 * moving data is expensive, so let's only do that if
	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
	 */
	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
			BIG_CHUNK_GAP) {
		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
		return 1;
	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
					+ zhdr->middle_chunks) >=
			BIG_CHUNK_GAP) {
		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
			zhdr->middle_chunks;
		mchunk_memmove(zhdr, new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;
}

static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}

	if (unlikely(PageIsolated(page) ||
		     test_bit(PAGE_STALE, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	z3fold_page_unlock(zhdr);
}

static void compact_page_work(struct work_struct *w)
{
	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
						work);

	do_compact_page(zhdr, false);
}

/* returns _locked_ z3fold page header or NULL */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	/* First, try to find an unbuddied z3fold page. */
	unbuddied = get_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		zhdr = list_first_entry_or_null(READ_ONCE(l),
				struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		l = &unbuddied[i];
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
				struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			spin_unlock(&pool->lock);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		if (test_bit(NEEDS_COMPACTING, &page->private)) {
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	put_cpu_ptr(pool->unbuddied);

	if (!zhdr) {
		int cpu;

		/* look for _exact_ match on other cpus' lists */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
						struct z3fold_header, buddy);

			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			kref_get(&zhdr->refcount);
			break;
		}
	}

	return zhdr;
}
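
/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name:	pool name
 * @gfp:	gfp flags when allocating the z3fold pool structure
 * @ops:	user-defined operations for the z3fold pool
 *
 * Return: pointer to the new z3fold pool or NULL if any allocation of pool
 * metadata failed.
 */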
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
		const struct z3fold_ops *ops)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->lru);
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	if (z3fold_register_migration(pool))
		goto out_rwq;
	INIT_WORK(&pool->work, free_pages_work);
	pool->ops = ops;
	return pool;

out_rwq:
	destroy_workqueue(pool->release_wq);
out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}
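
/**
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 * @pool:	the z3fold pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */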
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
	kmem_cache_destroy(pool->c_handle);
	z3fold_unregister_migration(pool);
	destroy_workqueue(pool->release_wq);
	destroy_workqueue(pool->compact_wq);
	kfree(pool);
}
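
/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool:	z3fold pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough
 * to satisfy the allocation request. If no suitable free region is found, a
 * new page is allocated and added to the pool to satisfy the request.
 *
 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used as z3fold
 * pool pages.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid, -ENOSPC if the size is too large, or -ENOMEM
 * if the pool was unable to obtain a new page.
 */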
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size || (gfp & __GFP_HIGHMEM))
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		bud = HEADLESS;
	else {
retry:
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			if (zhdr->first_chunks == 0) {
				if (zhdr->middle_chunks != 0 &&
				    chunks >= zhdr->start_middle)
					bud = LAST;
				else
					bud = FIRST;
			} else if (zhdr->last_chunks == 0)
				bud = LAST;
			else if (zhdr->middle_chunks == 0)
				bud = MIDDLE;
			else {
				if (kref_put(&zhdr->refcount,
					     release_z3fold_page_locked))
					atomic64_dec(&pool->pages_nr);
				else
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = NULL;
	if (can_sleep) {
		spin_lock(&pool->stale_lock);
		zhdr = list_first_entry_or_null(&pool->stale,
						struct z3fold_header, buddy);
		/*
		 * Before allocating a page, let's see if we can take one from
		 * the stale pages list. cancel_work_sync() can sleep so we
		 * limit this case to the contexts where we can sleep
		 */
		if (zhdr) {
			list_del(&zhdr->buddy);
			spin_unlock(&pool->stale_lock);
			cancel_work_sync(&zhdr->work);
			page = virt_to_page(zhdr);
		} else {
			spin_unlock(&pool->stale_lock);
		}
	}
	if (!page)
		page = alloc_page(gfp);

	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	__SetPageMovable(page, pool->inode->i_mapping);
	z3fold_page_lock(zhdr);

found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	/* Add/move z3fold page to beginning of LRU */
	if (!list_empty(&page->lru))
		list_del(&page->lru);

	list_add(&page->lru, &pool->lru);

	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}
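
/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by z3fold_alloc()
 *
 * If the z3fold page in which the allocation resides is under reclaim, as
 * indicated by the PAGE_CLAIMED flag being set, this function only clears
 * the corresponding chunk counter; the page is actually released once
 * reclaim is done with it (see z3fold_reclaim_page() below).
 */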
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;

	zhdr = handle_to_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is under reclaim, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
			spin_lock(&pool->lock);
			list_del(&page->lru);
			spin_unlock(&pool->lock);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	z3fold_page_lock(zhdr);
	bud = handle_to_buddy(handle);

	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		z3fold_page_unlock(zhdr);
		return;
	}

	free_handle(handle);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	if (test_bit(PAGE_CLAIMED, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	if (unlikely(PageIsolated(page)) ||
	    test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		spin_lock(&pool->lock);
		list_del_init(&zhdr->buddy);
		spin_unlock(&pool->lock);
		zhdr->cpu = -1;
		kref_get(&zhdr->refcount);
		do_compact_page(zhdr, true);
		return;
	}
	kref_get(&zhdr->refcount);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	z3fold_page_unlock(zhdr);
}
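
/**
 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
 * @pool:	pool from which a page will attempt to be evicted
 * @retries:	number of pages on the LRU list for which eviction will
 *		be attempted before failing
 *
 * z3fold reclaim is different from normal system reclaim in that it is done
 * on behalf of the user of the z3fold pool (e.g. zswap). The pool hands the
 * user the handle of each allocated buddy through the pool's evict callback;
 * it is then up to the user to free the evicted objects via z3fold_free().
 * Once all buddies of a page have been evicted, the page itself is freed.
 *
 * Returns: 0 if a page is successfully freed, -EINVAL if the pool has no
 * eviction callback or no pages to reclaim (or retries == 0), or -EAGAIN if
 * all retries were exhausted without freeing a page.
 */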
static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
{
	int i, ret = 0;
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	struct list_head *pos;
	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;

	spin_lock(&pool->lock);
	if (!pool->ops || !pool->ops->evict || retries == 0) {
		spin_unlock(&pool->lock);
		return -EINVAL;
	}
	for (i = 0; i < retries; i++) {
		if (list_empty(&pool->lru)) {
			spin_unlock(&pool->lock);
			return -EINVAL;
		}
		list_for_each_prev(pos, &pool->lru) {
			page = list_entry(pos, struct page, lru);

			/* this bit could have been set by free, in which case
			 * we pass over to the next page in the pool.
			 */
			if (test_and_set_bit(PAGE_CLAIMED, &page->private))
				continue;

			if (unlikely(PageIsolated(page)))
				continue;
			if (test_bit(PAGE_HEADLESS, &page->private))
				break;

			zhdr = page_address(page);
			if (!z3fold_page_trylock(zhdr)) {
				zhdr = NULL;
				continue; /* can't evict at this point */
			}
			kref_get(&zhdr->refcount);
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			break;
		}

		if (!zhdr)
			break;

		list_del_init(&page->lru);
		spin_unlock(&pool->lock);

		if (!test_bit(PAGE_HEADLESS, &page->private)) {
			/*
			 * We need to encode the handles before unlocking,
			 * since we can race with free that will set
			 * (first|last)_chunks to 0
			 */
			first_handle = 0;
			last_handle = 0;
			middle_handle = 0;
			if (zhdr->first_chunks)
				first_handle = encode_handle(zhdr, FIRST);
			if (zhdr->middle_chunks)
				middle_handle = encode_handle(zhdr, MIDDLE);
			if (zhdr->last_chunks)
				last_handle = encode_handle(zhdr, LAST);
			/*
			 * it's safe to unlock here because we hold a
			 * reference to this page
			 */
			z3fold_page_unlock(zhdr);
		} else {
			first_handle = encode_handle(zhdr, HEADLESS);
			last_handle = middle_handle = 0;
		}

		/* Issue the eviction callback(s) */
		if (middle_handle) {
			ret = pool->ops->evict(pool, middle_handle);
			if (ret)
				goto next;
		}
		if (first_handle) {
			ret = pool->ops->evict(pool, first_handle);
			if (ret)
				goto next;
		}
		if (last_handle) {
			ret = pool->ops->evict(pool, last_handle);
			if (ret)
				goto next;
		}
next:
		if (test_bit(PAGE_HEADLESS, &page->private)) {
			if (ret == 0) {
				free_z3fold_page(page, true);
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
		} else {
			z3fold_page_lock(zhdr);
			clear_bit(PAGE_CLAIMED, &page->private);
			if (kref_put(&zhdr->refcount,
					release_z3fold_page_locked)) {
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			/*
			 * if we are here, the page is still not completely
			 * free. Take the global pool lock then to be able
			 * to add it back to the lru list
			 */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			z3fold_page_unlock(zhdr);
		}

		/* We started off locked so we need to lock the pool back */
		spin_lock(&pool->lock);
	}
	spin_unlock(&pool->lock);
	return -EAGAIN;
}
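
/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from the handle and constructs the pointer to
 * the correct starting chunk within the page.
 *
 * Returns: a pointer to the mapped allocation
 */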
static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	void *addr;
	enum buddy buddy;

	zhdr = handle_to_z3fold_header(handle);
	addr = zhdr;
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		goto out;

	z3fold_page_lock(zhdr);
	buddy = handle_to_buddy(handle);
	switch (buddy) {
	case FIRST:
		addr += ZHDR_SIZE_ALIGNED;
		break;
	case MIDDLE:
		addr += zhdr->start_middle << CHUNK_SHIFT;
		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
		break;
	case LAST:
		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
		break;
	default:
		pr_err("unknown buddy id %d\n", buddy);
		WARN_ON(1);
		addr = NULL;
		break;
	}

	if (addr)
		zhdr->mapped_count++;
	z3fold_page_unlock(zhdr);
out:
	return addr;
}
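
/**
 * z3fold_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 */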
static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy buddy;

	zhdr = handle_to_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return;

	z3fold_page_lock(zhdr);
	buddy = handle_to_buddy(handle);
	if (buddy == MIDDLE)
		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	zhdr->mapped_count--;
	z3fold_page_unlock(zhdr);
}
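
/**
 * z3fold_get_pool_size() - gets the z3fold pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Returns: size in pages of the given pool.
 */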
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}

static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(PageIsolated(page), page);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	pool = zhdr_to_pool(zhdr);

	if (zhdr->mapped_count == 0) {
		kref_get(&zhdr->refcount);
		if (!list_empty(&zhdr->buddy))
			list_del_init(&zhdr->buddy);
		spin_lock(&pool->lock);
		if (!list_empty(&page->lru))
			list_del(&page->lru);
		spin_unlock(&pool->lock);
		z3fold_page_unlock(zhdr);
		return true;
	}
out:
	z3fold_page_unlock(zhdr);
	return false;
}

static int z3fold_page_migrate(struct address_space *mapping,
			       struct page *newpage, struct page *page,
			       enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;
	struct address_space *new_mapping;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!trylock_page(page))
		return -EAGAIN;

	if (!z3fold_page_trylock(zhdr)) {
		unlock_page(page);
		return -EAGAIN;
	}
	if (zhdr->mapped_count != 0) {
		z3fold_page_unlock(zhdr);
		unlock_page(page);
		return -EBUSY;
	}
	new_zhdr = page_address(newpage);
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	page->private = 0;
	z3fold_page_unlock(zhdr);
	spin_lock_init(&new_zhdr->page_lock);
	new_mapping = page_mapping(page);
	__ClearPageMovable(page);
	ClearPagePrivate(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	spin_lock(&pool->lock);
	list_add(&newpage->lru, &pool->lru);
	spin_unlock(&pool->lock);
	__SetPageMovable(newpage, new_mapping);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	page_mapcount_reset(page);
	unlock_page(page);
	put_page(page);
	return 0;
}

static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	spin_unlock(&pool->lock);
	z3fold_page_unlock(zhdr);
}

static const struct address_space_operations z3fold_aops = {
	.isolate_page = z3fold_page_isolate,
	.migratepage = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};
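
/*****************
 * zpool
 ****************/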
static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
{
	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
		return pool->zpool_ops->evict(pool->zpool, handle);
	else
		return -ENOENT;
}

static const struct z3fold_ops z3fold_zpool_ops = {
	.evict =	z3fold_zpool_evict
};

static void *z3fold_zpool_create(const char *name, gfp_t gfp,
			       const struct zpool_ops *zpool_ops,
			       struct zpool *zpool)
{
	struct z3fold_pool *pool;

	pool = z3fold_create_pool(name, gfp,
				zpool_ops ? &z3fold_zpool_ops : NULL);
	if (pool) {
		pool->zpool = zpool;
		pool->zpool_ops = zpool_ops;
	}
	return pool;
}

static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}

static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}

static int z3fold_zpool_shrink(void *pool, unsigned int pages,
			unsigned int *reclaimed)
{
	unsigned int total = 0;
	int ret = -EINVAL;

	while (total < pages) {
		ret = z3fold_reclaim_page(pool, 8);
		if (ret < 0)
			break;
		total++;
	}

	if (reclaimed)
		*reclaimed = total;

	return ret;
}

static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}

static u64 z3fold_zpool_total_size(void *pool)
{
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}

static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.shrink =	z3fold_zpool_shrink,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_size =	z3fold_zpool_total_size,
};
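
/*
 * Example usage through the zpool API (an illustrative sketch only, not part
 * of this driver; error handling elided). A client such as zswap would do
 * something like:
 *
 *	unsigned long handle;
 *	char *dst;
 *	struct zpool *zpool = zpool_create_pool("z3fold", "example_pool",
 *						GFP_KERNEL, &client_zpool_ops);
 *
 *	zpool_malloc(zpool, len, GFP_KERNEL, &handle);
 *	dst = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
 *	memcpy(dst, src, len);
 *	zpool_unmap_handle(zpool, handle);
 *	...
 *	zpool_free(zpool, handle);
 *	zpool_destroy_pool(zpool);
 *
 * Here "example_pool", "client_zpool_ops", "src", "dst" and "len" are
 * hypothetical names local to the client.
 */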
MODULE_ALIAS("zpool-z3fold");

static int __init init_z3fold(void)
{
	int ret;

	/* Make sure the z3fold header is not larger than the page size */
	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
	ret = z3fold_mount();
	if (ret)
		return ret;

	zpool_register_driver(&z3fold_zpool_driver);

	return 0;
}

static void __exit exit_z3fold(void)
{
	z3fold_unmount();
	zpool_unregister_driver(&z3fold_zpool_driver);
}

module_init(init_z3fold);
module_exit(exit_z3fold);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");