// SPDX-License-Identifier: GPL-2.0
/*
 * linux/drivers/staging/erofs/unzip_vle.c
 *
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of the Linux
 * distribution for more details.
 */
#include "unzip_vle.h"
#include "compress.h"
#include <linux/prefetch.h>

#include <trace/events/erofs.h>

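/*
 * a compressed_pages[] placeholder so that the slot will not be
 * filled with file pages for in-place decompression
 */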
#define PAGE_UNALLOCATED ((void *)0x5F0E4B1D)

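/* how to allocate cached pages for a workgroup */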
enum z_erofs_cache_alloctype {
	DONTALLOC,
	DELAYEDALLOC,
};

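/*
 * tagged pointer with 1-bit tag for all compressed pages;
 * tag 1 means the page was just grabbed from the managed cache
 * (see tag_compressed_page_justfound below)
 */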
typedef tagptr1_t compressed_page_t;

#define tag_compressed_page_justfound(page) \
	tagptr_fold(compressed_page_t, page, 1)

static struct workqueue_struct *z_erofs_workqueue __read_mostly;
static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly;

void z_erofs_exit_zip_subsystem(void)
{
	destroy_workqueue(z_erofs_workqueue);
	kmem_cache_destroy(z_erofs_workgroup_cachep);
}

static inline int init_unzip_workqueue(void)
{
	const unsigned int onlinecpus = num_possible_cpus();

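	/*
	 * not too many threads are needed here; limiting the number of
	 * unbound workers can improve scheduling performance.
	 */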
	z_erofs_workqueue =
		alloc_workqueue("erofs_unzipd",
				WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
				onlinecpus + onlinecpus / 4);

	return z_erofs_workqueue ? 0 : -ENOMEM;
}

static void init_once(void *ptr)
{
	struct z_erofs_vle_workgroup *grp = ptr;
	struct z_erofs_vle_work *const work =
		z_erofs_vle_grab_primary_work(grp);
	unsigned int i;

	mutex_init(&work->lock);
	work->nr_pages = 0;
	work->vcnt = 0;
	for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
		grp->compressed_pages[i] = NULL;
}

static void init_always(struct z_erofs_vle_workgroup *grp)
{
	struct z_erofs_vle_work *const work =
		z_erofs_vle_grab_primary_work(grp);

	atomic_set(&grp->obj.refcount, 1);
	grp->flags = 0;

	DBG_BUGON(work->nr_pages);
	DBG_BUGON(work->vcnt);
}

int __init z_erofs_init_zip_subsystem(void)
{
	z_erofs_workgroup_cachep =
		kmem_cache_create("erofs_compress",
				  Z_EROFS_WORKGROUP_SIZE, 0,
				  SLAB_RECLAIM_ACCOUNT, init_once);

	if (z_erofs_workgroup_cachep) {
		if (!init_unzip_workqueue())
			return 0;

		kmem_cache_destroy(z_erofs_workgroup_cachep);
	}
	return -ENOMEM;
}

enum z_erofs_vle_work_role {
	Z_EROFS_VLE_WORK_SECONDARY,
	Z_EROFS_VLE_WORK_PRIMARY,
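	/*
	 * The current work is the tail of an existing chain: the works
	 * processed so far are hooked up to it, but a new chain has to be
	 * started for the remaining works.  Unlike PRIMARY_FOLLOWED below,
	 * the partial page shared with the next work cannot be reused for
	 * in-place decompression here.
	 */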
	Z_EROFS_VLE_WORK_PRIMARY_HOOKED,
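	/*
	 * The current work has been linked with the works processed so far
	 * and can also be linked with the works that follow, so the whole
	 * shared partial page may safely be used for in-place decompression.
	 */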
	Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
	Z_EROFS_VLE_WORK_MAX
};

struct z_erofs_vle_work_builder {
	enum z_erofs_vle_work_role role;
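	/*
	 * 'hosted == false' means the current workgroup doesn't belong to
	 * the chain owned by this builder, so it is not our business to
	 * submit it for I/O.
	 */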
	bool hosted;

	struct z_erofs_vle_workgroup *grp;
	struct z_erofs_vle_work *work;
	struct z_erofs_pagevec_ctor vector;

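	/* pages used for reading the compressed data */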
	struct page **compressed_pages;
	unsigned int compressed_deficit;
};

#define VLE_WORK_BUILDER_INIT() \
	{ .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED }

#ifdef EROFS_FS_HAS_MANAGED_CACHE
static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl,
				     struct address_space *mc,
				     pgoff_t index,
				     unsigned int clusterpages,
				     enum z_erofs_cache_alloctype type,
				     struct list_head *pagepool,
				     gfp_t gfp)
{
	struct page **const pages = bl->compressed_pages;
	const unsigned int remaining = bl->compressed_deficit;
	bool standalone = true;
	unsigned int i, j = 0;

	if (bl->role < Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
		return;

	gfp = mapping_gfp_constraint(mc, gfp) & ~__GFP_RECLAIM;

	index += clusterpages - remaining;

	for (i = 0; i < remaining; ++i) {
		struct page *page;
		compressed_page_t t;

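		/* the compressed page was loaded before */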
		if (READ_ONCE(pages[i]))
			continue;

		page = find_get_page(mc, index + i);

		if (page) {
			t = tag_compressed_page_justfound(page);
		} else if (type == DELAYEDALLOC) {
			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
		} else {
			if (standalone)
				j = i;
			standalone = false;
			continue;
		}

		if (!cmpxchg_relaxed(&pages[i], NULL, tagptr_cast_ptr(t)))
			continue;

		if (page)
			put_page(page);
	}
	bl->compressed_pages += j;
	bl->compressed_deficit = remaining - j;

	if (standalone)
		bl->role = Z_EROFS_VLE_WORK_PRIMARY;
}

int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
				       struct erofs_workgroup *egrp)
{
	struct z_erofs_vle_workgroup *const grp =
		container_of(egrp, struct z_erofs_vle_workgroup, obj);
	struct address_space *const mapping = MNGD_MAPPING(sbi);
	const int clusterpages = erofs_clusterpages(sbi);
	int i;

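	/*
	 * the workgroup refcount has been frozen to 1 by the caller, so no
	 * concurrent decompression user can show up at this point.
	 */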
	for (i = 0; i < clusterpages; ++i) {
		struct page *page = grp->compressed_pages[i];

		if (!page || page->mapping != mapping)
			continue;

		if (!trylock_page(page))
			return -EBUSY;

		WRITE_ONCE(grp->compressed_pages[i], NULL);

		set_page_private(page, 0);
		ClearPagePrivate(page);

		unlock_page(page);
		put_page(page);
	}
	return 0;
}

int erofs_try_to_free_cached_page(struct address_space *mapping,
				  struct page *page)
{
	struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb);
	const unsigned int clusterpages = erofs_clusterpages(sbi);
	struct z_erofs_vle_workgroup *const grp = (void *)page_private(page);
	int ret = 0;

	if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) {
		unsigned int i;

		for (i = 0; i < clusterpages; ++i) {
			if (grp->compressed_pages[i] == page) {
				WRITE_ONCE(grp->compressed_pages[i], NULL);
				ret = 1;
				break;
			}
		}
		erofs_workgroup_unfreeze(&grp->obj, 1);

		if (ret) {
			ClearPagePrivate(page);
			put_page(page);
		}
	}
	return ret;
}
#else
static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl,
				     struct address_space *mc,
				     pgoff_t index,
				     unsigned int clusterpages,
				     enum z_erofs_cache_alloctype type,
				     struct list_head *pagepool,
				     gfp_t gfp)
{
}
#endif

static inline bool try_to_reuse_as_compressed_page(
	struct z_erofs_vle_work_builder *b,
	struct page *page)
{
	while (b->compressed_deficit) {
		--b->compressed_deficit;
		if (!cmpxchg(b->compressed_pages++, NULL, page))
			return true;
	}

	return false;
}

static int z_erofs_vle_work_add_page(
	struct z_erofs_vle_work_builder *builder,
	struct page *page,
	enum z_erofs_page_type type)
{
	int ret;
	bool occupied;

	if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY &&
	    type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
	    try_to_reuse_as_compressed_page(builder, page))
		return 0;

	ret = z_erofs_pagevec_ctor_enqueue(&builder->vector,
					   page, type, &occupied);
	builder->work->vcnt += (unsigned int)ret;

	return ret ? 0 : -EAGAIN;
}

static enum z_erofs_vle_work_role
try_to_claim_workgroup(struct z_erofs_vle_workgroup *grp,
		       z_erofs_vle_owned_workgrp_t *owned_head,
		       bool *hosted)
{
	DBG_BUGON(*hosted);

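	/*
	 * a workgroup can be claimed either as the head of a new chain
	 * (type 1) or by hooking it up to the tail of an existing open
	 * chain (type 2), see below.
	 */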
retry:
	if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) {
		if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_NIL,
			    *owned_head) != Z_EROFS_VLE_WORKGRP_NIL)
			goto retry;

		*owned_head = &grp->next;
		*hosted = true;

		return Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;

	} else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) {
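		/*
		 * type 2: link this work to the tail of an existing open
		 * chain; note that its submission is still governed by the
		 * original chain owner.
		 */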
		if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
			    *owned_head) != Z_EROFS_VLE_WORKGRP_TAIL)
			goto retry;
		*owned_head = Z_EROFS_VLE_WORKGRP_TAIL;
		return Z_EROFS_VLE_WORK_PRIMARY_HOOKED;
	}

	return Z_EROFS_VLE_WORK_PRIMARY;
}

struct z_erofs_vle_work_finder {
	struct super_block *sb;
	pgoff_t idx;
	unsigned int pageofs;

	struct z_erofs_vle_workgroup **grp_ret;
	enum z_erofs_vle_work_role *role;
	z_erofs_vle_owned_workgrp_t *owned_head;
	bool *hosted;
};

static struct z_erofs_vle_work *
z_erofs_vle_work_lookup(const struct z_erofs_vle_work_finder *f)
{
	bool tag, primary;
	struct erofs_workgroup *egrp;
	struct z_erofs_vle_workgroup *grp;
	struct z_erofs_vle_work *work;

	egrp = erofs_find_workgroup(f->sb, f->idx, &tag);
	if (!egrp) {
		*f->grp_ret = NULL;
		return NULL;
	}

	grp = container_of(egrp, struct z_erofs_vle_workgroup, obj);
	*f->grp_ret = grp;

	work = z_erofs_vle_grab_work(grp, f->pageofs);

	primary = true;

	DBG_BUGON(work->pageofs != f->pageofs);

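	/*
	 * the work lock must be taken first to avoid grp->next becoming
	 * NIL between claiming the workgroup and adding pages: another
	 * task could otherwise finish and reset this workgroup in between.
	 */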
	mutex_lock(&work->lock);

	*f->hosted = false;
	if (!primary)
		*f->role = Z_EROFS_VLE_WORK_SECONDARY;
	else
		*f->role = try_to_claim_workgroup(grp, f->owned_head,
						  f->hosted);
	return work;
}

static struct z_erofs_vle_work *
z_erofs_vle_work_register(const struct z_erofs_vle_work_finder *f,
			  struct erofs_map_blocks *map)
{
	bool gnew = false;
	struct z_erofs_vle_workgroup *grp = *f->grp_ret;
	struct z_erofs_vle_work *work;

	if (unlikely(grp)) {
		DBG_BUGON(1);
		return ERR_PTR(-EINVAL);
	}

	grp = kmem_cache_alloc(z_erofs_workgroup_cachep, GFP_NOFS);
	if (unlikely(!grp))
		return ERR_PTR(-ENOMEM);

	init_always(grp);
	grp->obj.index = f->idx;
	grp->llen = map->m_llen;

	z_erofs_vle_set_workgrp_fmt(grp, (map->m_flags & EROFS_MAP_ZIPPED) ?
				    Z_EROFS_VLE_WORKGRP_FMT_LZ4 :
				    Z_EROFS_VLE_WORKGRP_FMT_PLAIN);

	if (map->m_flags & EROFS_MAP_FULL_MAPPED)
		grp->flags |= Z_EROFS_VLE_WORKGRP_FULL_LENGTH;

	WRITE_ONCE(grp->next, *f->owned_head);

	*f->role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;

	*f->hosted = true;

	gnew = true;
	work = z_erofs_vle_grab_primary_work(grp);
	work->pageofs = f->pageofs;

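	/*
	 * lock the primary followed work before it is visible to others;
	 * mutex_trylock() never fails for a freshly allocated workgroup.
	 */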
	mutex_trylock(&work->lock);

	if (gnew) {
		int err = erofs_register_workgroup(f->sb, &grp->obj, 0);

		if (err) {
			mutex_unlock(&work->lock);
			kmem_cache_free(z_erofs_workgroup_cachep, grp);
			return ERR_PTR(-EAGAIN);
		}
	}

	*f->owned_head = &grp->next;
	*f->grp_ret = grp;
	return work;
}

#define builder_is_hooked(builder) \
	((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_HOOKED)

#define builder_is_followed(builder) \
	((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)

static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder,
					struct super_block *sb,
					struct erofs_map_blocks *map,
					z_erofs_vle_owned_workgrp_t *owned_head)
{
	const unsigned int clusterpages = erofs_clusterpages(EROFS_SB(sb));
	struct z_erofs_vle_workgroup *grp;
	const struct z_erofs_vle_work_finder finder = {
		.sb = sb,
		.idx = erofs_blknr(map->m_pa),
		.pageofs = map->m_la & ~PAGE_MASK,
		.grp_ret = &grp,
		.role = &builder->role,
		.owned_head = owned_head,
		.hosted = &builder->hosted
	};
	struct z_erofs_vle_work *work;

	DBG_BUGON(builder->work);

	DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL);
	DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);

	DBG_BUGON(erofs_blkoff(map->m_pa));

repeat:
	work = z_erofs_vle_work_lookup(&finder);
	if (work) {
		unsigned int orig_llen;

		while ((orig_llen = READ_ONCE(grp->llen)) < map->m_llen &&
		       orig_llen != cmpxchg_relaxed(&grp->llen,
						    orig_llen, map->m_llen))
			cpu_relax();
		goto got_it;
	}

	work = z_erofs_vle_work_register(&finder, map);
	if (unlikely(work == ERR_PTR(-EAGAIN)))
		goto repeat;

	if (IS_ERR(work))
		return PTR_ERR(work);
got_it:
	z_erofs_pagevec_ctor_init(&builder->vector, Z_EROFS_NR_INLINE_PAGEVECS,
				  work->pagevec, work->vcnt);

	if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) {
		builder->compressed_pages = grp->compressed_pages;
		builder->compressed_deficit = clusterpages;
	} else {
		builder->compressed_pages = NULL;
		builder->compressed_deficit = 0;
	}

	builder->grp = grp;
	builder->work = work;
	return 0;
}

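/*
 * keep in mind that referenced workgroups are only freed after an RCU
 * grace period, so rcu_read_lock() can keep a workgroup from going away.
 */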
static void z_erofs_rcu_callback(struct rcu_head *head)
{
	struct z_erofs_vle_work *work = container_of(head,
		struct z_erofs_vle_work, rcu);
	struct z_erofs_vle_workgroup *grp =
		z_erofs_vle_work_workgroup(work, true);

	kmem_cache_free(z_erofs_workgroup_cachep, grp);
}

void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
{
	struct z_erofs_vle_workgroup *const vgrp = container_of(grp,
		struct z_erofs_vle_workgroup, obj);
	struct z_erofs_vle_work *const work = &vgrp->work;

	call_rcu(&work->rcu, z_erofs_rcu_callback);
}

static void
__z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp,
			   struct z_erofs_vle_work *work __maybe_unused)
{
	erofs_workgroup_put(&grp->obj);
}

static void z_erofs_vle_work_release(struct z_erofs_vle_work *work)
{
	struct z_erofs_vle_workgroup *grp =
		z_erofs_vle_work_workgroup(work, true);

	__z_erofs_vle_work_release(grp, work);
}

static inline bool
z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder)
{
	struct z_erofs_vle_work *work = builder->work;

	if (!work)
		return false;

	z_erofs_pagevec_ctor_exit(&builder->vector, false);
	mutex_unlock(&work->lock);

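	/*
	 * if all pending pages have been added, no need to hold the work
	 * reference any longer when the workgroup isn't hosted by ourselves.
	 */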
	if (!builder->hosted)
		__z_erofs_vle_work_release(builder->grp, work);

	builder->work = NULL;
	builder->grp = NULL;
	return true;
}

static inline struct page *__stagingpage_alloc(struct list_head *pagepool,
					       gfp_t gfp)
{
	struct page *page = erofs_allocpage(pagepool, gfp);

	if (unlikely(!page))
		return NULL;

	page->mapping = Z_EROFS_MAPPING_STAGING;
	return page;
}

struct z_erofs_vle_frontend {
	struct inode *const inode;

	struct z_erofs_vle_work_builder builder;
	struct erofs_map_blocks map;

	z_erofs_vle_owned_workgrp_t owned_head;

	bool backmost;
	erofs_off_t headoffset;
};

#define VLE_FRONTEND_INIT(__i) { \
	.inode = __i, \
	.map = { \
		.m_llen = 0, \
		.m_plen = 0, \
		.mpage = NULL \
	}, \
	.builder = VLE_WORK_BUILDER_INIT(), \
	.owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \
	.backmost = true, }

#ifdef EROFS_FS_HAS_MANAGED_CACHE
static inline bool
should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la)
{
	if (fe->backmost)
		return true;

	if (EROFS_FS_ZIP_CACHE_LVL >= 2)
		return la < fe->headoffset;

	return false;
}
#else
static inline bool
should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la)
{
	return false;
}
#endif

static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
				struct page *page,
				struct list_head *page_pool)
{
	struct super_block *const sb = fe->inode->i_sb;
	struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
	struct erofs_map_blocks *const map = &fe->map;
	struct z_erofs_vle_work_builder *const builder = &fe->builder;
	const loff_t offset = page_offset(page);

	bool tight = builder_is_hooked(builder);
	struct z_erofs_vle_work *work = builder->work;

	enum z_erofs_cache_alloctype cache_strategy;
	enum z_erofs_page_type page_type;
	unsigned int cur, end, spiltted, index;
	int err = 0;

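	/* register the locked file page as an online page of this pack */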
	z_erofs_onlinepage_init(page);

	spiltted = 0;
	end = PAGE_SIZE;
repeat:
	cur = end - 1;

	if (offset + cur >= map->m_la &&
	    offset + cur < map->m_la + map->m_llen) {
		if (!builder->work)
			goto restart_now;
		goto hitted;
	}

	debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);

	if (z_erofs_vle_work_iter_end(builder))
		fe->backmost = false;

	map->m_la = offset + cur;
	map->m_llen = 0;
	err = z_erofs_map_blocks_iter(fe->inode, map, 0);
	if (unlikely(err))
		goto err_out;

restart_now:
	if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
		goto hitted;

	DBG_BUGON(map->m_plen != 1 << sbi->clusterbits);
	DBG_BUGON(erofs_blkoff(map->m_pa));

	err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head);
	if (unlikely(err))
		goto err_out;

	if (should_alloc_managed_pages(fe, map->m_la))
		cache_strategy = DELAYEDALLOC;
	else
		cache_strategy = DONTALLOC;

	preload_compressed_pages(builder, MNGD_MAPPING(sbi),
				 map->m_pa / PAGE_SIZE,
				 map->m_plen / PAGE_SIZE,
				 cache_strategy, page_pool, GFP_KERNEL);

	tight &= builder_is_hooked(builder);
	work = builder->work;
hitted:
	cur = end - min_t(unsigned int, offset + end - map->m_la, end);
	if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) {
		zero_user_segment(page, cur, end);
		goto next_part;
	}

	page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
		(!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
			(tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
				Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));

	if (cur)
		tight &= builder_is_followed(builder);

retry:
	err = z_erofs_vle_work_add_page(builder, page, page_type);

	if (err == -EAGAIN) {
		struct page *const newpage =
			__stagingpage_alloc(page_pool, GFP_NOFS);

		err = z_erofs_vle_work_add_page(builder, newpage,
						Z_EROFS_PAGE_TYPE_EXCLUSIVE);
		if (likely(!err))
			goto retry;
	}

	if (unlikely(err))
		goto err_out;

	index = page->index - map->m_la / PAGE_SIZE;

	z_erofs_onlinepage_fixup(page, index, true);

	++spiltted;

	work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1);
next_part:
	map->m_llen = offset + cur - map->m_la;

	end = cur;
	if (end > 0)
		goto repeat;

out:
	z_erofs_onlinepage_endio(page);

	debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu",
		__func__, page, spiltted, map->m_llen);
	return err;

err_out:
	SetPageError(page);
	goto out;
}

static void z_erofs_vle_unzip_kickoff(void *ptr, int bios)
{
	tagptr1_t t = tagptr_init(tagptr1_t, ptr);
	struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t);
	bool background = tagptr_unfold_tags(t);

	if (!background) {
		unsigned long flags;

		spin_lock_irqsave(&io->u.wait.lock, flags);
		if (!atomic_add_return(bios, &io->pending_bios))
			wake_up_locked(&io->u.wait);
		spin_unlock_irqrestore(&io->u.wait.lock, flags);
		return;
	}

	if (!atomic_add_return(bios, &io->pending_bios))
		queue_work(z_erofs_workqueue, &io->u.work);
}

static inline void z_erofs_vle_read_endio(struct bio *bio)
{
	struct erofs_sb_info *sbi = NULL;
	blk_status_t err = bio->bi_status;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		bool cachemngd = false;

		DBG_BUGON(PageUptodate(page));
		DBG_BUGON(!page->mapping);

		if (unlikely(!sbi && !z_erofs_page_is_staging(page))) {
			sbi = EROFS_SB(page->mapping->host->i_sb);

			if (time_to_inject(sbi, FAULT_READ_IO)) {
				erofs_show_injection_info(FAULT_READ_IO);
				err = BLK_STS_IOERR;
			}
		}

		if (sbi)
			cachemngd = erofs_page_is_managed(sbi, page);

		if (unlikely(err))
			SetPageError(page);
		else if (cachemngd)
			SetPageUptodate(page);

		if (cachemngd)
			unlock_page(page);
	}

	z_erofs_vle_unzip_kickoff(bio->bi_private, -1);
	bio_put(bio);
}

static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES];
static DEFINE_MUTEX(z_pagemap_global_lock);

static int z_erofs_vle_unzip(struct super_block *sb,
			     struct z_erofs_vle_workgroup *grp,
			     struct list_head *page_pool)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	const unsigned int clusterpages = erofs_clusterpages(sbi);

	struct z_erofs_pagevec_ctor ctor;
	unsigned int nr_pages;
	unsigned int sparsemem_pages = 0;
	struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES];
	struct page **pages, **compressed_pages, *page;
	unsigned int algorithm;
	unsigned int i, outputsize;

	enum z_erofs_page_type page_type;
	bool overlapped, partial;
	struct z_erofs_vle_work *work;
	int err;

	might_sleep();
	work = z_erofs_vle_grab_primary_work(grp);
	DBG_BUGON(!READ_ONCE(work->nr_pages));

	mutex_lock(&work->lock);
	nr_pages = work->nr_pages;

	if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES))
		pages = pages_onstack;
	else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES &&
		 mutex_trylock(&z_pagemap_global_lock))
		pages = z_pagemap_global;
	else {
repeat:
		pages = kvmalloc_array(nr_pages, sizeof(struct page *),
				       GFP_KERNEL);

		if (unlikely(!pages)) {
			if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES)
				goto repeat;
			else {
				mutex_lock(&z_pagemap_global_lock);
				pages = z_pagemap_global;
			}
		}
	}

	for (i = 0; i < nr_pages; ++i)
		pages[i] = NULL;

	z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
				  work->pagevec, 0);

	for (i = 0; i < work->vcnt; ++i) {
		unsigned int pagenr;

		page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type);

		DBG_BUGON(!page);
		DBG_BUGON(!page->mapping);

		if (z_erofs_put_stagingpage(page_pool, page))
			continue;

		if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
			pagenr = 0;
		else
			pagenr = z_erofs_onlinepage_index(page);

		DBG_BUGON(pagenr >= nr_pages);
		DBG_BUGON(pages[pagenr]);

		pages[pagenr] = page;
	}
	sparsemem_pages = i;

	z_erofs_pagevec_ctor_exit(&ctor, true);

	overlapped = false;
	compressed_pages = grp->compressed_pages;

	err = 0;
	for (i = 0; i < clusterpages; ++i) {
		unsigned int pagenr;

		page = compressed_pages[i];

		DBG_BUGON(!page);
		DBG_BUGON(!page->mapping);

		if (!z_erofs_page_is_staging(page)) {
			if (erofs_page_is_managed(sbi, page)) {
				if (unlikely(!PageUptodate(page)))
					err = -EIO;
				continue;
			}

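			/*
			 * only non-head file pages can be reused here as
			 * in-place I/O pages for decompression.
			 */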
			pagenr = z_erofs_onlinepage_index(page);

			DBG_BUGON(pagenr >= nr_pages);
			DBG_BUGON(pages[pagenr]);
			++sparsemem_pages;
			pages[pagenr] = page;

			overlapped = true;
		}

		if (unlikely(PageError(page))) {
			DBG_BUGON(PageUptodate(page));
			err = -EIO;
		}
	}

	if (unlikely(err))
		goto out;

	if (nr_pages << PAGE_SHIFT >= work->pageofs + grp->llen) {
		outputsize = grp->llen;
		partial = !(grp->flags & Z_EROFS_VLE_WORKGRP_FULL_LENGTH);
	} else {
		outputsize = (nr_pages << PAGE_SHIFT) - work->pageofs;
		partial = true;
	}

	if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN)
		algorithm = Z_EROFS_COMPRESSION_SHIFTED;
	else
		algorithm = Z_EROFS_COMPRESSION_LZ4;

	err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
					.sb = sb,
					.in = compressed_pages,
					.out = pages,
					.pageofs_out = work->pageofs,
					.inputsize = PAGE_SIZE,
					.outputsize = outputsize,
					.alg = algorithm,
					.inplace_io = overlapped,
					.partial_decoding = partial
				 }, page_pool);

out:
	for (i = 0; i < clusterpages; ++i) {
		page = compressed_pages[i];

		if (erofs_page_is_managed(sbi, page))
			continue;

		(void)z_erofs_put_stagingpage(page_pool, page);

		WRITE_ONCE(compressed_pages[i], NULL);
	}

	for (i = 0; i < nr_pages; ++i) {
		page = pages[i];
		if (!page)
			continue;

		DBG_BUGON(!page->mapping);

		if (z_erofs_put_stagingpage(page_pool, page))
			continue;

		if (unlikely(err < 0))
			SetPageError(page);

		z_erofs_onlinepage_endio(page);
	}

	if (pages == z_pagemap_global)
		mutex_unlock(&z_pagemap_global_lock);
	else if (unlikely(pages != pages_onstack))
		kvfree(pages);

	work->nr_pages = 0;
	work->vcnt = 0;

	WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL);

	mutex_unlock(&work->lock);

	z_erofs_vle_work_release(work);
	return err;
}

static void z_erofs_vle_unzip_all(struct super_block *sb,
				  struct z_erofs_vle_unzip_io *io,
				  struct list_head *page_pool)
{
	z_erofs_vle_owned_workgrp_t owned = io->head;

	while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) {
		struct z_erofs_vle_workgroup *grp;

		DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL);

		DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL);

		grp = container_of(owned, struct z_erofs_vle_workgroup, next);
		owned = READ_ONCE(grp->next);

		z_erofs_vle_unzip(sb, grp, page_pool);
	}
}

static void z_erofs_vle_unzip_wq(struct work_struct *work)
{
	struct z_erofs_vle_unzip_io_sb *iosb = container_of(work,
		struct z_erofs_vle_unzip_io_sb, io.u.work);
	LIST_HEAD(page_pool);

	DBG_BUGON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
	z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool);

	put_pages_list(&page_pool);
	kvfree(iosb);
}

static struct page *
pickup_page_for_submission(struct z_erofs_vle_workgroup *grp,
			   unsigned int nr,
			   struct list_head *pagepool,
			   struct address_space *mc,
			   gfp_t gfp)
{
	const bool nocache = __builtin_constant_p(mc) ? !mc : false;
	const pgoff_t index = grp->obj.index;
	bool tocache = false;

	struct address_space *mapping;
	struct page *oldpage, *page;

	compressed_page_t t;
	int justfound;

repeat:
	page = READ_ONCE(grp->compressed_pages[nr]);
	oldpage = page;

	if (!page)
		goto out_allocpage;

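	/*
	 * the cached page has not been allocated yet and only a placeholder
	 * is left in the slot, so allocate a proper page for it now.
	 */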
	if (!nocache && page == PAGE_UNALLOCATED) {
		tocache = true;
		goto out_allocpage;
	}

	t = tagptr_init(compressed_page_t, page);
	justfound = tagptr_unfold_tags(t);
	page = tagptr_unfold_ptr(t);

	mapping = READ_ONCE(page->mapping);

	if (nocache) {
		DBG_BUGON(justfound);

		DBG_BUGON(!PageLocked(page));
		DBG_BUGON(PageUptodate(page));
		DBG_BUGON(!mapping);
		goto out;
	}

	if (mapping && mapping != mc)
		goto out;

	lock_page(page);

	DBG_BUGON(justfound && PagePrivate(page));

	if (page->mapping == mc) {
		WRITE_ONCE(grp->compressed_pages[nr], page);

		ClearPageError(page);
		if (!PagePrivate(page)) {
			DBG_BUGON(!justfound);

			justfound = 0;
			set_page_private(page, (unsigned long)grp);
			SetPagePrivate(page);
		}

		if (PageUptodate(page)) {
			unlock_page(page);
			page = NULL;
		}
		goto out;
	}

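	/*
	 * the managed page has been truncated; it's unsafe to reuse it here,
	 * so let a new cache-managed page be allocated below instead.
	 */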
	DBG_BUGON(page->mapping);
	DBG_BUGON(!justfound);

	tocache = true;
	unlock_page(page);
	put_page(page);
out_allocpage:
	page = __stagingpage_alloc(pagepool, gfp);
	if (oldpage != cmpxchg(&grp->compressed_pages[nr], oldpage, page)) {
		list_add(&page->lru, pagepool);
		cpu_relax();
		goto repeat;
	}
	if (nocache || !tocache)
		goto out;
	if (add_to_page_cache_lru(page, mc, index + nr, gfp)) {
		page->mapping = Z_EROFS_MAPPING_STAGING;
		goto out;
	}

	set_page_private(page, (unsigned long)grp);
	SetPagePrivate(page);
out:
	return page;
}

static struct z_erofs_vle_unzip_io *
jobqueue_init(struct super_block *sb,
	      struct z_erofs_vle_unzip_io *io,
	      bool foreground)
{
	struct z_erofs_vle_unzip_io_sb *iosb;

	if (foreground) {
		DBG_BUGON(!io);

		init_waitqueue_head(&io->u.wait);
		atomic_set(&io->pending_bios, 0);
		goto out;
	}

	iosb = kvzalloc(sizeof(*iosb), GFP_KERNEL | __GFP_NOFAIL);
	DBG_BUGON(!iosb);

	io = &iosb->io;
	iosb->sb = sb;
	INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq);
out:
	io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;
	return io;
}

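/* define the decompression jobqueue types */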
enum {
#ifdef EROFS_FS_HAS_MANAGED_CACHE
	JQ_BYPASS,
#endif
	JQ_SUBMIT,
	NR_JOBQUEUES,
};

static void *jobqueueset_init(struct super_block *sb,
			      z_erofs_vle_owned_workgrp_t qtail[],
			      struct z_erofs_vle_unzip_io *q[],
			      struct z_erofs_vle_unzip_io *fgq,
			      bool forcefg)
{
#ifdef EROFS_FS_HAS_MANAGED_CACHE
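	/*
	 * if managed cache is enabled, a bypass jobqueue is needed:
	 * workgroups in it don't need to read anything from the device.
	 */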
	q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, true);
	qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
#endif

	q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, forcefg);
	qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;

	return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], !forcefg));
}

#ifdef EROFS_FS_HAS_MANAGED_CACHE
static void move_to_bypass_jobqueue(struct z_erofs_vle_workgroup *grp,
				    z_erofs_vle_owned_workgrp_t qtail[],
				    z_erofs_vle_owned_workgrp_t owned_head)
{
	z_erofs_vle_owned_workgrp_t *const submit_qtail = qtail[JQ_SUBMIT];
	z_erofs_vle_owned_workgrp_t *const bypass_qtail = qtail[JQ_BYPASS];

	DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
	if (owned_head == Z_EROFS_VLE_WORKGRP_TAIL)
		owned_head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;

	WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);

	WRITE_ONCE(*submit_qtail, owned_head);
	WRITE_ONCE(*bypass_qtail, &grp->next);

	qtail[JQ_BYPASS] = &grp->next;
}

static bool postsubmit_is_all_bypassed(struct z_erofs_vle_unzip_io *q[],
				       unsigned int nr_bios,
				       bool force_fg)
{
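	/*
	 * although background is preferred, nothing is pending for
	 * submission, so drop the background io descriptor directly
	 * instead of kicking the decompression workqueue.
	 */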
	if (force_fg || nr_bios)
		return false;

	kvfree(container_of(q[JQ_SUBMIT],
			    struct z_erofs_vle_unzip_io_sb,
			    io));
	return true;
}
#else
static void move_to_bypass_jobqueue(struct z_erofs_vle_workgroup *grp,
				    z_erofs_vle_owned_workgrp_t qtail[],
				    z_erofs_vle_owned_workgrp_t owned_head)
{
	DBG_BUGON(1);
}

static bool postsubmit_is_all_bypassed(struct z_erofs_vle_unzip_io *q[],
				       unsigned int nr_bios,
				       bool force_fg)
{
	DBG_BUGON(!nr_bios);
	return false;
}
#endif

static bool z_erofs_vle_submit_all(struct super_block *sb,
				   z_erofs_vle_owned_workgrp_t owned_head,
				   struct list_head *pagepool,
				   struct z_erofs_vle_unzip_io *fgq,
				   bool force_fg)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	const unsigned int clusterpages = erofs_clusterpages(sbi);
	const gfp_t gfp = GFP_NOFS;

	z_erofs_vle_owned_workgrp_t qtail[NR_JOBQUEUES];
	struct z_erofs_vle_unzip_io *q[NR_JOBQUEUES];
	struct bio *bio;
	void *bi_private;

	pgoff_t uninitialized_var(last_index);
	bool force_submit = false;
	unsigned int nr_bios;

	if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL))
		return false;

	force_submit = false;
	bio = NULL;
	nr_bios = 0;
	bi_private = jobqueueset_init(sb, qtail, q, fgq, force_fg);

	q[JQ_SUBMIT]->head = owned_head;

	do {
		struct z_erofs_vle_workgroup *grp;
		pgoff_t first_index;
		struct page *page;
		unsigned int i = 0, bypass = 0;
		int err;

		DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
		DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL);

		grp = container_of(owned_head,
				   struct z_erofs_vle_workgroup, next);

		owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
				     Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);

		first_index = grp->obj.index;
		force_submit |= (first_index != last_index + 1);

repeat:
		page = pickup_page_for_submission(grp, i, pagepool,
						  MNGD_MAPPING(sbi), gfp);
		if (!page) {
			force_submit = true;
			++bypass;
			goto skippage;
		}

		if (bio && force_submit) {
submit_bio_retry:
			__submit_bio(bio, REQ_OP_READ, 0);
			bio = NULL;
		}

		if (!bio) {
			bio = erofs_grab_bio(sb, first_index + i,
					     BIO_MAX_PAGES, bi_private,
					     z_erofs_vle_read_endio, true);
			++nr_bios;
		}

		err = bio_add_page(bio, page, PAGE_SIZE, 0);
		if (err < PAGE_SIZE)
			goto submit_bio_retry;

		force_submit = false;
		last_index = first_index + i;
skippage:
		if (++i < clusterpages)
			goto repeat;

		if (bypass < clusterpages)
			qtail[JQ_SUBMIT] = &grp->next;
		else
			move_to_bypass_jobqueue(grp, qtail, owned_head);
	} while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL);

	if (bio)
		__submit_bio(bio, REQ_OP_READ, 0);

	if (postsubmit_is_all_bypassed(q, nr_bios, force_fg))
		return true;

	z_erofs_vle_unzip_kickoff(bi_private, nr_bios);
	return true;
}

static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f,
				     struct list_head *pagepool,
				     bool force_fg)
{
	struct super_block *sb = f->inode->i_sb;
	struct z_erofs_vle_unzip_io io[NR_JOBQUEUES];

	if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg))
		return;

#ifdef EROFS_FS_HAS_MANAGED_CACHE
	z_erofs_vle_unzip_all(sb, &io[JQ_BYPASS], pagepool);
#endif
	if (!force_fg)
		return;

	wait_event(io[JQ_SUBMIT].u.wait,
		   !atomic_read(&io[JQ_SUBMIT].pending_bios));

	z_erofs_vle_unzip_all(sb, &io[JQ_SUBMIT], pagepool);
}

static int z_erofs_vle_normalaccess_readpage(struct file *file,
					     struct page *page)
{
	struct inode *const inode = page->mapping->host;
	struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
	int err;
	LIST_HEAD(pagepool);

	trace_erofs_readpage(page, false);

	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;

	err = z_erofs_do_read_page(&f, page, &pagepool);
	(void)z_erofs_vle_work_iter_end(&f.builder);

	if (err) {
		errln("%s, failed to read, err [%d]", __func__, err);
		goto out;
	}

	z_erofs_submit_and_unzip(&f, &pagepool, true);
out:
	if (f.map.mpage)
		put_page(f.map.mpage);

	put_pages_list(&pagepool);
	return 0;
}

static int z_erofs_vle_normalaccess_readpages(struct file *filp,
					      struct address_space *mapping,
					      struct list_head *pages,
					      unsigned int nr_pages)
{
	struct inode *const inode = mapping->host;
	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);

	bool sync = __should_decompress_synchronously(sbi, nr_pages);
	struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
	struct page *head = NULL;
	LIST_HEAD(pagepool);

	trace_erofs_readpages(mapping->host, lru_to_page(pages),
			      nr_pages, false);

	f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT;

	for (; nr_pages; --nr_pages) {
		struct page *page = lru_to_page(pages);

		prefetchw(&page->flags);
		list_del(&page->lru);

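		/*
		 * a pure asynchronous readahead is indicated if a
		 * PG_readahead marked page is hit first: leave such
		 * requests to background decompression.
		 */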
		sync &= !(PageReadahead(page) && !head);

		if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
			list_add(&page->lru, &pagepool);
			continue;
		}

		set_page_private(page, (unsigned long)head);
		head = page;
	}

	while (head) {
		struct page *page = head;
		int err;

		head = (void *)page_private(page);

		err = z_erofs_do_read_page(&f, page, &pagepool);
		if (err) {
			struct erofs_vnode *vi = EROFS_V(inode);

			errln("%s, readahead error at page %lu of nid %llu",
			      __func__, page->index, vi->nid);
		}

		put_page(page);
	}

	(void)z_erofs_vle_work_iter_end(&f.builder);

	z_erofs_submit_and_unzip(&f, &pagepool, sync);

	if (f.map.mpage)
		put_page(f.map.mpage);

	put_pages_list(&pagepool);
	return 0;
}

const struct address_space_operations z_erofs_vle_normalaccess_aops = {
	.readpage = z_erofs_vle_normalaccess_readpage,
	.readpages = z_erofs_vle_normalaccess_readpages,
};