// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
8#include <linux/fs.h>
9#include <linux/f2fs_fs.h>
10#include <linux/bio.h>
11#include <linux/blkdev.h>
12#include <linux/prefetch.h>
13#include <linux/kthread.h>
14#include <linux/swap.h>
15#include <linux/timer.h>
16#include <linux/freezer.h>
17#include <linux/sched/signal.h>
18
19#include "f2fs.h"
20#include "segment.h"
21#include "node.h"
22#include "gc.h"
23#include "trace.h"
24#include <trace/events/f2fs.h>
25
26#define __reverse_ffz(x) __reverse_ffs(~(x))
27
28static struct kmem_cache *discard_entry_slab;
29static struct kmem_cache *discard_cmd_slab;
30static struct kmem_cache *sit_entry_set_slab;
31static struct kmem_cache *inmem_entry_slab;
32
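/*
 * Build an unsigned long from the raw bitmap bytes with str[0] in the most
 * significant position, so that the reversed-bit scans below can treat the
 * byte-oriented f2fs bitmap as a single big-endian word.
 */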
33static unsigned long __reverse_ulong(unsigned char *str)
34{
35 unsigned long tmp = 0;
36 int shift = 24, idx = 0;
37
38#if BITS_PER_LONG == 64
39 shift = 56;
40#endif
41 while (shift >= 0) {
42 tmp |= (unsigned long)str[idx++] << shift;
43 shift -= BITS_PER_BYTE;
44 }
45 return tmp;
46}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
52static inline unsigned long __reverse_ffs(unsigned long word)
53{
54 int num = 0;
55
56#if BITS_PER_LONG == 64
57 if ((word & 0xffffffff00000000UL) == 0)
58 num += 32;
59 else
60 word >>= 32;
61#endif
62 if ((word & 0xffff0000) == 0)
63 num += 16;
64 else
65 word >>= 16;
66
67 if ((word & 0xff00) == 0)
68 num += 8;
69 else
70 word >>= 8;
71
72 if ((word & 0xf0) == 0)
73 num += 4;
74 else
75 word >>= 4;
76
77 if ((word & 0xc) == 0)
78 num += 2;
79 else
80 word >>= 2;
81
82 if ((word & 0x2) == 0)
83 num += 1;
84 return num;
85}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be integral times of unsigned long.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
96static unsigned long __find_rev_next_bit(const unsigned long *addr,
97 unsigned long size, unsigned long offset)
98{
99 const unsigned long *p = addr + BIT_WORD(offset);
100 unsigned long result = size;
101 unsigned long tmp;
102
103 if (offset >= size)
104 return size;
105
106 size -= (offset & ~(BITS_PER_LONG - 1));
107 offset %= BITS_PER_LONG;
108
109 while (1) {
110 if (*p == 0)
111 goto pass;
112
113 tmp = __reverse_ulong((unsigned char *)p);
114
115 tmp &= ~0UL >> offset;
116 if (size < BITS_PER_LONG)
117 tmp &= (~0UL << (BITS_PER_LONG - size));
118 if (tmp)
119 goto found;
120pass:
121 if (size <= BITS_PER_LONG)
122 break;
123 size -= BITS_PER_LONG;
124 offset = 0;
125 p++;
126 }
127 return result;
128found:
129 return result - size + __reverse_ffs(tmp);
130}
131
132static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
133 unsigned long size, unsigned long offset)
134{
135 const unsigned long *p = addr + BIT_WORD(offset);
136 unsigned long result = size;
137 unsigned long tmp;
138
139 if (offset >= size)
140 return size;
141
142 size -= (offset & ~(BITS_PER_LONG - 1));
143 offset %= BITS_PER_LONG;
144
145 while (1) {
146 if (*p == ~0UL)
147 goto pass;
148
149 tmp = __reverse_ulong((unsigned char *)p);
150
151 if (offset)
152 tmp |= ~0UL << (BITS_PER_LONG - offset);
153 if (size < BITS_PER_LONG)
154 tmp |= ~0UL >> size;
155 if (tmp != ~0UL)
156 goto found;
157pass:
158 if (size <= BITS_PER_LONG)
159 break;
160 size -= BITS_PER_LONG;
161 offset = 0;
162 p++;
163 }
164 return result;
165found:
166 return result - size + __reverse_ffz(tmp);
167}
168
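/*
 * Decide whether SSR (slack space recycling) allocation should be used:
 * never in LFS mode, always under urgent GC or while checkpointing is
 * disabled, otherwise only when free sections drop below the dirty
 * node/dentry/imeta sections plus the reserved and min_ssr thresholds.
 */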
169bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
170{
171 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
172 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
173 int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
174
175 if (test_opt(sbi, LFS))
176 return false;
177 if (sbi->gc_mode == GC_URGENT)
178 return true;
179 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
180 return true;
181
182 return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
183 SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
184}
185
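/*
 * Register a page of an atomic-write file: tag it with ATOMIC_WRITTEN_PAGE,
 * take an extra reference on it and queue it on the inode's inmem_pages
 * list so a later commit or drop can find it.
 */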
186void f2fs_register_inmem_page(struct inode *inode, struct page *page)
187{
188 struct inmem_pages *new;
189
190 f2fs_trace_pid(page);
191
192 f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
193
194 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
197 new->page = page;
198 INIT_LIST_HEAD(&new->list);

	/* increase reference count with clean state */
201 get_page(page);
202 mutex_lock(&F2FS_I(inode)->inmem_lock);
203 list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
204 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
205 mutex_unlock(&F2FS_I(inode)->inmem_lock);
206
207 trace_f2fs_register_inmem_page(page, INMEM);
208}
209
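/*
 * Undo the in-memory atomic pages on @head.  With @drop they are simply
 * thrown away; with @recover the block address saved in cur->old_addr is
 * put back on disk via f2fs_replace_block().  @trylock makes the walk use
 * trylock_page() so it cannot deadlock with a holder of the page lock.
 */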
210static int __revoke_inmem_pages(struct inode *inode,
211 struct list_head *head, bool drop, bool recover,
212 bool trylock)
213{
214 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
215 struct inmem_pages *cur, *tmp;
216 int err = 0;
217
218 list_for_each_entry_safe(cur, tmp, head, list) {
219 struct page *page = cur->page;
220
221 if (drop)
222 trace_f2fs_commit_inmem_page(page, INMEM_DROP);
223
224 if (trylock) {
			/*
			 * to avoid deadlock between the page lock and
			 * inmem_lock, only trylock the page here.
			 */
229 if (!trylock_page(page))
230 continue;
231 } else {
232 lock_page(page);
233 }
234
235 f2fs_wait_on_page_writeback(page, DATA, true, true);
236
237 if (recover) {
238 struct dnode_of_data dn;
239 struct node_info ni;
240
241 trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
242retry:
243 set_new_dnode(&dn, inode, NULL, NULL, 0);
244 err = f2fs_get_dnode_of_data(&dn, page->index,
245 LOOKUP_NODE);
246 if (err) {
247 if (err == -ENOMEM) {
248 congestion_wait(BLK_RW_ASYNC, HZ/50);
249 cond_resched();
250 goto retry;
251 }
252 err = -EAGAIN;
253 goto next;
254 }
255
256 err = f2fs_get_node_info(sbi, dn.nid, &ni);
257 if (err) {
258 f2fs_put_dnode(&dn);
259 return err;
260 }
261
262 if (cur->old_addr == NEW_ADDR) {
263 f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
264 f2fs_update_data_blkaddr(&dn, NEW_ADDR);
265 } else
266 f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
267 cur->old_addr, ni.version, true, true);
268 f2fs_put_dnode(&dn);
269 }
270next:
		/* we don't need to invalidate this in the successful status */
272 if (drop || recover) {
273 ClearPageUptodate(page);
274 clear_cold_data(page);
275 }
276 f2fs_clear_page_private(page);
277 f2fs_put_page(page, 1);
278
279 list_del(&cur->list);
280 kmem_cache_free(inmem_entry_slab, cur);
281 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
282 }
283 return err;
284}
285
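/*
 * Drop the in-memory atomic pages of every atomic-write inode tracked by
 * the superblock.  When @gc_failure is set, only inodes whose atomic GC
 * already failed are revoked and the rest are skipped.
 */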
286void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
287{
288 struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
289 struct inode *inode;
290 struct f2fs_inode_info *fi;
291next:
292 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
293 if (list_empty(head)) {
294 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
295 return;
296 }
297 fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
298 inode = igrab(&fi->vfs_inode);
299 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
300
301 if (inode) {
302 if (gc_failure) {
303 if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
304 goto drop;
305 goto skip;
306 }
307drop:
308 set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
309 f2fs_drop_inmem_pages(inode);
310 iput(inode);
311 }
312skip:
313 congestion_wait(BLK_RW_ASYNC, HZ/50);
314 cond_resched();
315 goto next;
316}
317
318void f2fs_drop_inmem_pages(struct inode *inode)
319{
320 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
321 struct f2fs_inode_info *fi = F2FS_I(inode);
322
323 while (!list_empty(&fi->inmem_pages)) {
324 mutex_lock(&fi->inmem_lock);
325 __revoke_inmem_pages(inode, &fi->inmem_pages,
326 true, false, true);
327 mutex_unlock(&fi->inmem_lock);
328 }
329
330 clear_inode_flag(inode, FI_ATOMIC_FILE);
331 fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
332 stat_dec_atomic_write(inode);
333
334 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
335 if (!list_empty(&fi->inmem_ilist))
336 list_del_init(&fi->inmem_ilist);
337 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
338}
339
340void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
341{
342 struct f2fs_inode_info *fi = F2FS_I(inode);
343 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
344 struct list_head *head = &fi->inmem_pages;
345 struct inmem_pages *cur = NULL;
346
347 f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));
348
349 mutex_lock(&fi->inmem_lock);
350 list_for_each_entry(cur, head, list) {
351 if (cur->page == page)
352 break;
353 }
354
355 f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
356 list_del(&cur->list);
357 mutex_unlock(&fi->inmem_lock);
358
359 dec_page_count(sbi, F2FS_INMEM_PAGES);
360 kmem_cache_free(inmem_entry_slab, cur);
361
362 ClearPageUptodate(page);
363 f2fs_clear_page_private(page);
364 f2fs_put_page(page, 0);
365
366 trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
367}
368
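/*
 * Write back all registered in-memory pages of @inode.  Pages are moved to
 * a local revoke_list as they are submitted; on failure the pages that were
 * already written are rolled back to their old block addresses and the
 * remaining ones are dropped, otherwise the revoke_list is just released.
 */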
369static int __f2fs_commit_inmem_pages(struct inode *inode)
370{
371 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
372 struct f2fs_inode_info *fi = F2FS_I(inode);
373 struct inmem_pages *cur, *tmp;
374 struct f2fs_io_info fio = {
375 .sbi = sbi,
376 .ino = inode->i_ino,
377 .type = DATA,
378 .op = REQ_OP_WRITE,
379 .op_flags = REQ_SYNC | REQ_PRIO,
380 .io_type = FS_DATA_IO,
381 };
382 struct list_head revoke_list;
383 bool submit_bio = false;
384 int err = 0;
385
386 INIT_LIST_HEAD(&revoke_list);
387
388 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
389 struct page *page = cur->page;
390
391 lock_page(page);
392 if (page->mapping == inode->i_mapping) {
393 trace_f2fs_commit_inmem_page(page, INMEM);
394
395 f2fs_wait_on_page_writeback(page, DATA, true, true);
396
397 set_page_dirty(page);
398 if (clear_page_dirty_for_io(page)) {
399 inode_dec_dirty_pages(inode);
400 f2fs_remove_dirty_inode(inode);
401 }
402retry:
403 fio.page = page;
404 fio.old_blkaddr = NULL_ADDR;
405 fio.encrypted_page = NULL;
406 fio.need_lock = LOCK_DONE;
407 err = f2fs_do_write_data_page(&fio);
408 if (err) {
409 if (err == -ENOMEM) {
410 congestion_wait(BLK_RW_ASYNC, HZ/50);
411 cond_resched();
412 goto retry;
413 }
414 unlock_page(page);
415 break;
416 }
417
418 cur->old_addr = fio.old_blkaddr;
419 submit_bio = true;
420 }
421 unlock_page(page);
422 list_move_tail(&cur->list, &revoke_list);
423 }
424
425 if (submit_bio)
426 f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
427
428 if (err) {
		/*
		 * try to revoke all committed pages, but still we could fail
		 * due to no memory or other reason; if that happens, -EAGAIN
		 * is returned, which means the transaction is no longer
		 * consistent and the caller should use the journal to recover
		 * it, or rewrite and commit the last transaction.  For any
		 * other error, revoking was already done by the filesystem
		 * itself.
		 */
437 err = __revoke_inmem_pages(inode, &revoke_list,
438 false, true, false);

		/* drop all uncommitted pages */
441 __revoke_inmem_pages(inode, &fi->inmem_pages,
442 true, false, false);
443 } else {
444 __revoke_inmem_pages(inode, &revoke_list,
445 false, false, false);
446 }
447
448 return err;
449}
450
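/*
 * Commit an atomic write: balance the filesystem first, then flush the
 * registered pages while holding i_gc_rwsem[WRITE], f2fs_lock_op() and
 * inmem_lock so that GC and checkpoint cannot race with the commit.
 */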
451int f2fs_commit_inmem_pages(struct inode *inode)
452{
453 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
454 struct f2fs_inode_info *fi = F2FS_I(inode);
455 int err;
456
457 f2fs_balance_fs(sbi, true);
458
459 down_write(&fi->i_gc_rwsem[WRITE]);
460
461 f2fs_lock_op(sbi);
462 set_inode_flag(inode, FI_ATOMIC_COMMIT);
463
464 mutex_lock(&fi->inmem_lock);
465 err = __f2fs_commit_inmem_pages(inode);
466 mutex_unlock(&fi->inmem_lock);
467
468 clear_inode_flag(inode, FI_ATOMIC_COMMIT);
469
470 f2fs_unlock_op(sbi);
471 up_write(&fi->i_gc_rwsem[WRITE]);
472
473 return err;
474}
475

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
480void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
481{
482 if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
483 f2fs_show_injection_info(FAULT_CHECKPOINT);
484 f2fs_stop_checkpoint(sbi, false);
485 }

	/* balance_fs_bg is able to be pending */
488 if (need && excess_cached_nats(sbi))
489 f2fs_balance_fs_bg(sbi);
490
491 if (!f2fs_is_checkpoint_ready(sbi))
492 return;

	/*
	 * We should do GC or end up with checkpoint, if there are so many
	 * dirty segments.
	 */
498 if (has_not_enough_free_secs(sbi, 0, 0)) {
499 mutex_lock(&sbi->gc_mutex);
500 f2fs_gc(sbi, false, false, NULL_SEGNO);
501 }
502}
503
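/*
 * Background balancing: shrink the extent, NAT and free-nid caches when
 * memory runs low, and trigger a checkpoint (optionally flushing dirty file
 * inodes first when DATA_FLUSH is set) once too much metadata is dirty or
 * the checkpoint interval has expired.
 */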
504void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
505{
506 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
507 return;

	/* try to shrink extent cache when there is not enough memory */
510 if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
511 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
514 if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
515 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
516
517 if (!f2fs_available_free_memory(sbi, FREE_NIDS))
518 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
519 else
520 f2fs_build_free_nids(sbi, false, false);
521
522 if (!is_idle(sbi, REQ_TIME) &&
523 (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
524 return;

	/* checkpoint is the only way to shrink partial cached entries */
527 if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
528 !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
529 excess_prefree_segs(sbi) ||
530 excess_dirty_nats(sbi) ||
531 excess_dirty_nodes(sbi) ||
532 f2fs_time_over(sbi, CP_TIME)) {
533 if (test_opt(sbi, DATA_FLUSH)) {
534 struct blk_plug plug;
535
536 mutex_lock(&sbi->flush_lock);
537
538 blk_start_plug(&plug);
539 f2fs_sync_dirty_inodes(sbi, FILE_INODE);
540 blk_finish_plug(&plug);
541
542 mutex_unlock(&sbi->flush_lock);
543 }
544 f2fs_sync_fs(sbi->sb, true);
545 stat_inc_bg_cp_count(sbi->stat_info);
546 }
547}
548
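/*
 * Send an empty preflush bio to @bdev and wait for it, forcing the device
 * write cache out to stable media.
 */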
549static int __submit_flush_wait(struct f2fs_sb_info *sbi,
550 struct block_device *bdev)
551{
552 struct bio *bio;
553 int ret;
554
555 bio = f2fs_bio_alloc(sbi, 0, false);
556 if (!bio)
557 return -ENOMEM;
558
559 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
560 bio_set_dev(bio, bdev);
561 ret = submit_bio_wait(bio);
562 bio_put(bio);
563
564 trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
565 test_opt(sbi, FLUSH_MERGE), ret);
566 return ret;
567}
568
569static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
570{
571 int ret = 0;
572 int i;
573
574 if (!f2fs_is_multi_device(sbi))
575 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
576
577 for (i = 0; i < sbi->s_ndevs; i++) {
578 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
579 continue;
580 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
581 if (ret)
582 break;
583 }
584 return ret;
585}
586
587static int issue_flush_thread(void *data)
588{
589 struct f2fs_sb_info *sbi = data;
590 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
591 wait_queue_head_t *q = &fcc->flush_wait_queue;
592repeat:
593 if (kthread_should_stop())
594 return 0;
595
596 sb_start_intwrite(sbi->sb);
597
598 if (!llist_empty(&fcc->issue_list)) {
599 struct flush_cmd *cmd, *next;
600 int ret;
601
602 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
603 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
604
605 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
606
607 ret = submit_flush_wait(sbi, cmd->ino);
608 atomic_inc(&fcc->issued_flush);
609
610 llist_for_each_entry_safe(cmd, next,
611 fcc->dispatch_list, llnode) {
612 cmd->ret = ret;
613 complete(&cmd->wait);
614 }
615 fcc->dispatch_list = NULL;
616 }
617
618 sb_end_intwrite(sbi->sb);
619
620 wait_event_interruptible(*q,
621 kthread_should_stop() || !llist_empty(&fcc->issue_list));
622 goto repeat;
623}
624
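/*
 * Flush the device cache(s) backing @ino.  Without FLUSH_MERGE, on
 * multi-device setups, or when no other flush is queued, the flush is
 * submitted directly; otherwise the request is handed to the flush kthread
 * and merged with other pending flushes.  If the kthread is gone, the
 * caller dispatches the whole pending list itself.
 */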
625int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
626{
627 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
628 struct flush_cmd cmd;
629 int ret;
630
631 if (test_opt(sbi, NOBARRIER))
632 return 0;
633
634 if (!test_opt(sbi, FLUSH_MERGE)) {
635 atomic_inc(&fcc->queued_flush);
636 ret = submit_flush_wait(sbi, ino);
637 atomic_dec(&fcc->queued_flush);
638 atomic_inc(&fcc->issued_flush);
639 return ret;
640 }
641
642 if (atomic_inc_return(&fcc->queued_flush) == 1 ||
643 f2fs_is_multi_device(sbi)) {
644 ret = submit_flush_wait(sbi, ino);
645 atomic_dec(&fcc->queued_flush);
646
647 atomic_inc(&fcc->issued_flush);
648 return ret;
649 }
650
651 cmd.ino = ino;
652 init_completion(&cmd.wait);
653
654 llist_add(&cmd.llnode, &fcc->issue_list);

	/* update issue_list before we wake up issue_flush thread */
657 smp_mb();
658
659 if (waitqueue_active(&fcc->flush_wait_queue))
660 wake_up(&fcc->flush_wait_queue);
661
662 if (fcc->f2fs_issue_flush) {
663 wait_for_completion(&cmd.wait);
664 atomic_dec(&fcc->queued_flush);
665 } else {
666 struct llist_node *list;
667
668 list = llist_del_all(&fcc->issue_list);
669 if (!list) {
670 wait_for_completion(&cmd.wait);
671 atomic_dec(&fcc->queued_flush);
672 } else {
673 struct flush_cmd *tmp, *next;
674
675 ret = submit_flush_wait(sbi, ino);
676
677 llist_for_each_entry_safe(tmp, next, list, llnode) {
678 if (tmp == &cmd) {
679 cmd.ret = ret;
680 atomic_dec(&fcc->queued_flush);
681 continue;
682 }
683 tmp->ret = ret;
684 complete(&tmp->wait);
685 }
686 }
687 }
688
689 return cmd.ret;
690}
691
692int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
693{
694 dev_t dev = sbi->sb->s_bdev->bd_dev;
695 struct flush_cmd_control *fcc;
696 int err = 0;
697
698 if (SM_I(sbi)->fcc_info) {
699 fcc = SM_I(sbi)->fcc_info;
700 if (fcc->f2fs_issue_flush)
701 return err;
702 goto init_thread;
703 }
704
705 fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
706 if (!fcc)
707 return -ENOMEM;
708 atomic_set(&fcc->issued_flush, 0);
709 atomic_set(&fcc->queued_flush, 0);
710 init_waitqueue_head(&fcc->flush_wait_queue);
711 init_llist_head(&fcc->issue_list);
712 SM_I(sbi)->fcc_info = fcc;
713 if (!test_opt(sbi, FLUSH_MERGE))
714 return err;
715
716init_thread:
717 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
718 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
719 if (IS_ERR(fcc->f2fs_issue_flush)) {
720 err = PTR_ERR(fcc->f2fs_issue_flush);
721 kvfree(fcc);
722 SM_I(sbi)->fcc_info = NULL;
723 return err;
724 }
725
726 return err;
727}
728
729void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
730{
731 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
732
733 if (fcc && fcc->f2fs_issue_flush) {
734 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
735
736 fcc->f2fs_issue_flush = NULL;
737 kthread_stop(flush_thread);
738 }
739 if (free) {
740 kvfree(fcc);
741 SM_I(sbi)->fcc_info = NULL;
742 }
743}
744
745int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
746{
747 int ret = 0, i;
748
749 if (!f2fs_is_multi_device(sbi))
750 return 0;
751
752 for (i = 1; i < sbi->s_ndevs; i++) {
753 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
754 continue;
755 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
756 if (ret)
757 break;
758
759 spin_lock(&sbi->dev_lock);
760 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
761 spin_unlock(&sbi->dev_lock);
762 }
763
764 return ret;
765}
766
767static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
768 enum dirty_type dirty_type)
769{
770 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
773 if (IS_CURSEG(sbi, segno))
774 return;
775
776 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
777 dirty_i->nr_dirty[dirty_type]++;
778
779 if (dirty_type == DIRTY) {
780 struct seg_entry *sentry = get_seg_entry(sbi, segno);
781 enum dirty_type t = sentry->type;
782
783 if (unlikely(t >= DIRTY)) {
784 f2fs_bug_on(sbi, 1);
785 return;
786 }
787 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
788 dirty_i->nr_dirty[t]++;
789 }
790}
791
792static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
793 enum dirty_type dirty_type)
794{
795 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
796
797 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
798 dirty_i->nr_dirty[dirty_type]--;
799
800 if (dirty_type == DIRTY) {
801 struct seg_entry *sentry = get_seg_entry(sbi, segno);
802 enum dirty_type t = sentry->type;
803
804 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
805 dirty_i->nr_dirty[t]--;
806
807 if (get_valid_blocks(sbi, segno, true) == 0) {
808 clear_bit(GET_SEC_FROM_SEG(sbi, segno),
809 dirty_i->victim_secmap);
810#ifdef CONFIG_F2FS_CHECK_FS
811 clear_bit(segno, SIT_I(sbi)->invalid_segmap);
812#endif
813 }
814 }
815}

/*
 * Should not occur error such as -ENOMEM.
 * Adding dirty entry into seglist is not critical operation.
 * If a given segment is one of current working segments, it won't be added.
 */
822static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
823{
824 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
825 unsigned short valid_blocks, ckpt_valid_blocks;
826
827 if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
828 return;
829
830 mutex_lock(&dirty_i->seglist_lock);
831
832 valid_blocks = get_valid_blocks(sbi, segno, false);
833 ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
834
835 if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
836 ckpt_valid_blocks == sbi->blocks_per_seg)) {
837 __locate_dirty_segment(sbi, segno, PRE);
838 __remove_dirty_segment(sbi, segno, DIRTY);
839 } else if (valid_blocks < sbi->blocks_per_seg) {
840 __locate_dirty_segment(sbi, segno, DIRTY);
841 } else {
		/* Recovery routine with SSR needs this */
843 __remove_dirty_segment(sbi, segno, DIRTY);
844 }
845
846 mutex_unlock(&dirty_i->seglist_lock);
847}

/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
850void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
851{
852 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
853 unsigned int segno;
854
855 mutex_lock(&dirty_i->seglist_lock);
856 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
857 if (get_valid_blocks(sbi, segno, false))
858 continue;
859 if (IS_CURSEG(sbi, segno))
860 continue;
861 __locate_dirty_segment(sbi, segno, PRE);
862 __remove_dirty_segment(sbi, segno, DIRTY);
863 }
864 mutex_unlock(&dirty_i->seglist_lock);
865}
866
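/*
 * With checkpointing disabled, count the blocks that sit as holes inside
 * dirty segments and therefore cannot be reused before a checkpoint.  The
 * larger of the node and data hole totals, minus the overprovision area,
 * is reported as unusable.
 */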
867block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
868{
869 int ovp_hole_segs =
870 (overprovision_segments(sbi) - reserved_segments(sbi));
871 block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
872 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
873 block_t holes[2] = {0, 0};
874 block_t unusable;
875 struct seg_entry *se;
876 unsigned int segno;
877
878 mutex_lock(&dirty_i->seglist_lock);
879 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
880 se = get_seg_entry(sbi, segno);
881 if (IS_NODESEG(se->type))
882 holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
883 else
884 holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
885 }
886 mutex_unlock(&dirty_i->seglist_lock);
887
888 unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
889 if (unusable > ovp_holes)
890 return unusable - ovp_holes;
891 return 0;
892}
893
894int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
895{
896 int ovp_hole_segs =
897 (overprovision_segments(sbi) - reserved_segments(sbi));
898 if (unusable > F2FS_OPTION(sbi).unusable_cap)
899 return -EAGAIN;
900 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
901 dirty_segments(sbi) > ovp_hole_segs)
902 return -EAGAIN;
903 return 0;
904}

/* This is only used by SBI_CP_DISABLED */
907static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
908{
909 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
910 unsigned int segno = 0;
911
912 mutex_lock(&dirty_i->seglist_lock);
913 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
914 if (get_valid_blocks(sbi, segno, false))
915 continue;
916 if (get_ckpt_valid_blocks(sbi, segno))
917 continue;
918 mutex_unlock(&dirty_i->seglist_lock);
919 return segno;
920 }
921 mutex_unlock(&dirty_i->seglist_lock);
922 return NULL_SEGNO;
923}
924
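/*
 * Allocate and initialize a discard command for @len blocks at @start on
 * @bdev (@lstart is the logical, filesystem-wide address) and queue it on
 * the pending list selected by its length.
 */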
925static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
926 struct block_device *bdev, block_t lstart,
927 block_t start, block_t len)
928{
929 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
930 struct list_head *pend_list;
931 struct discard_cmd *dc;
932
933 f2fs_bug_on(sbi, !len);
934
935 pend_list = &dcc->pend_list[plist_idx(len)];
936
937 dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
938 INIT_LIST_HEAD(&dc->list);
939 dc->bdev = bdev;
940 dc->lstart = lstart;
941 dc->start = start;
942 dc->len = len;
943 dc->ref = 0;
944 dc->state = D_PREP;
945 dc->queued = 0;
946 dc->error = 0;
947 init_completion(&dc->wait);
948 list_add_tail(&dc->list, pend_list);
949 spin_lock_init(&dc->lock);
950 dc->bio_ref = 0;
951 atomic_inc(&dcc->discard_cmd_cnt);
952 dcc->undiscard_blks += len;
953
954 return dc;
955}
956
957static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
958 struct block_device *bdev, block_t lstart,
959 block_t start, block_t len,
960 struct rb_node *parent, struct rb_node **p,
961 bool leftmost)
962{
963 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
964 struct discard_cmd *dc;
965
966 dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
967
968 rb_link_node(&dc->rb_node, parent, p);
969 rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
970
971 return dc;
972}
973
974static void __detach_discard_cmd(struct discard_cmd_control *dcc,
975 struct discard_cmd *dc)
976{
977 if (dc->state == D_DONE)
978 atomic_sub(dc->queued, &dcc->queued_discard);
979
980 list_del(&dc->list);
981 rb_erase_cached(&dc->rb_node, &dcc->root);
982 dcc->undiscard_blks -= dc->len;
983
984 kmem_cache_free(discard_cmd_slab, dc);
985
986 atomic_dec(&dcc->discard_cmd_cnt);
987}
988
989static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
990 struct discard_cmd *dc)
991{
992 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
993 unsigned long flags;
994
995 trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
996
997 spin_lock_irqsave(&dc->lock, flags);
998 if (dc->bio_ref) {
999 spin_unlock_irqrestore(&dc->lock, flags);
1000 return;
1001 }
1002 spin_unlock_irqrestore(&dc->lock, flags);
1003
1004 f2fs_bug_on(sbi, dc->ref);
1005
1006 if (dc->error == -EOPNOTSUPP)
1007 dc->error = 0;
1008
1009 if (dc->error)
1010 printk_ratelimited(
1011 "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
1012 KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
1013 __detach_discard_cmd(dcc, dc);
1014}
1015
1016static void f2fs_submit_discard_endio(struct bio *bio)
1017{
1018 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1019 unsigned long flags;
1020
1021 dc->error = blk_status_to_errno(bio->bi_status);
1022
1023 spin_lock_irqsave(&dc->lock, flags);
1024 dc->bio_ref--;
1025 if (!dc->bio_ref && dc->state == D_SUBMIT) {
1026 dc->state = D_DONE;
1027 complete_all(&dc->wait);
1028 }
1029 spin_unlock_irqrestore(&dc->lock, flags);
1030 bio_put(bio);
1031}
1032
1033static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1034 block_t start, block_t end)
1035{
1036#ifdef CONFIG_F2FS_CHECK_FS
1037 struct seg_entry *sentry;
1038 unsigned int segno;
1039 block_t blk = start;
1040 unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1041 unsigned long *map;
1042
1043 while (blk < end) {
1044 segno = GET_SEGNO(sbi, blk);
1045 sentry = get_seg_entry(sbi, segno);
1046 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1047
1048 if (end < START_BLOCK(sbi, segno + 1))
1049 size = GET_BLKOFF_FROM_SEG0(sbi, end);
1050 else
1051 size = max_blocks;
1052 map = (unsigned long *)(sentry->cur_valid_map);
1053 offset = __find_rev_next_bit(map, size, offset);
1054 f2fs_bug_on(sbi, offset != size);
1055 blk = START_BLOCK(sbi, segno + 1);
1056 }
1057#endif
1058}
1059
1060static void __init_discard_policy(struct f2fs_sb_info *sbi,
1061 struct discard_policy *dpolicy,
1062 int discard_type, unsigned int granularity)
1063{
	/* common policy */
1065 dpolicy->type = discard_type;
1066 dpolicy->sync = true;
1067 dpolicy->ordered = false;
1068 dpolicy->granularity = granularity;
1069
1070 dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1071 dpolicy->io_aware_gran = MAX_PLIST_NUM;
1072 dpolicy->timeout = 0;
1073
1074 if (discard_type == DPOLICY_BG) {
1075 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1076 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1077 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1078 dpolicy->io_aware = true;
1079 dpolicy->sync = false;
1080 dpolicy->ordered = true;
1081 if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
1082 dpolicy->granularity = 1;
1083 dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1084 }
1085 } else if (discard_type == DPOLICY_FORCE) {
1086 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1087 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1088 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1089 dpolicy->io_aware = false;
1090 } else if (discard_type == DPOLICY_FSTRIM) {
1091 dpolicy->io_aware = false;
1092 } else if (discard_type == DPOLICY_UMOUNT) {
1093 dpolicy->max_requests = UINT_MAX;
1094 dpolicy->io_aware = false;
		/* we need to issue all to keep CP_TRIMMED_FLAG */
1096 dpolicy->granularity = 1;
1097 }
1098}
1099
1100static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1101 struct block_device *bdev, block_t lstart,
1102 block_t start, block_t len);
1103
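/*
 * Submit discard command @dc, splitting it into bios no larger than the
 * device's max_discard_sectors.  Every bio takes a reference on @dc and the
 * command moves to the wait list; any tail that is left because the request
 * budget ran out is re-inserted into the discard tree for a later round.
 */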
1104static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1105 struct discard_policy *dpolicy,
1106 struct discard_cmd *dc,
1107 unsigned int *issued)
1108{
1109 struct block_device *bdev = dc->bdev;
1110 struct request_queue *q = bdev_get_queue(bdev);
1111 unsigned int max_discard_blocks =
1112 SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1113 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1114 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1115 &(dcc->fstrim_list) : &(dcc->wait_list);
1116 int flag = dpolicy->sync ? REQ_SYNC : 0;
1117 block_t lstart, start, len, total_len;
1118 int err = 0;
1119
1120 if (dc->state != D_PREP)
1121 return 0;
1122
1123 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1124 return 0;
1125
1126 trace_f2fs_issue_discard(bdev, dc->start, dc->len);
1127
1128 lstart = dc->lstart;
1129 start = dc->start;
1130 len = dc->len;
1131 total_len = len;
1132
1133 dc->len = 0;
1134
1135 while (total_len && *issued < dpolicy->max_requests && !err) {
1136 struct bio *bio = NULL;
1137 unsigned long flags;
1138 bool last = true;
1139
1140 if (len > max_discard_blocks) {
1141 len = max_discard_blocks;
1142 last = false;
1143 }
1144
1145 (*issued)++;
1146 if (*issued == dpolicy->max_requests)
1147 last = true;
1148
1149 dc->len += len;
1150
1151 if (time_to_inject(sbi, FAULT_DISCARD)) {
1152 f2fs_show_injection_info(FAULT_DISCARD);
1153 err = -EIO;
1154 goto submit;
1155 }
1156 err = __blkdev_issue_discard(bdev,
1157 SECTOR_FROM_BLOCK(start),
1158 SECTOR_FROM_BLOCK(len),
1159 GFP_NOFS, 0, &bio);
1160submit:
1161 if (err) {
1162 spin_lock_irqsave(&dc->lock, flags);
1163 if (dc->state == D_PARTIAL)
1164 dc->state = D_SUBMIT;
1165 spin_unlock_irqrestore(&dc->lock, flags);
1166
1167 break;
1168 }
1169
1170 f2fs_bug_on(sbi, !bio);

		/*
		 * Set the state and take the bio reference before submission,
		 * so the endio callback cannot see the command as done too
		 * early.
		 */
1176 spin_lock_irqsave(&dc->lock, flags);
1177 if (last)
1178 dc->state = D_SUBMIT;
1179 else
1180 dc->state = D_PARTIAL;
1181 dc->bio_ref++;
1182 spin_unlock_irqrestore(&dc->lock, flags);
1183
1184 atomic_inc(&dcc->queued_discard);
1185 dc->queued++;
1186 list_move_tail(&dc->list, wait_list);

		/* sanity check on discard range */
1189 __check_sit_bitmap(sbi, lstart, lstart + len);
1190
1191 bio->bi_private = dc;
1192 bio->bi_end_io = f2fs_submit_discard_endio;
1193 bio->bi_opf |= flag;
1194 submit_bio(bio);
1195
1196 atomic_inc(&dcc->issued_discard);
1197
1198 f2fs_update_iostat(sbi, FS_DISCARD, 1);
1199
1200 lstart += len;
1201 start += len;
1202 total_len -= len;
1203 len = total_len;
1204 }
1205
1206 if (!err && len)
1207 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1208 return err;
1209}
1210
1211static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
1212 struct block_device *bdev, block_t lstart,
1213 block_t start, block_t len,
1214 struct rb_node **insert_p,
1215 struct rb_node *insert_parent)
1216{
1217 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1218 struct rb_node **p;
1219 struct rb_node *parent = NULL;
1220 struct discard_cmd *dc = NULL;
1221 bool leftmost = true;
1222
1223 if (insert_p && insert_parent) {
1224 parent = insert_parent;
1225 p = insert_p;
1226 goto do_insert;
1227 }
1228
1229 p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
1230 lstart, &leftmost);
1231do_insert:
1232 dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
1233 p, leftmost);
1234 if (!dc)
1235 return NULL;
1236
1237 return dc;
1238}
1239
1240static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1241 struct discard_cmd *dc)
1242{
1243 list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
1244}
1245
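/*
 * Remove @blkaddr from pending discard command @dc by trimming its range
 * and, if the punch splits it in two, re-inserting the tail as a separate
 * command, so the block about to be reused is no longer queued for discard.
 */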
1246static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1247 struct discard_cmd *dc, block_t blkaddr)
1248{
1249 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1250 struct discard_info di = dc->di;
1251 bool modified = false;
1252
1253 if (dc->state == D_DONE || dc->len == 1) {
1254 __remove_discard_cmd(sbi, dc);
1255 return;
1256 }
1257
1258 dcc->undiscard_blks -= di.len;
1259
1260 if (blkaddr > di.lstart) {
1261 dc->len = blkaddr - dc->lstart;
1262 dcc->undiscard_blks += dc->len;
1263 __relocate_discard_cmd(dcc, dc);
1264 modified = true;
1265 }
1266
1267 if (blkaddr < di.lstart + di.len - 1) {
1268 if (modified) {
1269 __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
1270 di.start + blkaddr + 1 - di.lstart,
1271 di.lstart + di.len - 1 - blkaddr,
1272 NULL, NULL);
1273 } else {
1274 dc->lstart++;
1275 dc->len--;
1276 dc->start++;
1277 dcc->undiscard_blks += dc->len;
1278 __relocate_discard_cmd(dcc, dc);
1279 }
1280 }
1281}
1282
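/*
 * Insert the range [lstart, lstart + len) into the discard rb-tree, merging
 * it with adjacent pending (D_PREP) commands on the same device as long as
 * the merged size stays below max_discard_sectors, and creating new
 * commands for whatever cannot be merged.
 */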
1283static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1284 struct block_device *bdev, block_t lstart,
1285 block_t start, block_t len)
1286{
1287 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1288 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1289 struct discard_cmd *dc;
1290 struct discard_info di = {0};
1291 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1292 struct request_queue *q = bdev_get_queue(bdev);
1293 unsigned int max_discard_blocks =
1294 SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1295 block_t end = lstart + len;
1296
1297 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1298 NULL, lstart,
1299 (struct rb_entry **)&prev_dc,
1300 (struct rb_entry **)&next_dc,
1301 &insert_p, &insert_parent, true, NULL);
1302 if (dc)
1303 prev_dc = dc;
1304
1305 if (!prev_dc) {
1306 di.lstart = lstart;
1307 di.len = next_dc ? next_dc->lstart - lstart : len;
1308 di.len = min(di.len, len);
1309 di.start = start;
1310 }
1311
1312 while (1) {
1313 struct rb_node *node;
1314 bool merged = false;
1315 struct discard_cmd *tdc = NULL;
1316
1317 if (prev_dc) {
1318 di.lstart = prev_dc->lstart + prev_dc->len;
1319 if (di.lstart < lstart)
1320 di.lstart = lstart;
1321 if (di.lstart >= end)
1322 break;
1323
1324 if (!next_dc || next_dc->lstart > end)
1325 di.len = end - di.lstart;
1326 else
1327 di.len = next_dc->lstart - di.lstart;
1328 di.start = start + di.lstart - lstart;
1329 }
1330
1331 if (!di.len)
1332 goto next;
1333
1334 if (prev_dc && prev_dc->state == D_PREP &&
1335 prev_dc->bdev == bdev &&
1336 __is_discard_back_mergeable(&di, &prev_dc->di,
1337 max_discard_blocks)) {
1338 prev_dc->di.len += di.len;
1339 dcc->undiscard_blks += di.len;
1340 __relocate_discard_cmd(dcc, prev_dc);
1341 di = prev_dc->di;
1342 tdc = prev_dc;
1343 merged = true;
1344 }
1345
1346 if (next_dc && next_dc->state == D_PREP &&
1347 next_dc->bdev == bdev &&
1348 __is_discard_front_mergeable(&di, &next_dc->di,
1349 max_discard_blocks)) {
1350 next_dc->di.lstart = di.lstart;
1351 next_dc->di.len += di.len;
1352 next_dc->di.start = di.start;
1353 dcc->undiscard_blks += di.len;
1354 __relocate_discard_cmd(dcc, next_dc);
1355 if (tdc)
1356 __remove_discard_cmd(sbi, tdc);
1357 merged = true;
1358 }
1359
1360 if (!merged) {
1361 __insert_discard_tree(sbi, bdev, di.lstart, di.start,
1362 di.len, NULL, NULL);
1363 }
1364 next:
1365 prev_dc = next_dc;
1366 if (!prev_dc)
1367 break;
1368
1369 node = rb_next(&prev_dc->rb_node);
1370 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1371 }
1372}
1373
1374static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1375 struct block_device *bdev, block_t blkstart, block_t blklen)
1376{
1377 block_t lblkstart = blkstart;
1378
1379 if (!f2fs_bdev_support_discard(bdev))
1380 return 0;
1381
1382 trace_f2fs_queue_discard(bdev, blkstart, blklen);
1383
1384 if (f2fs_is_multi_device(sbi)) {
1385 int devi = f2fs_target_device_index(sbi, blkstart);
1386
1387 blkstart -= FDEV(devi).start_blk;
1388 }
1389 mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1390 __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1391 mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1392 return 0;
1393}
1394
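/*
 * Issue pending discards in LBA order, resuming from dcc->next_pos, until
 * the request budget of @dpolicy is exhausted or the device stops being
 * idle (for io_aware policies).
 */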
1395static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1396 struct discard_policy *dpolicy)
1397{
1398 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1399 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1400 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1401 struct discard_cmd *dc;
1402 struct blk_plug plug;
1403 unsigned int pos = dcc->next_pos;
1404 unsigned int issued = 0;
1405 bool io_interrupted = false;
1406
1407 mutex_lock(&dcc->cmd_lock);
1408 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1409 NULL, pos,
1410 (struct rb_entry **)&prev_dc,
1411 (struct rb_entry **)&next_dc,
1412 &insert_p, &insert_parent, true, NULL);
1413 if (!dc)
1414 dc = next_dc;
1415
1416 blk_start_plug(&plug);
1417
1418 while (dc) {
1419 struct rb_node *node;
1420 int err = 0;
1421
1422 if (dc->state != D_PREP)
1423 goto next;
1424
1425 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1426 io_interrupted = true;
1427 break;
1428 }
1429
1430 dcc->next_pos = dc->lstart + dc->len;
1431 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1432
1433 if (issued >= dpolicy->max_requests)
1434 break;
1435next:
1436 node = rb_next(&dc->rb_node);
1437 if (err)
1438 __remove_discard_cmd(sbi, dc);
1439 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1440 }
1441
1442 blk_finish_plug(&plug);
1443
1444 if (!dc)
1445 dcc->next_pos = 0;
1446
1447 mutex_unlock(&dcc->cmd_lock);
1448
1449 if (!issued && io_interrupted)
1450 issued = -1;
1451
1452 return issued;
1453}
1454
1455static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1456 struct discard_policy *dpolicy)
1457{
1458 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1459 struct list_head *pend_list;
1460 struct discard_cmd *dc, *tmp;
1461 struct blk_plug plug;
1462 int i, issued = 0;
1463 bool io_interrupted = false;
1464
1465 if (dpolicy->timeout != 0)
1466 f2fs_update_time(sbi, dpolicy->timeout);
1467
1468 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1469 if (dpolicy->timeout != 0 &&
1470 f2fs_time_over(sbi, dpolicy->timeout))
1471 break;
1472
1473 if (i + 1 < dpolicy->granularity)
1474 break;
1475
1476 if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
1477 return __issue_discard_cmd_orderly(sbi, dpolicy);
1478
1479 pend_list = &dcc->pend_list[i];
1480
1481 mutex_lock(&dcc->cmd_lock);
1482 if (list_empty(pend_list))
1483 goto next;
1484 if (unlikely(dcc->rbtree_check))
1485 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
1486 &dcc->root));
1487 blk_start_plug(&plug);
1488 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1489 f2fs_bug_on(sbi, dc->state != D_PREP);
1490
1491 if (dpolicy->timeout != 0 &&
1492 f2fs_time_over(sbi, dpolicy->timeout))
1493 break;
1494
1495 if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1496 !is_idle(sbi, DISCARD_TIME)) {
1497 io_interrupted = true;
1498 break;
1499 }
1500
1501 __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1502
1503 if (issued >= dpolicy->max_requests)
1504 break;
1505 }
1506 blk_finish_plug(&plug);
1507next:
1508 mutex_unlock(&dcc->cmd_lock);
1509
1510 if (issued >= dpolicy->max_requests || io_interrupted)
1511 break;
1512 }
1513
1514 if (!issued && io_interrupted)
1515 issued = -1;
1516
1517 return issued;
1518}
1519
1520static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1521{
1522 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1523 struct list_head *pend_list;
1524 struct discard_cmd *dc, *tmp;
1525 int i;
1526 bool dropped = false;
1527
1528 mutex_lock(&dcc->cmd_lock);
1529 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1530 pend_list = &dcc->pend_list[i];
1531 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1532 f2fs_bug_on(sbi, dc->state != D_PREP);
1533 __remove_discard_cmd(sbi, dc);
1534 dropped = true;
1535 }
1536 }
1537 mutex_unlock(&dcc->cmd_lock);
1538
1539 return dropped;
1540}
1541
1542void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1543{
1544 __drop_discard_cmd(sbi);
1545}
1546
1547static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1548 struct discard_cmd *dc)
1549{
1550 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1551 unsigned int len = 0;
1552
1553 wait_for_completion_io(&dc->wait);
1554 mutex_lock(&dcc->cmd_lock);
1555 f2fs_bug_on(sbi, dc->state != D_DONE);
1556 dc->ref--;
1557 if (!dc->ref) {
1558 if (!dc->error)
1559 len = dc->len;
1560 __remove_discard_cmd(sbi, dc);
1561 }
1562 mutex_unlock(&dcc->cmd_lock);
1563
1564 return len;
1565}
1566
1567static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1568 struct discard_policy *dpolicy,
1569 block_t start, block_t end)
1570{
1571 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1572 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1573 &(dcc->fstrim_list) : &(dcc->wait_list);
1574 struct discard_cmd *dc, *tmp;
1575 bool need_wait;
1576 unsigned int trimmed = 0;
1577
1578next:
1579 need_wait = false;
1580
1581 mutex_lock(&dcc->cmd_lock);
1582 list_for_each_entry_safe(dc, tmp, wait_list, list) {
1583 if (dc->lstart + dc->len <= start || end <= dc->lstart)
1584 continue;
1585 if (dc->len < dpolicy->granularity)
1586 continue;
1587 if (dc->state == D_DONE && !dc->ref) {
1588 wait_for_completion_io(&dc->wait);
1589 if (!dc->error)
1590 trimmed += dc->len;
1591 __remove_discard_cmd(sbi, dc);
1592 } else {
1593 dc->ref++;
1594 need_wait = true;
1595 break;
1596 }
1597 }
1598 mutex_unlock(&dcc->cmd_lock);
1599
1600 if (need_wait) {
1601 trimmed += __wait_one_discard_bio(sbi, dc);
1602 goto next;
1603 }
1604
1605 return trimmed;
1606}
1607
1608static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1609 struct discard_policy *dpolicy)
1610{
1611 struct discard_policy dp;
1612 unsigned int discard_blks;
1613
1614 if (dpolicy)
1615 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);

	/* wait all */
1618 __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1619 discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1620 __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1621 discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1622
1623 return discard_blks;
1624}

/* This should be covered by global mutex, &sit_i->sentry_lock */
1627static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1628{
1629 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1630 struct discard_cmd *dc;
1631 bool need_wait = false;
1632
1633 mutex_lock(&dcc->cmd_lock);
1634 dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1635 NULL, blkaddr);
1636 if (dc) {
1637 if (dc->state == D_PREP) {
1638 __punch_discard_cmd(sbi, dc, blkaddr);
1639 } else {
1640 dc->ref++;
1641 need_wait = true;
1642 }
1643 }
1644 mutex_unlock(&dcc->cmd_lock);
1645
1646 if (need_wait)
1647 __wait_one_discard_bio(sbi, dc);
1648}
1649
1650void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1651{
1652 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1653
1654 if (dcc && dcc->f2fs_issue_discard) {
1655 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1656
1657 dcc->f2fs_issue_discard = NULL;
1658 kthread_stop(discard_thread);
1659 }
1660}

/* This comes from f2fs_put_super */
1663bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1664{
1665 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1666 struct discard_policy dpolicy;
1667 bool dropped;
1668
1669 __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1670 dcc->discard_granularity);
1671 dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
1672 __issue_discard_cmd(sbi, &dpolicy);
1673 dropped = __drop_discard_cmd(sbi);

	/* just to make sure there are no pending discard commands */
1676 __wait_all_discard_cmd(sbi, NULL);
1677
1678 f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1679 return dropped;
1680}
1681
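/*
 * Background discard kthread: sleep until woken or the interval expires,
 * issue pending discards under a policy derived from the current discard
 * granularity (or an aggressive one during urgent GC), and pick the next
 * sleep interval based on how much work was actually done.
 */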
1682static int issue_discard_thread(void *data)
1683{
1684 struct f2fs_sb_info *sbi = data;
1685 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1686 wait_queue_head_t *q = &dcc->discard_wait_queue;
1687 struct discard_policy dpolicy;
1688 unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
1689 int issued;
1690
1691 set_freezable();
1692
1693 do {
1694 __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1695 dcc->discard_granularity);
1696
1697 wait_event_interruptible_timeout(*q,
1698 kthread_should_stop() || freezing(current) ||
1699 dcc->discard_wake,
1700 msecs_to_jiffies(wait_ms));
1701
1702 if (dcc->discard_wake)
1703 dcc->discard_wake = 0;

		/* clean up pending candidates before going to sleep */
1706 if (atomic_read(&dcc->queued_discard))
1707 __wait_all_discard_cmd(sbi, NULL);
1708
1709 if (try_to_freeze())
1710 continue;
1711 if (f2fs_readonly(sbi->sb))
1712 continue;
1713 if (kthread_should_stop())
1714 return 0;
1715 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1716 wait_ms = dpolicy.max_interval;
1717 continue;
1718 }
1719
1720 if (sbi->gc_mode == GC_URGENT)
1721 __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1722
1723 sb_start_intwrite(sbi->sb);
1724
1725 issued = __issue_discard_cmd(sbi, &dpolicy);
1726 if (issued > 0) {
1727 __wait_all_discard_cmd(sbi, &dpolicy);
1728 wait_ms = dpolicy.min_interval;
		} else if (issued == -1) {
1730 wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1731 if (!wait_ms)
1732 wait_ms = dpolicy.mid_interval;
1733 } else {
1734 wait_ms = dpolicy.max_interval;
1735 }
1736
1737 sb_end_intwrite(sbi->sb);
1738
1739 } while (!kthread_should_stop());
1740 return 0;
1741}
1742
1743#ifdef CONFIG_BLK_DEV_ZONED
1744static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1745 struct block_device *bdev, block_t blkstart, block_t blklen)
1746{
1747 sector_t sector, nr_sects;
1748 block_t lblkstart = blkstart;
1749 int devi = 0;
1750
1751 if (f2fs_is_multi_device(sbi)) {
1752 devi = f2fs_target_device_index(sbi, blkstart);
1753 if (blkstart < FDEV(devi).start_blk ||
1754 blkstart > FDEV(devi).end_blk) {
1755 f2fs_err(sbi, "Invalid block %x", blkstart);
1756 return -EIO;
1757 }
1758 blkstart -= FDEV(devi).start_blk;
1759 }

	/* For sequential zones, reset the zone, otherwise, issue a discard */
1762 if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1763 sector = SECTOR_FROM_BLOCK(blkstart);
1764 nr_sects = SECTOR_FROM_BLOCK(blklen);
1765
1766 if (sector & (bdev_zone_sectors(bdev) - 1) ||
1767 nr_sects != bdev_zone_sectors(bdev)) {
1768 f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1769 devi, sbi->s_ndevs ? FDEV(devi).path : "",
1770 blkstart, blklen);
1771 return -EIO;
1772 }
1773 trace_f2fs_issue_reset_zone(bdev, blkstart);
1774 return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
1775 }

	/* For conventional zones, use regular discard if supported */
1778 return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1779}
1780#endif
1781
1782static int __issue_discard_async(struct f2fs_sb_info *sbi,
1783 struct block_device *bdev, block_t blkstart, block_t blklen)
1784{
1785#ifdef CONFIG_BLK_DEV_ZONED
1786 if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1787 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1788#endif
1789 return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1790}
1791
1792static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1793 block_t blkstart, block_t blklen)
1794{
1795 sector_t start = blkstart, len = 0;
1796 struct block_device *bdev;
1797 struct seg_entry *se;
1798 unsigned int offset;
1799 block_t i;
1800 int err = 0;
1801
1802 bdev = f2fs_target_device(sbi, blkstart, NULL);
1803
1804 for (i = blkstart; i < blkstart + blklen; i++, len++) {
1805 if (i != start) {
1806 struct block_device *bdev2 =
1807 f2fs_target_device(sbi, i, NULL);
1808
1809 if (bdev2 != bdev) {
1810 err = __issue_discard_async(sbi, bdev,
1811 start, len);
1812 if (err)
1813 return err;
1814 bdev = bdev2;
1815 start = i;
1816 len = 0;
1817 }
1818 }
1819
1820 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1821 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1822
1823 if (!f2fs_test_and_set_bit(offset, se->discard_map))
1824 sbi->discard_blks--;
1825 }
1826
1827 if (len)
1828 err = __issue_discard_async(sbi, bdev, start, len);
1829 return err;
1830}
1831
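/*
 * Collect the blocks of segment cpc->trim_start that became invalid since
 * the last checkpoint (or, under CP_DISCARD, every block unused by the
 * checkpoint and not yet discarded) into discard entries on
 * dcc->entry_list.  With @check_only, just report whether any such range
 * exists.
 */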
1832static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
1833 bool check_only)
1834{
1835 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1836 int max_blocks = sbi->blocks_per_seg;
1837 struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1838 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1839 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1840 unsigned long *discard_map = (unsigned long *)se->discard_map;
1841 unsigned long *dmap = SIT_I(sbi)->tmp_map;
1842 unsigned int start = 0, end = -1;
1843 bool force = (cpc->reason & CP_DISCARD);
1844 struct discard_entry *de = NULL;
1845 struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1846 int i;
1847
1848 if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1849 return false;
1850
1851 if (!force) {
1852 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1853 SM_I(sbi)->dcc_info->nr_discards >=
1854 SM_I(sbi)->dcc_info->max_discards)
1855 return false;
1856 }

	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
1859 for (i = 0; i < entries; i++)
1860 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1861 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1862
1863 while (force || SM_I(sbi)->dcc_info->nr_discards <=
1864 SM_I(sbi)->dcc_info->max_discards) {
1865 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
1866 if (start >= max_blocks)
1867 break;
1868
1869 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1870 if (force && start && end != max_blocks
1871 && (end - start) < cpc->trim_minlen)
1872 continue;
1873
1874 if (check_only)
1875 return true;
1876
1877 if (!de) {
1878 de = f2fs_kmem_cache_alloc(discard_entry_slab,
1879 GFP_F2FS_ZERO);
1880 de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
1881 list_add_tail(&de->list, head);
1882 }
1883
1884 for (i = start; i < end; i++)
1885 __set_bit_le(i, (void *)de->discard_map);
1886
1887 SM_I(sbi)->dcc_info->nr_discards += end - start;
1888 }
1889 return false;
1890}
1891
1892static void release_discard_addr(struct discard_entry *entry)
1893{
1894 list_del(&entry->list);
1895 kmem_cache_free(discard_entry_slab, entry);
1896}
1897
1898void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1899{
1900 struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1901 struct discard_entry *entry, *this;

	/* drop caches */
1904 list_for_each_entry_safe(entry, this, head, list)
1905 release_discard_addr(entry);
1906}

/*
 * Should call f2fs_clear_prefree_segments after checkpoint is done.
 */
1911static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1912{
1913 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1914 unsigned int segno;
1915
1916 mutex_lock(&dirty_i->seglist_lock);
1917 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
1918 __set_test_and_free(sbi, segno);
1919 mutex_unlock(&dirty_i->seglist_lock);
1920}
1921
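/*
 * Called during checkpoint: turn prefree segments back into free ones and
 * issue discards for the freed ranges (section-aligned when LFS mode uses
 * large sections), then flush the small per-segment discard entries
 * gathered by add_discard_addrs().
 */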
1922void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
1923 struct cp_control *cpc)
1924{
1925 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1926 struct list_head *head = &dcc->entry_list;
1927 struct discard_entry *entry, *this;
1928 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1929 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
1930 unsigned int start = 0, end = -1;
1931 unsigned int secno, start_segno;
1932 bool force = (cpc->reason & CP_DISCARD);
1933 bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
1934
1935 mutex_lock(&dirty_i->seglist_lock);
1936
1937 while (1) {
1938 int i;
1939
1940 if (need_align && end != -1)
1941 end--;
1942 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
1943 if (start >= MAIN_SEGS(sbi))
1944 break;
1945 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
1946 start + 1);
1947
1948 if (need_align) {
1949 start = rounddown(start, sbi->segs_per_sec);
1950 end = roundup(end, sbi->segs_per_sec);
1951 }
1952
1953 for (i = start; i < end; i++) {
1954 if (test_and_clear_bit(i, prefree_map))
1955 dirty_i->nr_dirty[PRE]--;
1956 }
1957
1958 if (!f2fs_realtime_discard_enable(sbi))
1959 continue;
1960
1961 if (force && start >= cpc->trim_start &&
1962 (end - 1) <= cpc->trim_end)
1963 continue;
1964
1965 if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
1966 f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1967 (end - start) << sbi->log_blocks_per_seg);
1968 continue;
1969 }
1970next:
1971 secno = GET_SEC_FROM_SEG(sbi, start);
1972 start_segno = GET_SEG_FROM_SEC(sbi, secno);
1973 if (!IS_CURSEC(sbi, secno) &&
1974 !get_valid_blocks(sbi, start, true))
1975 f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
1976 sbi->segs_per_sec << sbi->log_blocks_per_seg);
1977
1978 start = start_segno + sbi->segs_per_sec;
1979 if (start < end)
1980 goto next;
1981 else
1982 end = start - 1;
1983 }
1984 mutex_unlock(&dirty_i->seglist_lock);

	/* send small discards */
1987 list_for_each_entry_safe(entry, this, head, list) {
1988 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
1989 bool is_valid = test_bit_le(0, entry->discard_map);
1990
1991find_next:
1992 if (is_valid) {
1993 next_pos = find_next_zero_bit_le(entry->discard_map,
1994 sbi->blocks_per_seg, cur_pos);
1995 len = next_pos - cur_pos;
1996
1997 if (f2fs_sb_has_blkzoned(sbi) ||
1998 (force && len < cpc->trim_minlen))
1999 goto skip;
2000
2001 f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2002 len);
2003 total_len += len;
2004 } else {
2005 next_pos = find_next_bit_le(entry->discard_map,
2006 sbi->blocks_per_seg, cur_pos);
2007 }
2008skip:
2009 cur_pos = next_pos;
2010 is_valid = !is_valid;
2011
2012 if (cur_pos < sbi->blocks_per_seg)
2013 goto find_next;
2014
2015 release_discard_addr(entry);
2016 dcc->nr_discards -= total_len;
2017 }
2018
2019 wake_up_discard_thread(sbi, false);
2020}
2021
2022static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2023{
2024 dev_t dev = sbi->sb->s_bdev->bd_dev;
2025 struct discard_cmd_control *dcc;
2026 int err = 0, i;
2027
2028 if (SM_I(sbi)->dcc_info) {
2029 dcc = SM_I(sbi)->dcc_info;
2030 goto init_thread;
2031 }
2032
2033 dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2034 if (!dcc)
2035 return -ENOMEM;
2036
2037 dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2038 INIT_LIST_HEAD(&dcc->entry_list);
2039 for (i = 0; i < MAX_PLIST_NUM; i++)
2040 INIT_LIST_HEAD(&dcc->pend_list[i]);
2041 INIT_LIST_HEAD(&dcc->wait_list);
2042 INIT_LIST_HEAD(&dcc->fstrim_list);
2043 mutex_init(&dcc->cmd_lock);
2044 atomic_set(&dcc->issued_discard, 0);
2045 atomic_set(&dcc->queued_discard, 0);
2046 atomic_set(&dcc->discard_cmd_cnt, 0);
2047 dcc->nr_discards = 0;
2048 dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2049 dcc->undiscard_blks = 0;
2050 dcc->next_pos = 0;
2051 dcc->root = RB_ROOT_CACHED;
2052 dcc->rbtree_check = false;
2053
2054 init_waitqueue_head(&dcc->discard_wait_queue);
2055 SM_I(sbi)->dcc_info = dcc;
2056init_thread:
2057 dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2058 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2059 if (IS_ERR(dcc->f2fs_issue_discard)) {
2060 err = PTR_ERR(dcc->f2fs_issue_discard);
2061 kvfree(dcc);
2062 SM_I(sbi)->dcc_info = NULL;
2063 return err;
2064 }
2065
2066 return err;
2067}
2068
2069static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2070{
2071 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2072
2073 if (!dcc)
2074 return;
2075
2076 f2fs_stop_discard_thread(sbi);

	/*
	 * Recovery can cache discard commands, so in the error path of
	 * fill_super, it needs a chance to handle them here.
	 */
2082 if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
2083 f2fs_issue_discard_timeout(sbi);
2084
2085 kvfree(dcc);
2086 SM_I(sbi)->dcc_info = NULL;
2087}
2088
2089static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2090{
2091 struct sit_info *sit_i = SIT_I(sbi);
2092
2093 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2094 sit_i->dirty_sentries++;
2095 return false;
2096 }
2097
2098 return true;
2099}
2100
2101static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2102 unsigned int segno, int modified)
2103{
2104 struct seg_entry *se = get_seg_entry(sbi, segno);
2105 se->type = type;
2106 if (modified)
2107 __mark_sit_entry_dirty(sbi, segno);
2108}
2109
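/*
 * Apply the @del change (+1 on allocation, -1 on invalidation) at @blkaddr
 * to its segment's SIT entry: adjust the valid block count and mtime, flip
 * the cur/ckpt/discard bitmaps (complaining and clamping if a bit already
 * had the unexpected value), and mark the SIT entry dirty so it is written
 * out at the next checkpoint.
 */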
2110static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2111{
2112 struct seg_entry *se;
2113 unsigned int segno, offset;
2114 long int new_vblocks;
2115 bool exist;
2116#ifdef CONFIG_F2FS_CHECK_FS
2117 bool mir_exist;
2118#endif
2119
2120 segno = GET_SEGNO(sbi, blkaddr);
2121
2122 se = get_seg_entry(sbi, segno);
2123 new_vblocks = se->valid_blocks + del;
2124 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2125
2126 f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
2127 (new_vblocks > sbi->blocks_per_seg)));
2128
2129 se->valid_blocks = new_vblocks;
2130 se->mtime = get_mtime(sbi, false);
2131 if (se->mtime > SIT_I(sbi)->max_mtime)
2132 SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
2135 if (del > 0) {
2136 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2137#ifdef CONFIG_F2FS_CHECK_FS
2138 mir_exist = f2fs_test_and_set_bit(offset,
2139 se->cur_valid_map_mir);
2140 if (unlikely(exist != mir_exist)) {
2141 f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2142 blkaddr, exist);
2143 f2fs_bug_on(sbi, 1);
2144 }
2145#endif
2146 if (unlikely(exist)) {
2147 f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2148 blkaddr);
2149 f2fs_bug_on(sbi, 1);
2150 se->valid_blocks--;
2151 del = 0;
2152 }
2153
2154 if (!f2fs_test_and_set_bit(offset, se->discard_map))
2155 sbi->discard_blks--;

		/*
		 * SSR should never reuse block which is checkpointed
		 * or newly invalidated.
		 */
2161 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2162 if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2163 se->ckpt_valid_blocks++;
2164 }
2165 } else {
2166 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2167#ifdef CONFIG_F2FS_CHECK_FS
2168 mir_exist = f2fs_test_and_clear_bit(offset,
2169 se->cur_valid_map_mir);
2170 if (unlikely(exist != mir_exist)) {
2171 f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2172 blkaddr, exist);
2173 f2fs_bug_on(sbi, 1);
2174 }
2175#endif
2176 if (unlikely(!exist)) {
2177 f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2178 blkaddr);
2179 f2fs_bug_on(sbi, 1);
2180 se->valid_blocks++;
2181 del = 0;
2182 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
			/*
			 * If checkpoints are off, we must not reuse data that
			 * was used in the previous checkpoint. If it was used
			 * before, we must track that to know how much space we
			 * really have.
			 */
2189 if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2190 spin_lock(&sbi->stat_lock);
2191 sbi->unusable_block_count++;
2192 spin_unlock(&sbi->stat_lock);
2193 }
2194 }
2195
2196 if (f2fs_test_and_clear_bit(offset, se->discard_map))
2197 sbi->discard_blks++;
2198 }
2199 if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2200 se->ckpt_valid_blocks += del;
2201
2202 __mark_sit_entry_dirty(sbi, segno);
2203
2204
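	/* update total number of valid blocks to be written in ckpt area */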
2205 SIT_I(sbi)->written_valid_blocks += del;
2206
2207 if (__is_large_section(sbi))
2208 get_sec_entry(sbi, segno)->valid_blocks += del;
2209}
2210
2211void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2212{
2213 unsigned int segno = GET_SEGNO(sbi, addr);
2214 struct sit_info *sit_i = SIT_I(sbi);
2215
2216 f2fs_bug_on(sbi, addr == NULL_ADDR);
2217 if (addr == NEW_ADDR)
2218 return;
2219
2220 invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2221
2222
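	/* add it into sit main buffer */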
2223 down_write(&sit_i->sentry_lock);
2224
2225 update_sit_entry(sbi, addr, -1);
2226
2227
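	/* add it into dirty seglist */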
2228 locate_dirty_segment(sbi, segno);
2229
2230 up_write(&sit_i->sentry_lock);
2231}
2232
2233bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2234{
2235 struct sit_info *sit_i = SIT_I(sbi);
2236 unsigned int segno, offset;
2237 struct seg_entry *se;
2238 bool is_cp = false;
2239
2240 if (!__is_valid_data_blkaddr(blkaddr))
2241 return true;
2242
2243 down_read(&sit_i->sentry_lock);
2244
2245 segno = GET_SEGNO(sbi, blkaddr);
2246 se = get_seg_entry(sbi, segno);
2247 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2248
2249 if (f2fs_test_bit(offset, se->ckpt_valid_map))
2250 is_cp = true;
2251
2252 up_read(&sit_i->sentry_lock);
2253
2254 return is_cp;
2255}
2256
/*
 * Caller must hold curseg->curseg_mutex, since this updates a summary entry
 * in the in-memory summary block of the current segment.
 */
2260static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2261 struct f2fs_summary *sum)
2262{
2263 struct curseg_info *curseg = CURSEG_I(sbi, type);
2264 void *addr = curseg->sum_blk;
2265 addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2266 memcpy(addr, sum, sizeof(struct f2fs_summary));
2267}
2268
/*
 * Calculate the number of current summary pages for writing
 */
2272int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2273{
2274 int valid_sum_count = 0;
2275 int i, sum_in_page;
2276
2277 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2278 if (sbi->ckpt->alloc_type[i] == SSR)
2279 valid_sum_count += sbi->blocks_per_seg;
2280 else {
2281 if (for_ra)
2282 valid_sum_count += le16_to_cpu(
2283 F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2284 else
2285 valid_sum_count += curseg_blkoff(sbi, i);
2286 }
2287 }
2288
2289 sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2290 SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2291 if (valid_sum_count <= sum_in_page)
2292 return 1;
2293 else if ((valid_sum_count - sum_in_page) <=
2294 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2295 return 2;
2296 return 3;
2297}
2298
/*
 * Caller should put this summary page
 */
2302struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2303{
2304 return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
2305}
2306
2307void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2308 void *src, block_t blk_addr)
2309{
2310 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2311
2312 memcpy(page_address(page), src, PAGE_SIZE);
2313 set_page_dirty(page);
2314 f2fs_put_page(page, 1);
2315}
2316
2317static void write_sum_page(struct f2fs_sb_info *sbi,
2318 struct f2fs_summary_block *sum_blk, block_t blk_addr)
2319{
2320 f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2321}
2322
2323static void write_current_sum_page(struct f2fs_sb_info *sbi,
2324 int type, block_t blk_addr)
2325{
2326 struct curseg_info *curseg = CURSEG_I(sbi, type);
2327 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2328 struct f2fs_summary_block *src = curseg->sum_blk;
2329 struct f2fs_summary_block *dst;
2330
2331 dst = (struct f2fs_summary_block *)page_address(page);
2332 memset(dst, 0, PAGE_SIZE);
2333
2334 mutex_lock(&curseg->curseg_mutex);
2335
2336 down_read(&curseg->journal_rwsem);
2337 memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2338 up_read(&curseg->journal_rwsem);
2339
2340 memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2341 memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2342
2343 mutex_unlock(&curseg->curseg_mutex);
2344
2345 set_page_dirty(page);
2346 f2fs_put_page(page, 1);
2347}
2348
2349static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
2350{
2351 struct curseg_info *curseg = CURSEG_I(sbi, type);
2352 unsigned int segno = curseg->segno + 1;
2353 struct free_segmap_info *free_i = FREE_I(sbi);
2354
2355 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2356 return !test_bit(segno, free_i->free_segmap);
2357 return 0;
2358}
2359
/*
 * Find a new segment from the free segments bitmap in the requested direction.
 * This function must always succeed; otherwise it is a bug.
 */
2364static void get_new_segment(struct f2fs_sb_info *sbi,
2365 unsigned int *newseg, bool new_sec, int dir)
2366{
2367 struct free_segmap_info *free_i = FREE_I(sbi);
2368 unsigned int segno, secno, zoneno;
2369 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2370 unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2371 unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2372 unsigned int left_start = hint;
2373 bool init = true;
2374 int go_left = 0;
2375 int i;
2376
2377 spin_lock(&free_i->segmap_lock);
2378
2379 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2380 segno = find_next_zero_bit(free_i->free_segmap,
2381 GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2382 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2383 goto got_it;
2384 }
2385find_other_zone:
2386 secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2387 if (secno >= MAIN_SECS(sbi)) {
2388 if (dir == ALLOC_RIGHT) {
2389 secno = find_next_zero_bit(free_i->free_secmap,
2390 MAIN_SECS(sbi), 0);
2391 f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2392 } else {
2393 go_left = 1;
2394 left_start = hint - 1;
2395 }
2396 }
2397 if (go_left == 0)
2398 goto skip_left;
2399
2400 while (test_bit(left_start, free_i->free_secmap)) {
2401 if (left_start > 0) {
2402 left_start--;
2403 continue;
2404 }
2405 left_start = find_next_zero_bit(free_i->free_secmap,
2406 MAIN_SECS(sbi), 0);
2407 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2408 break;
2409 }
2410 secno = left_start;
2411skip_left:
2412 segno = GET_SEG_FROM_SEC(sbi, secno);
2413 zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2414
2415
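	/* give up on finding another zone */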
2416 if (!init)
2417 goto got_it;
2418 if (sbi->secs_per_zone == 1)
2419 goto got_it;
2420 if (zoneno == old_zoneno)
2421 goto got_it;
2422 if (dir == ALLOC_LEFT) {
2423 if (!go_left && zoneno + 1 >= total_zones)
2424 goto got_it;
2425 if (go_left && zoneno == 0)
2426 goto got_it;
2427 }
2428 for (i = 0; i < NR_CURSEG_TYPE; i++)
2429 if (CURSEG_I(sbi, i)->zone == zoneno)
2430 break;
2431
2432 if (i < NR_CURSEG_TYPE) {
2433
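		/* zone is in use, try another */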
2434 if (go_left)
2435 hint = zoneno * sbi->secs_per_zone - 1;
2436 else if (zoneno + 1 >= total_zones)
2437 hint = 0;
2438 else
2439 hint = (zoneno + 1) * sbi->secs_per_zone;
2440 init = false;
2441 goto find_other_zone;
2442 }
2443got_it:
2444
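	/* set it as in-use in the free segmap */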
2445 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2446 __set_inuse(sbi, segno);
2447 *newseg = segno;
2448 spin_unlock(&free_i->segmap_lock);
2449}
2450
2451static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2452{
2453 struct curseg_info *curseg = CURSEG_I(sbi, type);
2454 struct summary_footer *sum_footer;
2455
2456 curseg->segno = curseg->next_segno;
2457 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2458 curseg->next_blkoff = 0;
2459 curseg->next_segno = NULL_SEGNO;
2460
2461 sum_footer = &(curseg->sum_blk->footer);
2462 memset(sum_footer, 0, sizeof(struct summary_footer));
2463 if (IS_DATASEG(type))
2464 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2465 if (IS_NODESEG(type))
2466 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2467 __set_sit_entry_type(sbi, type, curseg->segno, modified);
2468}
2469
2470static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2471{
2472
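	/* if segs_per_sec is larger than 1, we need to keep the original policy */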
2473 if (__is_large_section(sbi))
2474 return CURSEG_I(sbi, type)->segno;
2475
2476 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2477 return 0;
2478
2479 if (test_opt(sbi, NOHEAP) &&
2480 (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
2481 return 0;
2482
2483 if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2484 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2485
2486
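	/* find segments from 0 to reuse freed segments */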
2487 if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2488 return 0;
2489
2490 return CURSEG_I(sbi, type)->segno;
2491}
2492
/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
2497static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2498{
2499 struct curseg_info *curseg = CURSEG_I(sbi, type);
2500 unsigned int segno = curseg->segno;
2501 int dir = ALLOC_LEFT;
2502
2503 write_sum_page(sbi, curseg->sum_blk,
2504 GET_SUM_BLOCK(sbi, segno));
2505 if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
2506 dir = ALLOC_RIGHT;
2507
2508 if (test_opt(sbi, NOHEAP))
2509 dir = ALLOC_RIGHT;
2510
2511 segno = __get_next_segno(sbi, type);
2512 get_new_segment(sbi, &segno, new_sec, dir);
2513 curseg->next_segno = segno;
2514 reset_curseg(sbi, type, 1);
2515 curseg->alloc_type = LFS;
2516}
2517
2518static void __next_free_blkoff(struct f2fs_sb_info *sbi,
2519 struct curseg_info *seg, block_t start)
2520{
2521 struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2522 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2523 unsigned long *target_map = SIT_I(sbi)->tmp_map;
2524 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2525 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2526 int i, pos;
2527
2528 for (i = 0; i < entries; i++)
2529 target_map[i] = ckpt_map[i] | cur_map[i];
2530
2531 pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2532
2533 seg->next_blkoff = pos;
2534}
2535
/*
 * If a segment is written in LFS manner, the next block offset is simply
 * obtained by increasing the current block offset. However, if a segment is
 * written in SSR manner, the next block offset is obtained by calling
 * __next_free_blkoff().
 */
2541static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2542 struct curseg_info *seg)
2543{
2544 if (seg->alloc_type == SSR)
2545 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
2546 else
2547 seg->next_blkoff++;
2548}
2549
/*
 * This function always allocates a used segment (from the dirty seglist) in
 * SSR manner, so it needs to recover the existing summary block and valid
 * block information of that segment.
 */
2554static void change_curseg(struct f2fs_sb_info *sbi, int type)
2555{
2556 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2557 struct curseg_info *curseg = CURSEG_I(sbi, type);
2558 unsigned int new_segno = curseg->next_segno;
2559 struct f2fs_summary_block *sum_node;
2560 struct page *sum_page;
2561
2562 write_sum_page(sbi, curseg->sum_blk,
2563 GET_SUM_BLOCK(sbi, curseg->segno));
2564 __set_test_and_inuse(sbi, new_segno);
2565
2566 mutex_lock(&dirty_i->seglist_lock);
2567 __remove_dirty_segment(sbi, new_segno, PRE);
2568 __remove_dirty_segment(sbi, new_segno, DIRTY);
2569 mutex_unlock(&dirty_i->seglist_lock);
2570
2571 reset_curseg(sbi, type, 1);
2572 curseg->alloc_type = SSR;
2573 __next_free_blkoff(sbi, curseg, 0);
2574
2575 sum_page = f2fs_get_sum_page(sbi, new_segno);
2576 f2fs_bug_on(sbi, IS_ERR(sum_page));
2577 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2578 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2579 f2fs_put_page(sum_page, 1);
2580}
2581
2582static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
2583{
2584 struct curseg_info *curseg = CURSEG_I(sbi, type);
2585 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2586 unsigned segno = NULL_SEGNO;
2587 int i, cnt;
2588 bool reversed = false;
2589
2590
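	/* f2fs_need_SSR() already forces to do this */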
2591 if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
2592 curseg->next_segno = segno;
2593 return 1;
2594 }
2595
2596
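	/* For node segments, let's do SSR more intensively */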
2597 if (IS_NODESEG(type)) {
2598 if (type >= CURSEG_WARM_NODE) {
2599 reversed = true;
2600 i = CURSEG_COLD_NODE;
2601 } else {
2602 i = CURSEG_HOT_NODE;
2603 }
2604 cnt = NR_CURSEG_NODE_TYPE;
2605 } else {
2606 if (type >= CURSEG_WARM_DATA) {
2607 reversed = true;
2608 i = CURSEG_COLD_DATA;
2609 } else {
2610 i = CURSEG_HOT_DATA;
2611 }
2612 cnt = NR_CURSEG_DATA_TYPE;
2613 }
2614
2615 for (; cnt-- > 0; reversed ? i-- : i++) {
2616 if (i == type)
2617 continue;
2618 if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
2619 curseg->next_segno = segno;
2620 return 1;
2621 }
2622 }
2623
2624
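	/* checkpoint is disabled: fall back to any free segment */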
2625 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2626 segno = get_free_segment(sbi);
2627 if (segno != NULL_SEGNO) {
2628 curseg->next_segno = segno;
2629 return 1;
2630 }
2631 }
2632 return 0;
2633}
2634
/*
 * Flush out the current segment and replace it with a new one.
 * This function must always succeed; otherwise it is a bug.
 */
2639static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
2640 int type, bool force)
2641{
2642 struct curseg_info *curseg = CURSEG_I(sbi, type);
2643
2644 if (force)
2645 new_curseg(sbi, type, true);
2646 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2647 type == CURSEG_WARM_NODE)
2648 new_curseg(sbi, type, false);
2649 else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
2650 likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2651 new_curseg(sbi, type, false);
2652 else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2653 change_curseg(sbi, type);
2654 else
2655 new_curseg(sbi, type, false);
2656
2657 stat_inc_seg_type(sbi, curseg);
2658}
2659
2660void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
2661 unsigned int start, unsigned int end)
2662{
2663 struct curseg_info *curseg = CURSEG_I(sbi, type);
2664 unsigned int segno;
2665
2666 down_read(&SM_I(sbi)->curseg_lock);
2667 mutex_lock(&curseg->curseg_mutex);
2668 down_write(&SIT_I(sbi)->sentry_lock);
2669
2670 segno = CURSEG_I(sbi, type)->segno;
2671 if (segno < start || segno > end)
2672 goto unlock;
2673
2674 if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2675 change_curseg(sbi, type);
2676 else
2677 new_curseg(sbi, type, true);
2678
2679 stat_inc_seg_type(sbi, curseg);
2680
2681 locate_dirty_segment(sbi, segno);
2682unlock:
2683 up_write(&SIT_I(sbi)->sentry_lock);
2684
2685 if (segno != curseg->segno)
2686 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
2687 type, segno, curseg->segno);
2688
2689 mutex_unlock(&curseg->curseg_mutex);
2690 up_read(&SM_I(sbi)->curseg_lock);
2691}
2692
2693void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
2694{
2695 struct curseg_info *curseg;
2696 unsigned int old_segno;
2697 int i;
2698
2699 down_write(&SIT_I(sbi)->sentry_lock);
2700
2701 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2702 curseg = CURSEG_I(sbi, i);
2703 old_segno = curseg->segno;
2704 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
2705 locate_dirty_segment(sbi, old_segno);
2706 }
2707
2708 up_write(&SIT_I(sbi)->sentry_lock);
2709}
2710
2711static const struct segment_allocation default_salloc_ops = {
2712 .allocate_segment = allocate_segment_by_default,
2713};
2714
2715bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
2716 struct cp_control *cpc)
2717{
2718 __u64 trim_start = cpc->trim_start;
2719 bool has_candidate = false;
2720
2721 down_write(&SIT_I(sbi)->sentry_lock);
2722 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2723 if (add_discard_addrs(sbi, cpc, true)) {
2724 has_candidate = true;
2725 break;
2726 }
2727 }
2728 up_write(&SIT_I(sbi)->sentry_lock);
2729
2730 cpc->trim_start = trim_start;
2731 return has_candidate;
2732}
2733
2734static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
2735 struct discard_policy *dpolicy,
2736 unsigned int start, unsigned int end)
2737{
2738 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2739 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
2740 struct rb_node **insert_p = NULL, *insert_parent = NULL;
2741 struct discard_cmd *dc;
2742 struct blk_plug plug;
2743 int issued;
2744 unsigned int trimmed = 0;
2745
2746next:
2747 issued = 0;
2748
2749 mutex_lock(&dcc->cmd_lock);
2750 if (unlikely(dcc->rbtree_check))
2751 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
2752 &dcc->root));
2753
2754 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
2755 NULL, start,
2756 (struct rb_entry **)&prev_dc,
2757 (struct rb_entry **)&next_dc,
2758 &insert_p, &insert_parent, true, NULL);
2759 if (!dc)
2760 dc = next_dc;
2761
2762 blk_start_plug(&plug);
2763
2764 while (dc && dc->lstart <= end) {
2765 struct rb_node *node;
2766 int err = 0;
2767
2768 if (dc->len < dpolicy->granularity)
2769 goto skip;
2770
2771 if (dc->state != D_PREP) {
2772 list_move_tail(&dc->list, &dcc->fstrim_list);
2773 goto skip;
2774 }
2775
2776 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
2777
2778 if (issued >= dpolicy->max_requests) {
2779 start = dc->lstart + dc->len;
2780
2781 if (err)
2782 __remove_discard_cmd(sbi, dc);
2783
2784 blk_finish_plug(&plug);
2785 mutex_unlock(&dcc->cmd_lock);
2786 trimmed += __wait_all_discard_cmd(sbi, NULL);
2787 congestion_wait(BLK_RW_ASYNC, HZ/50);
2788 goto next;
2789 }
2790skip:
2791 node = rb_next(&dc->rb_node);
2792 if (err)
2793 __remove_discard_cmd(sbi, dc);
2794 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
2795
2796 if (fatal_signal_pending(current))
2797 break;
2798 }
2799
2800 blk_finish_plug(&plug);
2801 mutex_unlock(&dcc->cmd_lock);
2802
2803 return trimmed;
2804}
2805
2806int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2807{
2808 __u64 start = F2FS_BYTES_TO_BLK(range->start);
2809 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
2810 unsigned int start_segno, end_segno;
2811 block_t start_block, end_block;
2812 struct cp_control cpc;
2813 struct discard_policy dpolicy;
2814 unsigned long long trimmed = 0;
2815 int err = 0;
2816 bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
2817
2818 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2819 return -EINVAL;
2820
2821 if (end < MAIN_BLKADDR(sbi))
2822 goto out;
2823
2824 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
2825 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
2826 return -EFSCORRUPTED;
2827 }
2828
2829
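	/* start/end segment number in main_area */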
2830 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
2831 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
2832 GET_SEGNO(sbi, end);
2833 if (need_align) {
2834 start_segno = rounddown(start_segno, sbi->segs_per_sec);
2835 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
2836 }
2837
2838 cpc.reason = CP_DISCARD;
2839 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
2840 cpc.trim_start = start_segno;
2841 cpc.trim_end = end_segno;
2842
2843 if (sbi->discard_blks == 0)
2844 goto out;
2845
2846 mutex_lock(&sbi->gc_mutex);
2847 err = f2fs_write_checkpoint(sbi, &cpc);
2848 mutex_unlock(&sbi->gc_mutex);
2849 if (err)
2850 goto out;
2851
	/*
	 * Discard candidates were queued by the checkpoint above, but there is
	 * no need to wait for all of them here: when runtime discard is
	 * enabled they will be issued during idle time anyway, so a periodic
	 * fstrim would only duplicate that work.
	 */
2858 if (f2fs_realtime_discard_enable(sbi))
2859 goto out;
2860
2861 start_block = START_BLOCK(sbi, start_segno);
2862 end_block = START_BLOCK(sbi, end_segno + 1);
2863
2864 __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2865 trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
2866 start_block, end_block);
2867
2868 trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
2869 start_block, end_block);
2870out:
2871 if (!err)
2872 range->len = F2FS_BLK_TO_BYTES(trimmed);
2873 return err;
2874}
2875
2876static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
2877{
2878 struct curseg_info *curseg = CURSEG_I(sbi, type);
2879 if (curseg->next_blkoff < sbi->blocks_per_seg)
2880 return true;
2881 return false;
2882}
2883
2884int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2885{
2886 switch (hint) {
2887 case WRITE_LIFE_SHORT:
2888 return CURSEG_HOT_DATA;
2889 case WRITE_LIFE_EXTREME:
2890 return CURSEG_COLD_DATA;
2891 default:
2892 return CURSEG_WARM_DATA;
2893 }
2894}
2895
/*
 * This returns write hints for each segment type. The hints are passed down
 * to the block layer and depend on the 'whint_mode' mount option.
 *
 * 1) whint_mode=off: F2FS only passes down WRITE_LIFE_NOT_SET.
 *
 * 2) whint_mode=user-based: F2FS tries to pass down hints given by users:
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_NOT_SET
 *                       HOT_NODE                 "
 *                       WARM_NODE                "
 *                       COLD_NODE                "
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 *
 * 3) whint_mode=fs-based: F2FS passes down hints with its own policy:
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_MEDIUM
 *                       HOT_NODE                 WRITE_LIFE_NOT_SET
 *                       WARM_NODE                "
 *                       COLD_NODE                WRITE_LIFE_NONE
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 */
2957enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
2958 enum page_type type, enum temp_type temp)
2959{
2960 if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
2961 if (type == DATA) {
2962 if (temp == WARM)
2963 return WRITE_LIFE_NOT_SET;
2964 else if (temp == HOT)
2965 return WRITE_LIFE_SHORT;
2966 else if (temp == COLD)
2967 return WRITE_LIFE_EXTREME;
2968 } else {
2969 return WRITE_LIFE_NOT_SET;
2970 }
2971 } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
2972 if (type == DATA) {
2973 if (temp == WARM)
2974 return WRITE_LIFE_LONG;
2975 else if (temp == HOT)
2976 return WRITE_LIFE_SHORT;
2977 else if (temp == COLD)
2978 return WRITE_LIFE_EXTREME;
2979 } else if (type == NODE) {
2980 if (temp == WARM || temp == HOT)
2981 return WRITE_LIFE_NOT_SET;
2982 else if (temp == COLD)
2983 return WRITE_LIFE_NONE;
2984 } else if (type == META) {
2985 return WRITE_LIFE_MEDIUM;
2986 }
2987 }
2988 return WRITE_LIFE_NOT_SET;
2989}
2990
2991static int __get_segment_type_2(struct f2fs_io_info *fio)
2992{
2993 if (fio->type == DATA)
2994 return CURSEG_HOT_DATA;
2995 else
2996 return CURSEG_HOT_NODE;
2997}
2998
2999static int __get_segment_type_4(struct f2fs_io_info *fio)
3000{
3001 if (fio->type == DATA) {
3002 struct inode *inode = fio->page->mapping->host;
3003
3004 if (S_ISDIR(inode->i_mode))
3005 return CURSEG_HOT_DATA;
3006 else
3007 return CURSEG_COLD_DATA;
3008 } else {
3009 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3010 return CURSEG_WARM_NODE;
3011 else
3012 return CURSEG_COLD_NODE;
3013 }
3014}
3015
3016static int __get_segment_type_6(struct f2fs_io_info *fio)
3017{
3018 if (fio->type == DATA) {
3019 struct inode *inode = fio->page->mapping->host;
3020
3021 if (is_cold_data(fio->page) || file_is_cold(inode))
3022 return CURSEG_COLD_DATA;
3023 if (file_is_hot(inode) ||
3024 is_inode_flag_set(inode, FI_HOT_DATA) ||
3025 f2fs_is_atomic_file(inode) ||
3026 f2fs_is_volatile_file(inode))
3027 return CURSEG_HOT_DATA;
3028 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3029 } else {
3030 if (IS_DNODE(fio->page))
3031 return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3032 CURSEG_HOT_NODE;
3033 return CURSEG_COLD_NODE;
3034 }
3035}
3036
3037static int __get_segment_type(struct f2fs_io_info *fio)
3038{
3039 int type = 0;
3040
3041 switch (F2FS_OPTION(fio->sbi).active_logs) {
3042 case 2:
3043 type = __get_segment_type_2(fio);
3044 break;
3045 case 4:
3046 type = __get_segment_type_4(fio);
3047 break;
3048 case 6:
3049 type = __get_segment_type_6(fio);
3050 break;
3051 default:
3052 f2fs_bug_on(fio->sbi, true);
3053 }
3054
3055 if (IS_HOT(type))
3056 fio->temp = HOT;
3057 else if (IS_WARM(type))
3058 fio->temp = WARM;
3059 else
3060 fio->temp = COLD;
3061 return type;
3062}
3063
3064void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3065 block_t old_blkaddr, block_t *new_blkaddr,
3066 struct f2fs_summary *sum, int type,
3067 struct f2fs_io_info *fio, bool add_list)
3068{
3069 struct sit_info *sit_i = SIT_I(sbi);
3070 struct curseg_info *curseg = CURSEG_I(sbi, type);
3071
3072 down_read(&SM_I(sbi)->curseg_lock);
3073
3074 mutex_lock(&curseg->curseg_mutex);
3075 down_write(&sit_i->sentry_lock);
3076
3077 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3078
3079 f2fs_wait_discard_bio(sbi, *new_blkaddr);
3080
	/*
	 * __add_sum_entry() must be called under curseg_mutex because it
	 * updates a summary entry in the current summary block.
	 */
3086 __add_sum_entry(sbi, type, sum);
3087
3088 __refresh_next_blkoff(sbi, curseg);
3089
3090 stat_inc_block_count(sbi, curseg);
3091
	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs the latest valid block information.
	 */
3096 update_sit_entry(sbi, *new_blkaddr, 1);
3097 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3098 update_sit_entry(sbi, old_blkaddr, -1);
3099
3100 if (!__has_curseg_space(sbi, type))
3101 sit_i->s_ops->allocate_segment(sbi, type, false);
3102
	/*
	 * Segment dirty status should be updated after segment allocation,
	 * so we only need to update the status once after the previous
	 * segment has been closed.
	 */
3108 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3109 locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3110
3111 up_write(&sit_i->sentry_lock);
3112
3113 if (page && IS_NODESEG(type)) {
3114 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3115
3116 f2fs_inode_chksum_set(sbi, page);
3117 }
3118
3119 if (F2FS_IO_ALIGNED(sbi))
3120 fio->retry = false;
3121
3122 if (add_list) {
3123 struct f2fs_bio_info *io;
3124
3125 INIT_LIST_HEAD(&fio->list);
3126 fio->in_list = true;
3127 io = sbi->write_io[fio->type] + fio->temp;
3128 spin_lock(&io->io_lock);
3129 list_add_tail(&fio->list, &io->io_list);
3130 spin_unlock(&io->io_lock);
3131 }
3132
3133 mutex_unlock(&curseg->curseg_mutex);
3134
3135 up_read(&SM_I(sbi)->curseg_lock);
3136}
3137
3138static void update_device_state(struct f2fs_io_info *fio)
3139{
3140 struct f2fs_sb_info *sbi = fio->sbi;
3141 unsigned int devidx;
3142
3143 if (!f2fs_is_multi_device(sbi))
3144 return;
3145
3146 devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
3147
3148
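	/* update device state for fsync */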
3149 f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3150
3151
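	/* update device state for checkpoint */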
3152 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3153 spin_lock(&sbi->dev_lock);
3154 f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3155 spin_unlock(&sbi->dev_lock);
3156 }
3157}
3158
3159static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3160{
3161 int type = __get_segment_type(fio);
3162 bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
3163
3164 if (keep_order)
3165 down_read(&fio->sbi->io_order_lock);
3166reallocate:
3167 f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3168 &fio->new_blkaddr, sum, type, fio, true);
3169 if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3170 invalidate_mapping_pages(META_MAPPING(fio->sbi),
3171 fio->old_blkaddr, fio->old_blkaddr);
3172
3173
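	/* writeout dirty page into bdev */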
3174 f2fs_submit_page_write(fio);
3175 if (fio->retry) {
3176 fio->old_blkaddr = fio->new_blkaddr;
3177 goto reallocate;
3178 }
3179
3180 update_device_state(fio);
3181
3182 if (keep_order)
3183 up_read(&fio->sbi->io_order_lock);
3184}
3185
3186void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3187 enum iostat_type io_type)
3188{
3189 struct f2fs_io_info fio = {
3190 .sbi = sbi,
3191 .type = META,
3192 .temp = HOT,
3193 .op = REQ_OP_WRITE,
3194 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3195 .old_blkaddr = page->index,
3196 .new_blkaddr = page->index,
3197 .page = page,
3198 .encrypted_page = NULL,
3199 .in_list = false,
3200 };
3201
3202 if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3203 fio.op_flags &= ~REQ_META;
3204
3205 set_page_writeback(page);
3206 ClearPageError(page);
3207 f2fs_submit_page_write(&fio);
3208
3209 stat_inc_meta_count(sbi, page->index);
3210 f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
3211}
3212
3213void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3214{
3215 struct f2fs_summary sum;
3216
3217 set_summary(&sum, nid, 0, 0);
3218 do_write_page(&sum, fio);
3219
3220 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3221}
3222
3223void f2fs_outplace_write_data(struct dnode_of_data *dn,
3224 struct f2fs_io_info *fio)
3225{
3226 struct f2fs_sb_info *sbi = fio->sbi;
3227 struct f2fs_summary sum;
3228
3229 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3230 set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3231 do_write_page(&sum, fio);
3232 f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3233
3234 f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
3235}
3236
3237int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3238{
3239 int err;
3240 struct f2fs_sb_info *sbi = fio->sbi;
3241 unsigned int segno;
3242
3243 fio->new_blkaddr = fio->old_blkaddr;
3244
3245 __get_segment_type(fio);
3246
3247 segno = GET_SEGNO(sbi, fio->new_blkaddr);
3248
3249 if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3250 set_sbi_flag(sbi, SBI_NEED_FSCK);
3251 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3252 __func__, segno);
3253 return -EFSCORRUPTED;
3254 }
3255
3256 stat_inc_inplace_blocks(fio->sbi);
3257
3258 if (fio->bio)
3259 err = f2fs_merge_page_bio(fio);
3260 else
3261 err = f2fs_submit_page_bio(fio);
3262 if (!err) {
3263 update_device_state(fio);
3264 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3265 }
3266
3267 return err;
3268}
3269
3270static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3271 unsigned int segno)
3272{
3273 int i;
3274
3275 for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3276 if (CURSEG_I(sbi, i)->segno == segno)
3277 break;
3278 }
3279 return i;
3280}
3281
3282void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3283 block_t old_blkaddr, block_t new_blkaddr,
3284 bool recover_curseg, bool recover_newaddr)
3285{
3286 struct sit_info *sit_i = SIT_I(sbi);
3287 struct curseg_info *curseg;
3288 unsigned int segno, old_cursegno;
3289 struct seg_entry *se;
3290 int type;
3291 unsigned short old_blkoff;
3292
3293 segno = GET_SEGNO(sbi, new_blkaddr);
3294 se = get_seg_entry(sbi, segno);
3295 type = se->type;
3296
3297 down_write(&SM_I(sbi)->curseg_lock);
3298
3299 if (!recover_curseg) {
3300
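		/* for recovery flow */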
3301 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3302 if (old_blkaddr == NULL_ADDR)
3303 type = CURSEG_COLD_DATA;
3304 else
3305 type = CURSEG_WARM_DATA;
3306 }
3307 } else {
3308 if (IS_CURSEG(sbi, segno)) {
3309
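			/* se->type is volatile as SSR allocation */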
3310 type = __f2fs_get_curseg(sbi, segno);
3311 f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3312 } else {
3313 type = CURSEG_WARM_DATA;
3314 }
3315 }
3316
3317 f2fs_bug_on(sbi, !IS_DATASEG(type));
3318 curseg = CURSEG_I(sbi, type);
3319
3320 mutex_lock(&curseg->curseg_mutex);
3321 down_write(&sit_i->sentry_lock);
3322
3323 old_cursegno = curseg->segno;
3324 old_blkoff = curseg->next_blkoff;
3325
3326
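	/* change the current segment */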
3327 if (segno != curseg->segno) {
3328 curseg->next_segno = segno;
3329 change_curseg(sbi, type);
3330 }
3331
3332 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3333 __add_sum_entry(sbi, type, sum);
3334
3335 if (!recover_curseg || recover_newaddr)
3336 update_sit_entry(sbi, new_blkaddr, 1);
3337 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3338 invalidate_mapping_pages(META_MAPPING(sbi),
3339 old_blkaddr, old_blkaddr);
3340 update_sit_entry(sbi, old_blkaddr, -1);
3341 }
3342
3343 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3344 locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3345
3346 locate_dirty_segment(sbi, old_cursegno);
3347
3348 if (recover_curseg) {
3349 if (old_cursegno != curseg->segno) {
3350 curseg->next_segno = old_cursegno;
3351 change_curseg(sbi, type);
3352 }
3353 curseg->next_blkoff = old_blkoff;
3354 }
3355
3356 up_write(&sit_i->sentry_lock);
3357 mutex_unlock(&curseg->curseg_mutex);
3358 up_write(&SM_I(sbi)->curseg_lock);
3359}
3360
3361void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3362 block_t old_addr, block_t new_addr,
3363 unsigned char version, bool recover_curseg,
3364 bool recover_newaddr)
3365{
3366 struct f2fs_summary sum;
3367
3368 set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3369
3370 f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3371 recover_curseg, recover_newaddr);
3372
3373 f2fs_update_data_blkaddr(dn, new_addr);
3374}
3375
3376void f2fs_wait_on_page_writeback(struct page *page,
3377 enum page_type type, bool ordered, bool locked)
3378{
3379 if (PageWriteback(page)) {
3380 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3381
3382 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3383 if (ordered) {
3384 wait_on_page_writeback(page);
3385 f2fs_bug_on(sbi, locked && PageWriteback(page));
3386 } else {
3387 wait_for_stable_page(page);
3388 }
3389 }
3390}
3391
3392void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3393{
3394 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3395 struct page *cpage;
3396
3397 if (!f2fs_post_read_required(inode))
3398 return;
3399
3400 if (!__is_valid_data_blkaddr(blkaddr))
3401 return;
3402
3403 cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3404 if (cpage) {
3405 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3406 f2fs_put_page(cpage, 1);
3407 }
3408}
3409
3410void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3411 block_t len)
3412{
3413 block_t i;
3414
3415 for (i = 0; i < len; i++)
3416 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3417}
3418
3419static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3420{
3421 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3422 struct curseg_info *seg_i;
3423 unsigned char *kaddr;
3424 struct page *page;
3425 block_t start;
3426 int i, j, offset;
3427
3428 start = start_sum_block(sbi);
3429
3430 page = f2fs_get_meta_page(sbi, start++);
3431 if (IS_ERR(page))
3432 return PTR_ERR(page);
3433 kaddr = (unsigned char *)page_address(page);
3434
3435
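	/* Step 1: restore nat cache */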
3436 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3437 memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3438
3439
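	/* Step 2: restore sit cache */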
3440 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3441 memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3442 offset = 2 * SUM_JOURNAL_SIZE;
3443
3444
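	/* Step 3: restore summary entries */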
3445 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3446 unsigned short blk_off;
3447 unsigned int segno;
3448
3449 seg_i = CURSEG_I(sbi, i);
3450 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3451 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3452 seg_i->next_segno = segno;
3453 reset_curseg(sbi, i, 0);
3454 seg_i->alloc_type = ckpt->alloc_type[i];
3455 seg_i->next_blkoff = blk_off;
3456
3457 if (seg_i->alloc_type == SSR)
3458 blk_off = sbi->blocks_per_seg;
3459
3460 for (j = 0; j < blk_off; j++) {
3461 struct f2fs_summary *s;
3462 s = (struct f2fs_summary *)(kaddr + offset);
3463 seg_i->sum_blk->entries[j] = *s;
3464 offset += SUMMARY_SIZE;
3465 if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3466 SUM_FOOTER_SIZE)
3467 continue;
3468
3469 f2fs_put_page(page, 1);
3470 page = NULL;
3471
3472 page = f2fs_get_meta_page(sbi, start++);
3473 if (IS_ERR(page))
3474 return PTR_ERR(page);
3475 kaddr = (unsigned char *)page_address(page);
3476 offset = 0;
3477 }
3478 }
3479 f2fs_put_page(page, 1);
3480 return 0;
3481}
3482
3483static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3484{
3485 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3486 struct f2fs_summary_block *sum;
3487 struct curseg_info *curseg;
3488 struct page *new;
3489 unsigned short blk_off;
3490 unsigned int segno = 0;
3491 block_t blk_addr = 0;
3492 int err = 0;
3493
3494
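	/* get segment number and block address */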
3495 if (IS_DATASEG(type)) {
3496 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3497 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3498 CURSEG_HOT_DATA]);
3499 if (__exist_node_summaries(sbi))
3500 blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
3501 else
3502 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3503 } else {
3504 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3505 CURSEG_HOT_NODE]);
3506 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3507 CURSEG_HOT_NODE]);
3508 if (__exist_node_summaries(sbi))
3509 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3510 type - CURSEG_HOT_NODE);
3511 else
3512 blk_addr = GET_SUM_BLOCK(sbi, segno);
3513 }
3514
3515 new = f2fs_get_meta_page(sbi, blk_addr);
3516 if (IS_ERR(new))
3517 return PTR_ERR(new);
3518 sum = (struct f2fs_summary_block *)page_address(new);
3519
3520 if (IS_NODESEG(type)) {
3521 if (__exist_node_summaries(sbi)) {
3522 struct f2fs_summary *ns = &sum->entries[0];
3523 int i;
3524 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3525 ns->version = 0;
3526 ns->ofs_in_node = 0;
3527 }
3528 } else {
3529 err = f2fs_restore_node_summary(sbi, segno, sum);
3530 if (err)
3531 goto out;
3532 }
3533 }
3534
3535
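	/* set uncompleted segment to curseg */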
3536 curseg = CURSEG_I(sbi, type);
3537 mutex_lock(&curseg->curseg_mutex);
3538
3539
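	/* update journal info */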
3540 down_write(&curseg->journal_rwsem);
3541 memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3542 up_write(&curseg->journal_rwsem);
3543
3544 memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3545 memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3546 curseg->next_segno = segno;
3547 reset_curseg(sbi, type, 0);
3548 curseg->alloc_type = ckpt->alloc_type[type];
3549 curseg->next_blkoff = blk_off;
3550 mutex_unlock(&curseg->curseg_mutex);
3551out:
3552 f2fs_put_page(new, 1);
3553 return err;
3554}
3555
3556static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3557{
3558 struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3559 struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3560 int type = CURSEG_HOT_DATA;
3561 int err;
3562
3563 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3564 int npages = f2fs_npages_for_summary_flush(sbi, true);
3565
3566 if (npages >= 2)
3567 f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3568 META_CP, true);
3569
3570
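		/* restore for compacted data summary */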
3571 err = read_compacted_summaries(sbi);
3572 if (err)
3573 return err;
3574 type = CURSEG_HOT_NODE;
3575 }
3576
3577 if (__exist_node_summaries(sbi))
3578 f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3579 NR_CURSEG_TYPE - type, META_CP, true);
3580
3581 for (; type <= CURSEG_COLD_NODE; type++) {
3582 err = read_normal_summaries(sbi, type);
3583 if (err)
3584 return err;
3585 }
3586
3587
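	/* sanity check for summary blocks */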
3588 if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
3589 sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
3590 f2fs_err(sbi, "invalid journal entries nats %u sits %u",
3591 nats_in_cursum(nat_j), sits_in_cursum(sit_j));
3592 return -EINVAL;
3593 }
3594
3595 return 0;
3596}
3597
3598static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3599{
3600 struct page *page;
3601 unsigned char *kaddr;
3602 struct f2fs_summary *summary;
3603 struct curseg_info *seg_i;
3604 int written_size = 0;
3605 int i, j;
3606
3607 page = f2fs_grab_meta_page(sbi, blkaddr++);
3608 kaddr = (unsigned char *)page_address(page);
3609 memset(kaddr, 0, PAGE_SIZE);
3610
3611
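	/* Step 1: write nat cache */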
3612 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3613 memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
3614 written_size += SUM_JOURNAL_SIZE;
3615
3616
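	/* Step 2: write sit cache */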
3617 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3618 memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
3619 written_size += SUM_JOURNAL_SIZE;
3620
3621
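	/* Step 3: write summary entries */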
3622 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3623 unsigned short blkoff;
3624 seg_i = CURSEG_I(sbi, i);
3625 if (sbi->ckpt->alloc_type[i] == SSR)
3626 blkoff = sbi->blocks_per_seg;
3627 else
3628 blkoff = curseg_blkoff(sbi, i);
3629
3630 for (j = 0; j < blkoff; j++) {
3631 if (!page) {
3632 page = f2fs_grab_meta_page(sbi, blkaddr++);
3633 kaddr = (unsigned char *)page_address(page);
3634 memset(kaddr, 0, PAGE_SIZE);
3635 written_size = 0;
3636 }
3637 summary = (struct f2fs_summary *)(kaddr + written_size);
3638 *summary = seg_i->sum_blk->entries[j];
3639 written_size += SUMMARY_SIZE;
3640
3641 if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
3642 SUM_FOOTER_SIZE)
3643 continue;
3644
3645 set_page_dirty(page);
3646 f2fs_put_page(page, 1);
3647 page = NULL;
3648 }
3649 }
3650 if (page) {
3651 set_page_dirty(page);
3652 f2fs_put_page(page, 1);
3653 }
3654}
3655
3656static void write_normal_summaries(struct f2fs_sb_info *sbi,
3657 block_t blkaddr, int type)
3658{
3659 int i, end;
3660 if (IS_DATASEG(type))
3661 end = type + NR_CURSEG_DATA_TYPE;
3662 else
3663 end = type + NR_CURSEG_NODE_TYPE;
3664
3665 for (i = type; i < end; i++)
3666 write_current_sum_page(sbi, i, blkaddr + (i - type));
3667}
3668
3669void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3670{
3671 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
3672 write_compacted_summaries(sbi, start_blk);
3673 else
3674 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
3675}
3676
3677void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3678{
3679 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
3680}
3681
3682int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
3683 unsigned int val, int alloc)
3684{
3685 int i;
3686
3687 if (type == NAT_JOURNAL) {
3688 for (i = 0; i < nats_in_cursum(journal); i++) {
3689 if (le32_to_cpu(nid_in_journal(journal, i)) == val)
3690 return i;
3691 }
3692 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
3693 return update_nats_in_cursum(journal, 1);
3694 } else if (type == SIT_JOURNAL) {
3695 for (i = 0; i < sits_in_cursum(journal); i++)
3696 if (le32_to_cpu(segno_in_journal(journal, i)) == val)
3697 return i;
3698 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
3699 return update_sits_in_cursum(journal, 1);
3700 }
3701 return -1;
3702}
3703
3704static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
3705 unsigned int segno)
3706{
3707 return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
3708}
3709
3710static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
3711 unsigned int start)
3712{
3713 struct sit_info *sit_i = SIT_I(sbi);
3714 struct page *page;
3715 pgoff_t src_off, dst_off;
3716
3717 src_off = current_sit_addr(sbi, start);
3718 dst_off = next_sit_addr(sbi, src_off);
3719
3720 page = f2fs_grab_meta_page(sbi, dst_off);
3721 seg_info_to_sit_page(sbi, page, start);
3722
3723 set_page_dirty(page);
3724 set_to_next_sit(sit_i, start);
3725
3726 return page;
3727}
3728
3729static struct sit_entry_set *grab_sit_entry_set(void)
3730{
3731 struct sit_entry_set *ses =
3732 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3733
3734 ses->entry_cnt = 0;
3735 INIT_LIST_HEAD(&ses->set_list);
3736 return ses;
3737}
3738
3739static void release_sit_entry_set(struct sit_entry_set *ses)
3740{
3741 list_del(&ses->set_list);
3742 kmem_cache_free(sit_entry_set_slab, ses);
3743}
3744
3745static void adjust_sit_entry_set(struct sit_entry_set *ses,
3746 struct list_head *head)
3747{
3748 struct sit_entry_set *next = ses;
3749
3750 if (list_is_last(&ses->set_list, head))
3751 return;
3752
3753 list_for_each_entry_continue(next, head, set_list)
3754 if (ses->entry_cnt <= next->entry_cnt)
3755 break;
3756
3757 list_move_tail(&ses->set_list, &next->set_list);
3758}
3759
3760static void add_sit_entry(unsigned int segno, struct list_head *head)
3761{
3762 struct sit_entry_set *ses;
3763 unsigned int start_segno = START_SEGNO(segno);
3764
3765 list_for_each_entry(ses, head, set_list) {
3766 if (ses->start_segno == start_segno) {
3767 ses->entry_cnt++;
3768 adjust_sit_entry_set(ses, head);
3769 return;
3770 }
3771 }
3772
3773 ses = grab_sit_entry_set();
3774
3775 ses->start_segno = start_segno;
3776 ses->entry_cnt++;
3777 list_add(&ses->set_list, head);
3778}
3779
3780static void add_sits_in_set(struct f2fs_sb_info *sbi)
3781{
3782 struct f2fs_sm_info *sm_info = SM_I(sbi);
3783 struct list_head *set_list = &sm_info->sit_entry_set;
3784 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
3785 unsigned int segno;
3786
3787 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3788 add_sit_entry(segno, set_list);
3789}
3790
3791static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
3792{
3793 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3794 struct f2fs_journal *journal = curseg->journal;
3795 int i;
3796
3797 down_write(&curseg->journal_rwsem);
3798 for (i = 0; i < sits_in_cursum(journal); i++) {
3799 unsigned int segno;
3800 bool dirtied;
3801
3802 segno = le32_to_cpu(segno_in_journal(journal, i));
3803 dirtied = __mark_sit_entry_dirty(sbi, segno);
3804
3805 if (!dirtied)
3806 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
3807 }
3808 update_sits_in_cursum(journal, -i);
3809 up_write(&curseg->journal_rwsem);
3810}
3811
/*
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefreed segments to the free segments.
 */
3816void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
3817{
3818 struct sit_info *sit_i = SIT_I(sbi);
3819 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
3820 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3821 struct f2fs_journal *journal = curseg->journal;
3822 struct sit_entry_set *ses, *tmp;
3823 struct list_head *head = &SM_I(sbi)->sit_entry_set;
3824 bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
3825 struct seg_entry *se;
3826
3827 down_write(&sit_i->sentry_lock);
3828
3829 if (!sit_i->dirty_sentries)
3830 goto out;
3831
	/*
	 * add and account sit entries of dirty bitmap in sit entry
	 * set temporarily
	 */
3836 add_sits_in_set(sbi);
3837
	/*
	 * If there is not enough space in the journal to store all dirty sit
	 * entries, remove the journal entries and account them in the sit
	 * entry sets instead.
	 */
3843 if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
3844 !to_journal)
3845 remove_sits_in_journal(sbi);
3846
	/*
	 * There are two ways to flush sit entries:
	 * #1, flush sit entries to the journal in the current cold data
	 *     summary block.
	 * #2, flush sit entries to the sit page.
	 */
3852 list_for_each_entry_safe(ses, tmp, head, set_list) {
3853 struct page *page = NULL;
3854 struct f2fs_sit_block *raw_sit = NULL;
3855 unsigned int start_segno = ses->start_segno;
3856 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
3857 (unsigned long)MAIN_SEGS(sbi));
3858 unsigned int segno = start_segno;
3859
3860 if (to_journal &&
3861 !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
3862 to_journal = false;
3863
3864 if (to_journal) {
3865 down_write(&curseg->journal_rwsem);
3866 } else {
3867 page = get_next_sit_page(sbi, start_segno);
3868 raw_sit = page_address(page);
3869 }
3870
3871
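		/* flush dirty sit entries in region of current sit set */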
3872 for_each_set_bit_from(segno, bitmap, end) {
3873 int offset, sit_offset;
3874
3875 se = get_seg_entry(sbi, segno);
3876#ifdef CONFIG_F2FS_CHECK_FS
3877 if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
3878 SIT_VBLOCK_MAP_SIZE))
3879 f2fs_bug_on(sbi, 1);
3880#endif
3881
3882
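			/* add discard candidates */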
3883 if (!(cpc->reason & CP_DISCARD)) {
3884 cpc->trim_start = segno;
3885 add_discard_addrs(sbi, cpc, false);
3886 }
3887
3888 if (to_journal) {
3889 offset = f2fs_lookup_journal_in_cursum(journal,
3890 SIT_JOURNAL, segno, 1);
3891 f2fs_bug_on(sbi, offset < 0);
3892 segno_in_journal(journal, offset) =
3893 cpu_to_le32(segno);
3894 seg_info_to_raw_sit(se,
3895 &sit_in_journal(journal, offset));
3896 check_block_count(sbi, segno,
3897 &sit_in_journal(journal, offset));
3898 } else {
3899 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
3900 seg_info_to_raw_sit(se,
3901 &raw_sit->entries[sit_offset]);
3902 check_block_count(sbi, segno,
3903 &raw_sit->entries[sit_offset]);
3904 }
3905
3906 __clear_bit(segno, bitmap);
3907 sit_i->dirty_sentries--;
3908 ses->entry_cnt--;
3909 }
3910
3911 if (to_journal)
3912 up_write(&curseg->journal_rwsem);
3913 else
3914 f2fs_put_page(page, 1);
3915
3916 f2fs_bug_on(sbi, ses->entry_cnt);
3917 release_sit_entry_set(ses);
3918 }
3919
3920 f2fs_bug_on(sbi, !list_empty(head));
3921 f2fs_bug_on(sbi, sit_i->dirty_sentries);
3922out:
3923 if (cpc->reason & CP_DISCARD) {
3924 __u64 trim_start = cpc->trim_start;
3925
3926 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
3927 add_discard_addrs(sbi, cpc, false);
3928
3929 cpc->trim_start = trim_start;
3930 }
3931 up_write(&sit_i->sentry_lock);
3932
3933 set_prefree_as_free_segments(sbi);
3934}
3935
3936static int build_sit_info(struct f2fs_sb_info *sbi)
3937{
3938 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
3939 struct sit_info *sit_i;
3940 unsigned int sit_segs, start;
3941 char *src_bitmap, *bitmap;
3942 unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
3943
3944
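	/* allocate memory for SIT information */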
3945 sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
3946 if (!sit_i)
3947 return -ENOMEM;
3948
3949 SM_I(sbi)->sit_info = sit_i;
3950
3951 sit_i->sentries =
3952 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
3953 MAIN_SEGS(sbi)),
3954 GFP_KERNEL);
3955 if (!sit_i->sentries)
3956 return -ENOMEM;
3957
3958 main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3959 sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
3960 GFP_KERNEL);
3961 if (!sit_i->dirty_sentries_bitmap)
3962 return -ENOMEM;
3963
3964#ifdef CONFIG_F2FS_CHECK_FS
3965 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
3966#else
3967 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
3968#endif
3969 sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
3970 if (!sit_i->bitmap)
3971 return -ENOMEM;
3972
3973 bitmap = sit_i->bitmap;
3974
3975 for (start = 0; start < MAIN_SEGS(sbi); start++) {
3976 sit_i->sentries[start].cur_valid_map = bitmap;
3977 bitmap += SIT_VBLOCK_MAP_SIZE;
3978
3979 sit_i->sentries[start].ckpt_valid_map = bitmap;
3980 bitmap += SIT_VBLOCK_MAP_SIZE;
3981
3982#ifdef CONFIG_F2FS_CHECK_FS
3983 sit_i->sentries[start].cur_valid_map_mir = bitmap;
3984 bitmap += SIT_VBLOCK_MAP_SIZE;
3985#endif
3986
3987 sit_i->sentries[start].discard_map = bitmap;
3988 bitmap += SIT_VBLOCK_MAP_SIZE;
3989 }
3990
3991 sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3992 if (!sit_i->tmp_map)
3993 return -ENOMEM;
3994
3995 if (__is_large_section(sbi)) {
3996 sit_i->sec_entries =
3997 f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
3998 MAIN_SECS(sbi)),
3999 GFP_KERNEL);
4000 if (!sit_i->sec_entries)
4001 return -ENOMEM;
4002 }
4003
4004
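	/* get information related with SIT */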
4005 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4006
4007
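	/* setup SIT bitmap from checkpoint pack */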
4008 sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4009 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4010
4011 sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4012 if (!sit_i->sit_bitmap)
4013 return -ENOMEM;
4014
4015#ifdef CONFIG_F2FS_CHECK_FS
4016 sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4017 sit_bitmap_size, GFP_KERNEL);
4018 if (!sit_i->sit_bitmap_mir)
4019 return -ENOMEM;
4020
4021 sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4022 main_bitmap_size, GFP_KERNEL);
4023 if (!sit_i->invalid_segmap)
4024 return -ENOMEM;
4025#endif
4026
4027
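	/* init SIT information */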
4028 sit_i->s_ops = &default_salloc_ops;
4029
4030 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4031 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4032 sit_i->written_valid_blocks = 0;
4033 sit_i->bitmap_size = sit_bitmap_size;
4034 sit_i->dirty_sentries = 0;
4035 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4036 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4037 sit_i->mounted_time = ktime_get_real_seconds();
4038 init_rwsem(&sit_i->sentry_lock);
4039 return 0;
4040}
4041
4042static int build_free_segmap(struct f2fs_sb_info *sbi)
4043{
4044 struct free_segmap_info *free_i;
4045 unsigned int bitmap_size, sec_bitmap_size;
4046
4047
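	/* allocate memory for free segmap information */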
4048 free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4049 if (!free_i)
4050 return -ENOMEM;
4051
4052 SM_I(sbi)->free_info = free_i;
4053
4054 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4055 free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4056 if (!free_i->free_segmap)
4057 return -ENOMEM;
4058
4059 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4060 free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4061 if (!free_i->free_secmap)
4062 return -ENOMEM;
4063
4064
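	/* set all segments as in-use temporarily */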
4065 memset(free_i->free_segmap, 0xff, bitmap_size);
4066 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4067
4068
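	/* init free segmap information */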
4069 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4070 free_i->free_segments = 0;
4071 free_i->free_sections = 0;
4072 spin_lock_init(&free_i->segmap_lock);
4073 return 0;
4074}
4075
4076static int build_curseg(struct f2fs_sb_info *sbi)
4077{
4078 struct curseg_info *array;
4079 int i;
4080
4081 array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
4082 GFP_KERNEL);
4083 if (!array)
4084 return -ENOMEM;
4085
4086 SM_I(sbi)->curseg_array = array;
4087
4088 for (i = 0; i < NR_CURSEG_TYPE; i++) {
4089 mutex_init(&array[i].curseg_mutex);
4090 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4091 if (!array[i].sum_blk)
4092 return -ENOMEM;
4093 init_rwsem(&array[i].journal_rwsem);
4094 array[i].journal = f2fs_kzalloc(sbi,
4095 sizeof(struct f2fs_journal), GFP_KERNEL);
4096 if (!array[i].journal)
4097 return -ENOMEM;
4098 array[i].segno = NULL_SEGNO;
4099 array[i].next_blkoff = 0;
4100 }
4101 return restore_curseg_summaries(sbi);
4102}
4103
4104static int build_sit_entries(struct f2fs_sb_info *sbi)
4105{
4106 struct sit_info *sit_i = SIT_I(sbi);
4107 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4108 struct f2fs_journal *journal = curseg->journal;
4109 struct seg_entry *se;
4110 struct f2fs_sit_entry sit;
4111 int sit_blk_cnt = SIT_BLK_CNT(sbi);
4112 unsigned int i, start, end;
4113 unsigned int readed, start_blk = 0;
4114 int err = 0;
4115 block_t total_node_blocks = 0;
4116
4117 do {
4118 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
4119 META_SIT, true);
4120
4121 start = start_blk * sit_i->sents_per_block;
4122 end = (start_blk + readed) * sit_i->sents_per_block;
4123
4124 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4125 struct f2fs_sit_block *sit_blk;
4126 struct page *page;
4127
4128 se = &sit_i->sentries[start];
4129 page = get_current_sit_page(sbi, start);
4130 if (IS_ERR(page))
4131 return PTR_ERR(page);
4132 sit_blk = (struct f2fs_sit_block *)page_address(page);
4133 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4134 f2fs_put_page(page, 1);
4135
4136 err = check_block_count(sbi, start, &sit);
4137 if (err)
4138 return err;
4139 seg_info_from_raw_sit(se, &sit);
4140 if (IS_NODESEG(se->type))
4141 total_node_blocks += se->valid_blocks;
4142
4143
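			/* build discard map only one time */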
4144 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4145 memset(se->discard_map, 0xff,
4146 SIT_VBLOCK_MAP_SIZE);
4147 } else {
4148 memcpy(se->discard_map,
4149 se->cur_valid_map,
4150 SIT_VBLOCK_MAP_SIZE);
4151 sbi->discard_blks +=
4152 sbi->blocks_per_seg -
4153 se->valid_blocks;
4154 }
4155
4156 if (__is_large_section(sbi))
4157 get_sec_entry(sbi, start)->valid_blocks +=
4158 se->valid_blocks;
4159 }
4160 start_blk += readed;
4161 } while (start_blk < sit_blk_cnt);
4162
4163 down_read(&curseg->journal_rwsem);
4164 for (i = 0; i < sits_in_cursum(journal); i++) {
4165 unsigned int old_valid_blocks;
4166
4167 start = le32_to_cpu(segno_in_journal(journal, i));
4168 if (start >= MAIN_SEGS(sbi)) {
4169 f2fs_err(sbi, "Wrong journal entry on segno %u",
4170 start);
4171 err = -EFSCORRUPTED;
4172 break;
4173 }
4174
4175 se = &sit_i->sentries[start];
4176 sit = sit_in_journal(journal, i);
4177
4178 old_valid_blocks = se->valid_blocks;
4179 if (IS_NODESEG(se->type))
4180 total_node_blocks -= old_valid_blocks;
4181
4182 err = check_block_count(sbi, start, &sit);
4183 if (err)
4184 break;
4185 seg_info_from_raw_sit(se, &sit);
4186 if (IS_NODESEG(se->type))
4187 total_node_blocks += se->valid_blocks;
4188
4189 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4190 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4191 } else {
4192 memcpy(se->discard_map, se->cur_valid_map,
4193 SIT_VBLOCK_MAP_SIZE);
4194 sbi->discard_blks += old_valid_blocks;
4195 sbi->discard_blks -= se->valid_blocks;
4196 }
4197
4198 if (__is_large_section(sbi)) {
4199 get_sec_entry(sbi, start)->valid_blocks +=
4200 se->valid_blocks;
4201 get_sec_entry(sbi, start)->valid_blocks -=
4202 old_valid_blocks;
4203 }
4204 }
4205 up_read(&curseg->journal_rwsem);
4206
4207 if (!err && total_node_blocks != valid_node_count(sbi)) {
4208 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4209 total_node_blocks, valid_node_count(sbi));
4210 err = -EFSCORRUPTED;
4211 }
4212
4213 return err;
4214}
4215
4216static void init_free_segmap(struct f2fs_sb_info *sbi)
4217{
4218 unsigned int start;
4219 int type;
4220
4221 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4222 struct seg_entry *sentry = get_seg_entry(sbi, start);
4223 if (!sentry->valid_blocks)
4224 __set_free(sbi, start);
4225 else
4226 SIT_I(sbi)->written_valid_blocks +=
4227 sentry->valid_blocks;
4228 }
4229
4230
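	/* mark the current segments as in-use */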
4231 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4232 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4233 __set_test_and_inuse(sbi, curseg_t->segno);
4234 }
4235}
4236
4237static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4238{
4239 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4240 struct free_segmap_info *free_i = FREE_I(sbi);
4241 unsigned int segno = 0, offset = 0;
4242 unsigned short valid_blocks;
4243
4244 while (1) {
4245
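		/* find dirty segment based on free segmap */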
4246 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4247 if (segno >= MAIN_SEGS(sbi))
4248 break;
4249 offset = segno + 1;
4250 valid_blocks = get_valid_blocks(sbi, segno, false);
4251 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
4252 continue;
4253 if (valid_blocks > sbi->blocks_per_seg) {
4254 f2fs_bug_on(sbi, 1);
4255 continue;
4256 }
4257 mutex_lock(&dirty_i->seglist_lock);
4258 __locate_dirty_segment(sbi, segno, DIRTY);
4259 mutex_unlock(&dirty_i->seglist_lock);
4260 }
4261}
4262
4263static int init_victim_secmap(struct f2fs_sb_info *sbi)
4264{
4265 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4266 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4267
4268 dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4269 if (!dirty_i->victim_secmap)
4270 return -ENOMEM;
4271 return 0;
4272}
4273
4274static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4275{
4276 struct dirty_seglist_info *dirty_i;
4277 unsigned int bitmap_size, i;
4278
4279	/* allocate memory for the dirty segments list information */
4280 dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4281 GFP_KERNEL);
4282 if (!dirty_i)
4283 return -ENOMEM;
4284
4285 SM_I(sbi)->dirty_info = dirty_i;
4286 mutex_init(&dirty_i->seglist_lock);
4287
4288 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4289
4290 for (i = 0; i < NR_DIRTY_TYPE; i++) {
4291 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4292 GFP_KERNEL);
4293 if (!dirty_i->dirty_segmap[i])
4294 return -ENOMEM;
4295 }
4296
4297 init_dirty_segmap(sbi);
4298 return init_victim_secmap(sbi);
4299}
4300
4301static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4302{
4303 int i;
4304
4305	/*
4306	 * .next_blkoff of each curseg must point to an unused block in its
4307	 * bitmap; for LFS cursegs, all blocks after .next_blkoff must be
4308	 * unused as well.
4309 for (i = 0; i < NO_CHECK_TYPE; i++) {
4310 struct curseg_info *curseg = CURSEG_I(sbi, i);
4311 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4312 unsigned int blkofs = curseg->next_blkoff;
4313
4314 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4315 goto out;
4316
4317 if (curseg->alloc_type == SSR)
4318 continue;
4319
4320 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4321 if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4322 continue;
4323out:
4324 f2fs_err(sbi,
4325 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4326 i, curseg->segno, curseg->alloc_type,
4327 curseg->next_blkoff, blkofs);
4328 return -EFSCORRUPTED;
4329 }
4330 }
4331 return 0;
4332}
4333
4334/*
4335 * Update the min/max segment modified time used by the cost-benefit GC policy
4336 */
4337static void init_min_max_mtime(struct f2fs_sb_info *sbi)
4338{
4339 struct sit_info *sit_i = SIT_I(sbi);
4340 unsigned int segno;
4341
4342 down_write(&sit_i->sentry_lock);
4343
4344 sit_i->min_mtime = ULLONG_MAX;
4345
4346 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4347 unsigned int i;
4348 unsigned long long mtime = 0;
4349
4350 for (i = 0; i < sbi->segs_per_sec; i++)
4351 mtime += get_seg_entry(sbi, segno + i)->mtime;
4352
4353 mtime = div_u64(mtime, sbi->segs_per_sec);
4354
4355 if (sit_i->min_mtime > mtime)
4356 sit_i->min_mtime = mtime;
4357 }
4358 sit_i->max_mtime = get_mtime(sbi, false);
4359 up_write(&sit_i->sentry_lock);
4360}
4361
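/*
 * Mount-time construction of the segment manager: set up f2fs_sm_info from
 * the superblock and checkpoint, start the flush (read-write mounts only) and
 * discard controls, then build the SIT, free segmap, current segments, and
 * dirty segmap in that order. On failure the error is returned to the caller,
 * which is expected to unwind any partially built state via
 * f2fs_destroy_segment_manager().
 */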
4362int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
4363{
4364 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4365 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4366 struct f2fs_sm_info *sm_info;
4367 int err;
4368
4369 sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
4370 if (!sm_info)
4371 return -ENOMEM;
4372
4373	/* init sm_info from the raw superblock and checkpoint */
4374 sbi->sm_info = sm_info;
4375 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
4376 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
4377 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
4378 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
4379 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
4380 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
4381 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
4382 sm_info->rec_prefree_segments = sm_info->main_segments *
4383 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
4384 if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
4385 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
4386
4387 if (!test_opt(sbi, LFS))
4388 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
4389 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
4390 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
4391 sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
4392 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
4393 sm_info->min_ssr_sections = reserved_sections(sbi);
4394
4395 INIT_LIST_HEAD(&sm_info->sit_entry_set);
4396
4397 init_rwsem(&sm_info->curseg_lock);
4398
4399 if (!f2fs_readonly(sbi->sb)) {
4400 err = f2fs_create_flush_cmd_control(sbi);
4401 if (err)
4402 return err;
4403 }
4404
4405 err = create_discard_cmd_control(sbi);
4406 if (err)
4407 return err;
4408
4409 err = build_sit_info(sbi);
4410 if (err)
4411 return err;
4412 err = build_free_segmap(sbi);
4413 if (err)
4414 return err;
4415 err = build_curseg(sbi);
4416 if (err)
4417 return err;
4418
4419	/* load SIT entries from disk, merged with the checkpoint SIT journal */
4420 err = build_sit_entries(sbi);
4421 if (err)
4422 return err;
4423
4424 init_free_segmap(sbi);
4425 err = build_dirty_segmap(sbi);
4426 if (err)
4427 return err;
4428
4429 err = sanity_check_curseg(sbi);
4430 if (err)
4431 return err;
4432
4433 init_min_max_mtime(sbi);
4434 return 0;
4435}
4436
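/* Teardown helpers below free the structures built by the routines above. */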
4437static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
4438 enum dirty_type dirty_type)
4439{
4440 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4441
4442 mutex_lock(&dirty_i->seglist_lock);
4443 kvfree(dirty_i->dirty_segmap[dirty_type]);
4444 dirty_i->nr_dirty[dirty_type] = 0;
4445 mutex_unlock(&dirty_i->seglist_lock);
4446}
4447
4448static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
4449{
4450 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4451 kvfree(dirty_i->victim_secmap);
4452}
4453
4454static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
4455{
4456 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4457 int i;
4458
4459 if (!dirty_i)
4460 return;
4461
4462	/* free the per-type dirty segment bitmaps */
4463 for (i = 0; i < NR_DIRTY_TYPE; i++)
4464 discard_dirty_segmap(sbi, i);
4465
4466 destroy_victim_secmap(sbi);
4467 SM_I(sbi)->dirty_info = NULL;
4468 kvfree(dirty_i);
4469}
4470
4471static void destroy_curseg(struct f2fs_sb_info *sbi)
4472{
4473 struct curseg_info *array = SM_I(sbi)->curseg_array;
4474 int i;
4475
4476 if (!array)
4477 return;
4478 SM_I(sbi)->curseg_array = NULL;
4479 for (i = 0; i < NR_CURSEG_TYPE; i++) {
4480 kvfree(array[i].sum_blk);
4481 kvfree(array[i].journal);
4482 }
4483 kvfree(array);
4484}
4485
4486static void destroy_free_segmap(struct f2fs_sb_info *sbi)
4487{
4488 struct free_segmap_info *free_i = SM_I(sbi)->free_info;
4489 if (!free_i)
4490 return;
4491 SM_I(sbi)->free_info = NULL;
4492 kvfree(free_i->free_segmap);
4493 kvfree(free_i->free_secmap);
4494 kvfree(free_i);
4495}
4496
4497static void destroy_sit_info(struct f2fs_sb_info *sbi)
4498{
4499 struct sit_info *sit_i = SIT_I(sbi);
4500
4501 if (!sit_i)
4502 return;
4503
4504 if (sit_i->sentries)
4505 kvfree(sit_i->bitmap);
4506 kvfree(sit_i->tmp_map);
4507
4508 kvfree(sit_i->sentries);
4509 kvfree(sit_i->sec_entries);
4510 kvfree(sit_i->dirty_sentries_bitmap);
4511
4512 SM_I(sbi)->sit_info = NULL;
4513 kvfree(sit_i->sit_bitmap);
4514#ifdef CONFIG_F2FS_CHECK_FS
4515 kvfree(sit_i->sit_bitmap_mir);
4516 kvfree(sit_i->invalid_segmap);
4517#endif
4518 kvfree(sit_i);
4519}
4520
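/*
 * Tear the segment manager down in roughly the reverse order of
 * f2fs_build_segment_manager(); every helper tolerates state that was never
 * built, so this is safe to call on a partially initialized sbi.
 */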
4521void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
4522{
4523 struct f2fs_sm_info *sm_info = SM_I(sbi);
4524
4525 if (!sm_info)
4526 return;
4527 f2fs_destroy_flush_cmd_control(sbi, true);
4528 destroy_discard_cmd_control(sbi);
4529 destroy_dirty_segmap(sbi);
4530 destroy_curseg(sbi);
4531 destroy_free_segmap(sbi);
4532 destroy_sit_info(sbi);
4533 sbi->sm_info = NULL;
4534 kvfree(sm_info);
4535}
4536
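/*
 * Create the slab caches used by the segment manager, unwinding the ones
 * already created if a later allocation fails. A minimal usage sketch,
 * assuming the usual pairing from the module init/exit path (the label name
 * is illustrative only):
 *
 *	err = f2fs_create_segment_manager_caches();
 *	if (err)
 *		goto free_earlier_caches;
 *	...
 *	f2fs_destroy_segment_manager_caches();
 */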
4537int __init f2fs_create_segment_manager_caches(void)
4538{
4539 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
4540 sizeof(struct discard_entry));
4541 if (!discard_entry_slab)
4542 goto fail;
4543
4544 discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
4545 sizeof(struct discard_cmd));
4546 if (!discard_cmd_slab)
4547 goto destroy_discard_entry;
4548
4549 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
4550 sizeof(struct sit_entry_set));
4551 if (!sit_entry_set_slab)
4552 goto destroy_discard_cmd;
4553
4554 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
4555 sizeof(struct inmem_pages));
4556 if (!inmem_entry_slab)
4557 goto destroy_sit_entry_set;
4558 return 0;
4559
4560destroy_sit_entry_set:
4561 kmem_cache_destroy(sit_entry_set_slab);
4562destroy_discard_cmd:
4563 kmem_cache_destroy(discard_cmd_slab);
4564destroy_discard_entry:
4565 kmem_cache_destroy(discard_entry_slab);
4566fail:
4567 return -ENOMEM;
4568}
4569
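/* Destroy the slab caches created by f2fs_create_segment_manager_caches(). */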
4570void f2fs_destroy_segment_manager_caches(void)
4571{
4572 kmem_cache_destroy(sit_entry_set_slab);
4573 kmem_cache_destroy(discard_cmd_slab);
4574 kmem_cache_destroy(discard_entry_slab);
4575 kmem_cache_destroy(inmem_entry_slab);
4576}
4577