1
2
3
4
5
6
7
8#include <linux/fs.h>
9#include <linux/f2fs_fs.h>
10#include <linux/bio.h>
11#include <linux/blkdev.h>
12#include <linux/prefetch.h>
13#include <linux/kthread.h>
14#include <linux/swap.h>
15#include <linux/timer.h>
16#include <linux/freezer.h>
17#include <linux/sched/signal.h>
18
19#include "f2fs.h"
20#include "segment.h"
21#include "node.h"
22#include "gc.h"
23#include "trace.h"
24#include <trace/events/f2fs.h>
25
/* Reverse-order "find first zero": runs __reverse_ffs() on the complement. */
#define __reverse_ffz(x) __reverse_ffs(~(x))

/*
 * kmem_cache handles used by this file.  In the code visible here,
 * inmem_entry_slab backs struct inmem_pages entries and discard_cmd_slab
 * backs struct discard_cmd objects; the other two are presumably used
 * further down in the file (not visible from here).
 */
static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;
32
33static unsigned long __reverse_ulong(unsigned char *str)
34{
35 unsigned long tmp = 0;
36 int shift = 24, idx = 0;
37
38#if BITS_PER_LONG == 64
39 shift = 56;
40#endif
41 while (shift >= 0) {
42 tmp |= (unsigned long)str[idx++] << shift;
43 shift -= BITS_PER_BYTE;
44 }
45 return tmp;
46}
47
48
49
50
51
/*
 * Return the position of the most significant set bit of @word, counted
 * from the top of the word (the top bit is position 0).  Implemented as a
 * binary search: at each step the upper half of the remaining window is
 * tested; if it is empty its width is added to the result, otherwise the
 * window is shifted down onto it.
 *
 * A zero @word yields the index of the lowest bit, exactly as a word with
 * only the lowest bit set would.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	unsigned int width = 16;
	int num = 0;

#if BITS_PER_LONG == 64
	width = 32;
#endif
	for (; width; width >>= 1) {
		/* bits [width, 2 * width) of the current window */
		unsigned long upper = (word >> width) & ((1UL << width) - 1);

		if (upper)
			word >>= width;
		else
			num += width;
	}
	return num;
}
86
87
88
89
90
91
92
93
94
95
96static unsigned long __find_rev_next_bit(const unsigned long *addr,
97 unsigned long size, unsigned long offset)
98{
99 const unsigned long *p = addr + BIT_WORD(offset);
100 unsigned long result = size;
101 unsigned long tmp;
102
103 if (offset >= size)
104 return size;
105
106 size -= (offset & ~(BITS_PER_LONG - 1));
107 offset %= BITS_PER_LONG;
108
109 while (1) {
110 if (*p == 0)
111 goto pass;
112
113 tmp = __reverse_ulong((unsigned char *)p);
114
115 tmp &= ~0UL >> offset;
116 if (size < BITS_PER_LONG)
117 tmp &= (~0UL << (BITS_PER_LONG - size));
118 if (tmp)
119 goto found;
120pass:
121 if (size <= BITS_PER_LONG)
122 break;
123 size -= BITS_PER_LONG;
124 offset = 0;
125 p++;
126 }
127 return result;
128found:
129 return result - size + __reverse_ffs(tmp);
130}
131
132static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
133 unsigned long size, unsigned long offset)
134{
135 const unsigned long *p = addr + BIT_WORD(offset);
136 unsigned long result = size;
137 unsigned long tmp;
138
139 if (offset >= size)
140 return size;
141
142 size -= (offset & ~(BITS_PER_LONG - 1));
143 offset %= BITS_PER_LONG;
144
145 while (1) {
146 if (*p == ~0UL)
147 goto pass;
148
149 tmp = __reverse_ulong((unsigned char *)p);
150
151 if (offset)
152 tmp |= ~0UL << (BITS_PER_LONG - offset);
153 if (size < BITS_PER_LONG)
154 tmp |= ~0UL >> size;
155 if (tmp != ~0UL)
156 goto found;
157pass:
158 if (size <= BITS_PER_LONG)
159 break;
160 size -= BITS_PER_LONG;
161 offset = 0;
162 p++;
163 }
164 return result;
165found:
166 return result - size + __reverse_ffz(tmp);
167}
168
169bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
170{
171 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
172 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
173 int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
174
175 if (test_opt(sbi, LFS))
176 return false;
177 if (sbi->gc_mode == GC_URGENT)
178 return true;
179 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
180 return true;
181
182 return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
183 SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
184}
185
186void f2fs_register_inmem_page(struct inode *inode, struct page *page)
187{
188 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
189 struct f2fs_inode_info *fi = F2FS_I(inode);
190 struct inmem_pages *new;
191
192 f2fs_trace_pid(page);
193
194 f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
195
196 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
197
198
199 new->page = page;
200 INIT_LIST_HEAD(&new->list);
201
202
203 mutex_lock(&fi->inmem_lock);
204 get_page(page);
205 list_add_tail(&new->list, &fi->inmem_pages);
206 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
207 if (list_empty(&fi->inmem_ilist))
208 list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
209 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
210 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
211 mutex_unlock(&fi->inmem_lock);
212
213 trace_f2fs_register_inmem_page(page, INMEM);
214}
215
/*
 * Walk the struct inmem_pages entries on @head and release each of them.
 *
 * @inode:   inode the entries belong to
 * @head:    list of entries to process
 * @drop:    emit an INMEM_DROP trace event per entry
 * @recover: look up the page's dnode and put back the block address that
 *           was saved in cur->old_addr
 * @trylock: use trylock_page() and leave entries whose page cannot be
 *           locked on @head; otherwise block in lock_page()
 *
 * For every processed entry the page private data is cleared, the page is
 * put (unlocking it), the entry is unlinked and freed, and the
 * F2FS_INMEM_PAGES counter is decremented.  When @drop or @recover is set
 * the page is additionally marked not uptodate and its cold-data mark is
 * cleared.
 *
 * Returns err as left by the most recent recovery iteration (stays 0 when
 * @recover is false), or the f2fs_get_node_info() error on the early exit.
 */
static int __revoke_inmem_pages(struct inode *inode,
				struct list_head *head, bool drop, bool recover,
				bool trylock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inmem_pages *cur, *tmp;
	int err = 0;

	list_for_each_entry_safe(cur, tmp, head, list) {
		struct page *page = cur->page;

		if (drop)
			trace_f2fs_commit_inmem_page(page, INMEM_DROP);

		if (trylock) {
			/*
			 * Do not sleep on the page lock here; a page that is
			 * busy is skipped and stays on the list.
			 */
			if (!trylock_page(page))
				continue;
		} else {
			lock_page(page);
		}

		f2fs_wait_on_page_writeback(page, DATA, true, true);

		if (recover) {
			struct dnode_of_data dn;
			struct node_info ni;

			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
			set_new_dnode(&dn, inode, NULL, NULL, 0);
			err = f2fs_get_dnode_of_data(&dn, page->index,
								LOOKUP_NODE);
			if (err) {
				/* out of memory: back off briefly and retry */
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
				/* any other failure is reported as -EAGAIN */
				err = -EAGAIN;
				goto next;
			}

			err = f2fs_get_node_info(sbi, dn.nid, &ni);
			if (err) {
				/*
				 * NOTE(review): this return skips the cleanup
				 * at "next", so the page looks like it stays
				 * locked and the entry stays on @head - confirm
				 * this is intended.
				 */
				f2fs_put_dnode(&dn);
				return err;
			}

			/* restore the address that was recorded at commit time */
			if (cur->old_addr == NEW_ADDR) {
				f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
				f2fs_update_data_blkaddr(&dn, NEW_ADDR);
			} else
				f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					cur->old_addr, ni.version, true, true);
			f2fs_put_dnode(&dn);
		}
next:
		/* a successfully committed page keeps its uptodate state */
		if (drop || recover) {
			ClearPageUptodate(page);
			clear_cold_data(page);
		}
		f2fs_clear_page_private(page);
		f2fs_put_page(page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	return err;
}
291
292void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
293{
294 struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
295 struct inode *inode;
296 struct f2fs_inode_info *fi;
297next:
298 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
299 if (list_empty(head)) {
300 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
301 return;
302 }
303 fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
304 inode = igrab(&fi->vfs_inode);
305 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
306
307 if (inode) {
308 if (gc_failure) {
309 if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
310 goto drop;
311 goto skip;
312 }
313drop:
314 set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
315 f2fs_drop_inmem_pages(inode);
316 iput(inode);
317 }
318skip:
319 congestion_wait(BLK_RW_ASYNC, HZ/50);
320 cond_resched();
321 goto next;
322}
323
324void f2fs_drop_inmem_pages(struct inode *inode)
325{
326 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
327 struct f2fs_inode_info *fi = F2FS_I(inode);
328
329 while (!list_empty(&fi->inmem_pages)) {
330 mutex_lock(&fi->inmem_lock);
331 __revoke_inmem_pages(inode, &fi->inmem_pages,
332 true, false, true);
333
334 if (list_empty(&fi->inmem_pages)) {
335 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
336 if (!list_empty(&fi->inmem_ilist))
337 list_del_init(&fi->inmem_ilist);
338 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
339 }
340 mutex_unlock(&fi->inmem_lock);
341 }
342
343 clear_inode_flag(inode, FI_ATOMIC_FILE);
344 fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
345 stat_dec_atomic_write(inode);
346}
347
348void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
349{
350 struct f2fs_inode_info *fi = F2FS_I(inode);
351 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
352 struct list_head *head = &fi->inmem_pages;
353 struct inmem_pages *cur = NULL;
354
355 f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));
356
357 mutex_lock(&fi->inmem_lock);
358 list_for_each_entry(cur, head, list) {
359 if (cur->page == page)
360 break;
361 }
362
363 f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
364 list_del(&cur->list);
365 mutex_unlock(&fi->inmem_lock);
366
367 dec_page_count(sbi, F2FS_INMEM_PAGES);
368 kmem_cache_free(inmem_entry_slab, cur);
369
370 ClearPageUptodate(page);
371 f2fs_clear_page_private(page);
372 f2fs_put_page(page, 0);
373
374 trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
375}
376
/*
 * Write out every page on fi->inmem_pages of @inode.
 *
 * Each page that still belongs to the inode's mapping is written with
 * f2fs_do_write_data_page(); the previous block address is remembered in
 * the entry so the write can be undone.  Handled entries are moved to a
 * local revoke list.  On the first write error (other than -ENOMEM, which
 * is retried) the loop stops.
 *
 * On success the revoke list is simply released.  On failure the already
 * written pages are rolled back and the pages that were never written are
 * dropped.
 *
 * Returns 0 on success; on failure, the result of the roll-back pass.
 * The caller in this file holds fi->inmem_lock around the call.
 */
static int __f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_PRIO,
		.io_type = FS_DATA_IO,
	};
	struct list_head revoke_list;
	bool submit_bio = false;
	int err = 0;

	INIT_LIST_HEAD(&revoke_list);

	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		struct page *page = cur->page;

		lock_page(page);
		/* pages no longer attached to this mapping are not written */
		if (page->mapping == inode->i_mapping) {
			trace_f2fs_commit_inmem_page(page, INMEM);

			f2fs_wait_on_page_writeback(page, DATA, true, true);

			set_page_dirty(page);
			if (clear_page_dirty_for_io(page)) {
				inode_dec_dirty_pages(inode);
				f2fs_remove_dirty_inode(inode);
			}
retry:
			fio.page = page;
			fio.old_blkaddr = NULL_ADDR;
			fio.encrypted_page = NULL;
			fio.need_lock = LOCK_DONE;
			err = f2fs_do_write_data_page(&fio);
			if (err) {
				/* out of memory: back off briefly and retry */
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
				/* leave this entry on inmem_pages and stop */
				unlock_page(page);
				break;
			}
			/* remember the old address so the write can be revoked */
			cur->old_addr = fio.old_blkaddr;
			submit_bio = true;
		}
		unlock_page(page);
		list_move_tail(&cur->list, &revoke_list);
	}

	if (submit_bio)
		f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);

	if (err) {
		/*
		 * Roll back the pages that were already written (recover
		 * mode).  The value returned from this pass replaces err and
		 * is what the caller sees.
		 */
		err = __revoke_inmem_pages(inode, &revoke_list,
						false, true, false);

		/* drop the pages that never got written */
		__revoke_inmem_pages(inode, &fi->inmem_pages,
						true, false, false);
	} else {
		/* success: just release the tracking entries */
		__revoke_inmem_pages(inode, &revoke_list,
						false, false, false);
	}

	return err;
}
458
/*
 * Commit all in-memory pages of @inode.
 *
 * Calls f2fs_balance_fs() first, then takes fi->i_gc_rwsem[WRITE] for
 * writing, the filesystem op lock and fi->inmem_lock, in that order, and
 * runs __f2fs_commit_inmem_pages() with FI_ATOMIC_COMMIT set on the inode.
 * Afterwards the inode is removed from the per-sb ATOMIC_FILE list whether
 * or not the commit succeeded, and the locks are released.
 *
 * Returns the result of __f2fs_commit_inmem_pages().
 */
int f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	f2fs_balance_fs(sbi, true);

	down_write(&fi->i_gc_rwsem[WRITE]);

	f2fs_lock_op(sbi);
	set_inode_flag(inode, FI_ATOMIC_COMMIT);

	mutex_lock(&fi->inmem_lock);
	err = __f2fs_commit_inmem_pages(inode);

	/* the inode no longer has pending in-memory pages to track */
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (!list_empty(&fi->inmem_ilist))
		list_del_init(&fi->inmem_ilist);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
	mutex_unlock(&fi->inmem_lock);

	clear_inode_flag(inode, FI_ATOMIC_COMMIT);

	f2fs_unlock_op(sbi);
	up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}
488
489
490
491
492
493void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
494{
495 if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
496 f2fs_show_injection_info(FAULT_CHECKPOINT);
497 f2fs_stop_checkpoint(sbi, false);
498 }
499
500
501 if (need && excess_cached_nats(sbi))
502 f2fs_balance_fs_bg(sbi);
503
504 if (f2fs_is_checkpoint_ready(sbi))
505 return;
506
507
508
509
510
511 if (has_not_enough_free_secs(sbi, 0, 0)) {
512 mutex_lock(&sbi->gc_mutex);
513 f2fs_gc(sbi, false, false, NULL_SEGNO);
514 }
515}
516
517void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
518{
519 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
520 return;
521
522
523 if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
524 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
525
526
527 if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
528 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
529
530 if (!f2fs_available_free_memory(sbi, FREE_NIDS))
531 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
532 else
533 f2fs_build_free_nids(sbi, false, false);
534
535 if (!is_idle(sbi, REQ_TIME) &&
536 (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
537 return;
538
539
540 if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
541 !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
542 excess_prefree_segs(sbi) ||
543 excess_dirty_nats(sbi) ||
544 excess_dirty_nodes(sbi) ||
545 f2fs_time_over(sbi, CP_TIME)) {
546 if (test_opt(sbi, DATA_FLUSH)) {
547 struct blk_plug plug;
548
549 mutex_lock(&sbi->flush_lock);
550
551 blk_start_plug(&plug);
552 f2fs_sync_dirty_inodes(sbi, FILE_INODE);
553 blk_finish_plug(&plug);
554
555 mutex_unlock(&sbi->flush_lock);
556 }
557 f2fs_sync_fs(sbi->sb, true);
558 stat_inc_bg_cp_count(sbi->stat_info);
559 }
560}
561
562static int __submit_flush_wait(struct f2fs_sb_info *sbi,
563 struct block_device *bdev)
564{
565 struct bio *bio;
566 int ret;
567
568 bio = f2fs_bio_alloc(sbi, 0, false);
569 if (!bio)
570 return -ENOMEM;
571
572 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
573 bio_set_dev(bio, bdev);
574 ret = submit_bio_wait(bio);
575 bio_put(bio);
576
577 trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
578 test_opt(sbi, FLUSH_MERGE), ret);
579 return ret;
580}
581
582static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
583{
584 int ret = 0;
585 int i;
586
587 if (!f2fs_is_multi_device(sbi))
588 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
589
590 for (i = 0; i < sbi->s_ndevs; i++) {
591 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
592 continue;
593 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
594 if (ret)
595 break;
596 }
597 return ret;
598}
599
600static int issue_flush_thread(void *data)
601{
602 struct f2fs_sb_info *sbi = data;
603 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
604 wait_queue_head_t *q = &fcc->flush_wait_queue;
605repeat:
606 if (kthread_should_stop())
607 return 0;
608
609 sb_start_intwrite(sbi->sb);
610
611 if (!llist_empty(&fcc->issue_list)) {
612 struct flush_cmd *cmd, *next;
613 int ret;
614
615 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
616 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
617
618 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
619
620 ret = submit_flush_wait(sbi, cmd->ino);
621 atomic_inc(&fcc->issued_flush);
622
623 llist_for_each_entry_safe(cmd, next,
624 fcc->dispatch_list, llnode) {
625 cmd->ret = ret;
626 complete(&cmd->wait);
627 }
628 fcc->dispatch_list = NULL;
629 }
630
631 sb_end_intwrite(sbi->sb);
632
633 wait_event_interruptible(*q,
634 kthread_should_stop() || !llist_empty(&fcc->issue_list));
635 goto repeat;
636}
637
/*
 * Issue a cache flush on behalf of inode @ino and wait for its result.
 *
 * - With the NOBARRIER option nothing is done and 0 is returned.
 * - Without FLUSH_MERGE, or when this is the only queued flush, or on a
 *   multi-device filesystem, the flush is submitted directly.
 * - Otherwise the request is queued on fcc->issue_list for the flush
 *   thread.  If the thread pointer is NULL by the time we look, the caller
 *   drains the list itself and answers the other waiters.
 *
 * Returns the flush result.
 */
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		atomic_inc(&fcc->queued_flush);
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	/* nothing to merge with, or merging is not used across devices */
	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
	    f2fs_is_multi_device(sbi)) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);

		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/* make the list update visible before checking for a sleeper */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->queued_flush);
	} else {
		struct llist_node *list;

		/* no flush thread: try to take over whatever is queued */
		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			/* the list was already emptied by someone else */
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->queued_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				/* our own command needs no completion */
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->queued_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}
704
705int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
706{
707 dev_t dev = sbi->sb->s_bdev->bd_dev;
708 struct flush_cmd_control *fcc;
709 int err = 0;
710
711 if (SM_I(sbi)->fcc_info) {
712 fcc = SM_I(sbi)->fcc_info;
713 if (fcc->f2fs_issue_flush)
714 return err;
715 goto init_thread;
716 }
717
718 fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
719 if (!fcc)
720 return -ENOMEM;
721 atomic_set(&fcc->issued_flush, 0);
722 atomic_set(&fcc->queued_flush, 0);
723 init_waitqueue_head(&fcc->flush_wait_queue);
724 init_llist_head(&fcc->issue_list);
725 SM_I(sbi)->fcc_info = fcc;
726 if (!test_opt(sbi, FLUSH_MERGE))
727 return err;
728
729init_thread:
730 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
731 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
732 if (IS_ERR(fcc->f2fs_issue_flush)) {
733 err = PTR_ERR(fcc->f2fs_issue_flush);
734 kvfree(fcc);
735 SM_I(sbi)->fcc_info = NULL;
736 return err;
737 }
738
739 return err;
740}
741
742void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
743{
744 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
745
746 if (fcc && fcc->f2fs_issue_flush) {
747 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
748
749 fcc->f2fs_issue_flush = NULL;
750 kthread_stop(flush_thread);
751 }
752 if (free) {
753 kvfree(fcc);
754 SM_I(sbi)->fcc_info = NULL;
755 }
756}
757
758int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
759{
760 int ret = 0, i;
761
762 if (!f2fs_is_multi_device(sbi))
763 return 0;
764
765 for (i = 1; i < sbi->s_ndevs; i++) {
766 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
767 continue;
768 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
769 if (ret)
770 break;
771
772 spin_lock(&sbi->dev_lock);
773 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
774 spin_unlock(&sbi->dev_lock);
775 }
776
777 return ret;
778}
779
780static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
781 enum dirty_type dirty_type)
782{
783 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
784
785
786 if (IS_CURSEG(sbi, segno))
787 return;
788
789 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
790 dirty_i->nr_dirty[dirty_type]++;
791
792 if (dirty_type == DIRTY) {
793 struct seg_entry *sentry = get_seg_entry(sbi, segno);
794 enum dirty_type t = sentry->type;
795
796 if (unlikely(t >= DIRTY)) {
797 f2fs_bug_on(sbi, 1);
798 return;
799 }
800 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
801 dirty_i->nr_dirty[t]++;
802 }
803}
804
805static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
806 enum dirty_type dirty_type)
807{
808 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
809
810 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
811 dirty_i->nr_dirty[dirty_type]--;
812
813 if (dirty_type == DIRTY) {
814 struct seg_entry *sentry = get_seg_entry(sbi, segno);
815 enum dirty_type t = sentry->type;
816
817 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
818 dirty_i->nr_dirty[t]--;
819
820 if (get_valid_blocks(sbi, segno, true) == 0)
821 clear_bit(GET_SEC_FROM_SEG(sbi, segno),
822 dirty_i->victim_secmap);
823 }
824}
825
826
827
828
829
830
831static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
832{
833 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
834 unsigned short valid_blocks, ckpt_valid_blocks;
835
836 if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
837 return;
838
839 mutex_lock(&dirty_i->seglist_lock);
840
841 valid_blocks = get_valid_blocks(sbi, segno, false);
842 ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
843
844 if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
845 ckpt_valid_blocks == sbi->blocks_per_seg)) {
846 __locate_dirty_segment(sbi, segno, PRE);
847 __remove_dirty_segment(sbi, segno, DIRTY);
848 } else if (valid_blocks < sbi->blocks_per_seg) {
849 __locate_dirty_segment(sbi, segno, DIRTY);
850 } else {
851
852 __remove_dirty_segment(sbi, segno, DIRTY);
853 }
854
855 mutex_unlock(&dirty_i->seglist_lock);
856}
857
858
859void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
860{
861 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
862 unsigned int segno;
863
864 mutex_lock(&dirty_i->seglist_lock);
865 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
866 if (get_valid_blocks(sbi, segno, false))
867 continue;
868 if (IS_CURSEG(sbi, segno))
869 continue;
870 __locate_dirty_segment(sbi, segno, PRE);
871 __remove_dirty_segment(sbi, segno, DIRTY);
872 }
873 mutex_unlock(&dirty_i->seglist_lock);
874}
875
876block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
877{
878 int ovp_hole_segs =
879 (overprovision_segments(sbi) - reserved_segments(sbi));
880 block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
881 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
882 block_t holes[2] = {0, 0};
883 block_t unusable;
884 struct seg_entry *se;
885 unsigned int segno;
886
887 mutex_lock(&dirty_i->seglist_lock);
888 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
889 se = get_seg_entry(sbi, segno);
890 if (IS_NODESEG(se->type))
891 holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
892 else
893 holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
894 }
895 mutex_unlock(&dirty_i->seglist_lock);
896
897 unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
898 if (unusable > ovp_holes)
899 return unusable - ovp_holes;
900 return 0;
901}
902
903int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
904{
905 int ovp_hole_segs =
906 (overprovision_segments(sbi) - reserved_segments(sbi));
907 if (unusable > F2FS_OPTION(sbi).unusable_cap)
908 return -EAGAIN;
909 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
910 dirty_segments(sbi) > ovp_hole_segs)
911 return -EAGAIN;
912 return 0;
913}
914
915
916static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
917{
918 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
919 unsigned int segno = 0;
920
921 mutex_lock(&dirty_i->seglist_lock);
922 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
923 if (get_valid_blocks(sbi, segno, false))
924 continue;
925 if (get_ckpt_valid_blocks(sbi, segno))
926 continue;
927 mutex_unlock(&dirty_i->seglist_lock);
928 return segno;
929 }
930 mutex_unlock(&dirty_i->seglist_lock);
931 return NULL_SEGNO;
932}
933
934static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
935 struct block_device *bdev, block_t lstart,
936 block_t start, block_t len)
937{
938 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
939 struct list_head *pend_list;
940 struct discard_cmd *dc;
941
942 f2fs_bug_on(sbi, !len);
943
944 pend_list = &dcc->pend_list[plist_idx(len)];
945
946 dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
947 INIT_LIST_HEAD(&dc->list);
948 dc->bdev = bdev;
949 dc->lstart = lstart;
950 dc->start = start;
951 dc->len = len;
952 dc->ref = 0;
953 dc->state = D_PREP;
954 dc->queued = 0;
955 dc->error = 0;
956 init_completion(&dc->wait);
957 list_add_tail(&dc->list, pend_list);
958 spin_lock_init(&dc->lock);
959 dc->bio_ref = 0;
960 atomic_inc(&dcc->discard_cmd_cnt);
961 dcc->undiscard_blks += len;
962
963 return dc;
964}
965
966static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
967 struct block_device *bdev, block_t lstart,
968 block_t start, block_t len,
969 struct rb_node *parent, struct rb_node **p,
970 bool leftmost)
971{
972 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
973 struct discard_cmd *dc;
974
975 dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
976
977 rb_link_node(&dc->rb_node, parent, p);
978 rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
979
980 return dc;
981}
982
983static void __detach_discard_cmd(struct discard_cmd_control *dcc,
984 struct discard_cmd *dc)
985{
986 if (dc->state == D_DONE)
987 atomic_sub(dc->queued, &dcc->queued_discard);
988
989 list_del(&dc->list);
990 rb_erase_cached(&dc->rb_node, &dcc->root);
991 dcc->undiscard_blks -= dc->len;
992
993 kmem_cache_free(discard_cmd_slab, dc);
994
995 atomic_dec(&dcc->discard_cmd_cnt);
996}
997
998static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
999 struct discard_cmd *dc)
1000{
1001 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1002 unsigned long flags;
1003
1004 trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
1005
1006 spin_lock_irqsave(&dc->lock, flags);
1007 if (dc->bio_ref) {
1008 spin_unlock_irqrestore(&dc->lock, flags);
1009 return;
1010 }
1011 spin_unlock_irqrestore(&dc->lock, flags);
1012
1013 f2fs_bug_on(sbi, dc->ref);
1014
1015 if (dc->error == -EOPNOTSUPP)
1016 dc->error = 0;
1017
1018 if (dc->error)
1019 printk_ratelimited(
1020 "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
1021 KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
1022 __detach_discard_cmd(dcc, dc);
1023}
1024
1025static void f2fs_submit_discard_endio(struct bio *bio)
1026{
1027 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1028 unsigned long flags;
1029
1030 dc->error = blk_status_to_errno(bio->bi_status);
1031
1032 spin_lock_irqsave(&dc->lock, flags);
1033 dc->bio_ref--;
1034 if (!dc->bio_ref && dc->state == D_SUBMIT) {
1035 dc->state = D_DONE;
1036 complete_all(&dc->wait);
1037 }
1038 spin_unlock_irqrestore(&dc->lock, flags);
1039 bio_put(bio);
1040}
1041
1042static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1043 block_t start, block_t end)
1044{
1045#ifdef CONFIG_F2FS_CHECK_FS
1046 struct seg_entry *sentry;
1047 unsigned int segno;
1048 block_t blk = start;
1049 unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1050 unsigned long *map;
1051
1052 while (blk < end) {
1053 segno = GET_SEGNO(sbi, blk);
1054 sentry = get_seg_entry(sbi, segno);
1055 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1056
1057 if (end < START_BLOCK(sbi, segno + 1))
1058 size = GET_BLKOFF_FROM_SEG0(sbi, end);
1059 else
1060 size = max_blocks;
1061 map = (unsigned long *)(sentry->cur_valid_map);
1062 offset = __find_rev_next_bit(map, size, offset);
1063 f2fs_bug_on(sbi, offset != size);
1064 blk = START_BLOCK(sbi, segno + 1);
1065 }
1066#endif
1067}
1068
1069static void __init_discard_policy(struct f2fs_sb_info *sbi,
1070 struct discard_policy *dpolicy,
1071 int discard_type, unsigned int granularity)
1072{
1073
1074 dpolicy->type = discard_type;
1075 dpolicy->sync = true;
1076 dpolicy->ordered = false;
1077 dpolicy->granularity = granularity;
1078
1079 dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1080 dpolicy->io_aware_gran = MAX_PLIST_NUM;
1081 dpolicy->timeout = 0;
1082
1083 if (discard_type == DPOLICY_BG) {
1084 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1085 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1086 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1087 dpolicy->io_aware = true;
1088 dpolicy->sync = false;
1089 dpolicy->ordered = true;
1090 if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
1091 dpolicy->granularity = 1;
1092 dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1093 }
1094 } else if (discard_type == DPOLICY_FORCE) {
1095 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1096 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1097 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1098 dpolicy->io_aware = false;
1099 } else if (discard_type == DPOLICY_FSTRIM) {
1100 dpolicy->io_aware = false;
1101 } else if (discard_type == DPOLICY_UMOUNT) {
1102 dpolicy->max_requests = UINT_MAX;
1103 dpolicy->io_aware = false;
1104
1105 dpolicy->granularity = 1;
1106 }
1107}
1108
/*
 * Forward declaration: __submit_discard_cmd() below calls this before its
 * definition appears later in the file.
 */
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len);
1112
/*
 * Issue the discard described by @dc, splitting it into several bios when
 * it is longer than the device's max_discard_sectors limit.
 *
 * @dpolicy: policy in effect; supplies max_requests, sync and the type that
 *           selects which wait list the command moves to
 * @dc:      command to issue; only acted on while in state D_PREP
 * @issued:  running count of issued requests, incremented per bio attempt
 *
 * dc->len is reset and then grown by the length of every chunk attempted.
 * If the loop ends without error while blocks are still left over, the
 * remainder is handed to __update_discard_tree_range().
 *
 * Returns 0 on success (also when nothing was done because @dc is not in
 * D_PREP or SBI_NEED_FSCK is set), otherwise the error from issuing the
 * discard.
 */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
						struct discard_cmd *dc,
						unsigned int *issued)
{
	struct block_device *bdev = dc->bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	int flag = dpolicy->sync ? REQ_SYNC : 0;
	block_t lstart, start, len, total_len;
	int err = 0;

	if (dc->state != D_PREP)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		return 0;

	trace_f2fs_issue_discard(bdev, dc->start, dc->len);

	lstart = dc->lstart;
	start = dc->start;
	len = dc->len;
	total_len = len;

	/* rebuilt below from the chunks that are actually attempted */
	dc->len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		/* clamp this chunk to what the device accepts at once */
		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		/* running out of request budget also ends the command here */
		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->len += len;

		if (time_to_inject(sbi, FAULT_DISCARD)) {
			f2fs_show_injection_info(FAULT_DISCARD);
			err = -EIO;
			goto submit;
		}
		err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, 0, &bio);
submit:
		if (err) {
			/* earlier chunks are in flight: let them finish it */
			spin_lock_irqsave(&dc->lock, flags);
			if (dc->state == D_PARTIAL)
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

			break;
		}

		f2fs_bug_on(sbi, !bio);

		/*
		 * Update the state and take the bio reference under dc->lock
		 * before the bio is submitted; the completion handler reads
		 * both under the same lock.
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);

		atomic_inc(&dcc->queued_discard);
		dc->queued++;
		list_move_tail(&dc->list, wait_list);

		/* debug builds: the range must not contain valid blocks */
		__check_sit_bitmap(sbi, lstart, lstart + len);

		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, FS_DISCARD, 1);

		/* advance to the part of the range that is still left */
		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
	}

	/* re-queue whatever was not issued as a fresh range */
	if (!err && len)
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
	return err;
}
1219
1220static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
1221 struct block_device *bdev, block_t lstart,
1222 block_t start, block_t len,
1223 struct rb_node **insert_p,
1224 struct rb_node *insert_parent)
1225{
1226 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1227 struct rb_node **p;
1228 struct rb_node *parent = NULL;
1229 struct discard_cmd *dc = NULL;
1230 bool leftmost = true;
1231
1232 if (insert_p && insert_parent) {
1233 parent = insert_parent;
1234 p = insert_p;
1235 goto do_insert;
1236 }
1237
1238 p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
1239 lstart, &leftmost);
1240do_insert:
1241 dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
1242 p, leftmost);
1243 if (!dc)
1244 return NULL;
1245
1246 return dc;
1247}
1248
1249static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1250 struct discard_cmd *dc)
1251{
1252 list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
1253}
1254
/*
 * Take the single block @blkaddr out of the range covered by discard
 * command @dc.
 *
 * A command that is already D_DONE, or that covers just one block, is
 * removed altogether.  Otherwise the command is shrunk to the part in front
 * of @blkaddr and/or the part behind it; when both parts exist the one
 * behind @blkaddr becomes a newly inserted command.  dcc->undiscard_blks is
 * adjusted to match the lengths that remain.
 */
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	/* snapshot of the original range; dc itself is modified below */
	struct discard_info di = dc->di;
	bool modified = false;

	if (dc->state == D_DONE || dc->len == 1) {
		__remove_discard_cmd(sbi, dc);
		return;
	}

	/* drop the old length; the surviving parts are added back below */
	dcc->undiscard_blks -= di.len;

	/* part in front of blkaddr: keep it in dc */
	if (blkaddr > di.lstart) {
		dc->len = blkaddr - dc->lstart;
		dcc->undiscard_blks += dc->len;
		__relocate_discard_cmd(dcc, dc);
		modified = true;
	}

	/* part behind blkaddr */
	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			/* dc already holds the front part: insert a new command */
			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr,
					NULL, NULL);
		} else {
			/* blkaddr is the first block: advance dc by one block */
			dc->lstart++;
			dc->len--;
			dc->start++;
			dcc->undiscard_blks += dc->len;
			__relocate_discard_cmd(dcc, dc);
		}
	}
}
1291
/*
 * Add the range [lstart, lstart + len) to the discard rb-tree.
 *
 * Entries already in the tree are left in place; only the gaps between them
 * that fall inside the requested range are added.  Each gap is merged into a
 * neighbouring D_PREP entry on the same @bdev when the
 * __is_discard_{back,front}_mergeable() helpers allow it, otherwise it is
 * inserted as a new entry.
 *
 * @lstart: start of the range, used as the key in the rb-tree
 * @start:  start value stored next to @lstart in each entry
 *          (NOTE(review): presumably the device-relative address - confirm
 *          against callers)
 * @len:    number of blocks in the range
 */
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct request_queue *q = bdev_get_queue(bdev);
	/* upper bound handed to the mergeable checks, in blocks */
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
	block_t end = lstart + len;

	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, lstart,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	/* an entry found for @lstart acts as the left neighbour */
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		/* no left neighbour: first gap starts at @lstart itself */
		di.lstart = lstart;
		di.len = next_dc ? next_dc->lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
	}

	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			/* gap begins where prev_dc ends, clamped to @lstart */
			di.lstart = prev_dc->lstart + prev_dc->len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			/* gap ends at next_dc or at the end of the range */
			if (!next_dc || next_dc->lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		/* neighbours are adjacent, nothing to add here */
		if (!di.len)
			goto next;

		/* try to extend the left neighbour over the gap */
		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
			prev_dc->di.len += di.len;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, prev_dc);
			/* continue with the merged extent for the front merge */
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		/* try to extend the right neighbour backwards over the gap */
		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, next_dc);
			/* next_dc now covers the back-merged entry too */
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
		}

		if (!merged) {
			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
							di.len, NULL, NULL);
		}
 next:
		/* slide the (prev, next) window one entry to the right */
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}
1382
1383static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1384 struct block_device *bdev, block_t blkstart, block_t blklen)
1385{
1386 block_t lblkstart = blkstart;
1387
1388 if (!f2fs_bdev_support_discard(bdev))
1389 return 0;
1390
1391 trace_f2fs_queue_discard(bdev, blkstart, blklen);
1392
1393 if (f2fs_is_multi_device(sbi)) {
1394 int devi = f2fs_target_device_index(sbi, blkstart);
1395
1396 blkstart -= FDEV(devi).start_blk;
1397 }
1398 mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1399 __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1400 mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1401 return 0;
1402}
1403
1404static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1405 struct discard_policy *dpolicy)
1406{
1407 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1408 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1409 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1410 struct discard_cmd *dc;
1411 struct blk_plug plug;
1412 unsigned int pos = dcc->next_pos;
1413 unsigned int issued = 0;
1414 bool io_interrupted = false;
1415
1416 mutex_lock(&dcc->cmd_lock);
1417 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1418 NULL, pos,
1419 (struct rb_entry **)&prev_dc,
1420 (struct rb_entry **)&next_dc,
1421 &insert_p, &insert_parent, true, NULL);
1422 if (!dc)
1423 dc = next_dc;
1424
1425 blk_start_plug(&plug);
1426
1427 while (dc) {
1428 struct rb_node *node;
1429 int err = 0;
1430
1431 if (dc->state != D_PREP)
1432 goto next;
1433
1434 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1435 io_interrupted = true;
1436 break;
1437 }
1438
1439 dcc->next_pos = dc->lstart + dc->len;
1440 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1441
1442 if (issued >= dpolicy->max_requests)
1443 break;
1444next:
1445 node = rb_next(&dc->rb_node);
1446 if (err)
1447 __remove_discard_cmd(sbi, dc);
1448 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1449 }
1450
1451 blk_finish_plug(&plug);
1452
1453 if (!dc)
1454 dcc->next_pos = 0;
1455
1456 mutex_unlock(&dcc->cmd_lock);
1457
1458 if (!issued && io_interrupted)
1459 issued = -1;
1460
1461 return issued;
1462}
1463
1464static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1465 struct discard_policy *dpolicy)
1466{
1467 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1468 struct list_head *pend_list;
1469 struct discard_cmd *dc, *tmp;
1470 struct blk_plug plug;
1471 int i, issued = 0;
1472 bool io_interrupted = false;
1473
1474 if (dpolicy->timeout != 0)
1475 f2fs_update_time(sbi, dpolicy->timeout);
1476
1477 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1478 if (dpolicy->timeout != 0 &&
1479 f2fs_time_over(sbi, dpolicy->timeout))
1480 break;
1481
1482 if (i + 1 < dpolicy->granularity)
1483 break;
1484
1485 if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
1486 return __issue_discard_cmd_orderly(sbi, dpolicy);
1487
1488 pend_list = &dcc->pend_list[i];
1489
1490 mutex_lock(&dcc->cmd_lock);
1491 if (list_empty(pend_list))
1492 goto next;
1493 if (unlikely(dcc->rbtree_check))
1494 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
1495 &dcc->root));
1496 blk_start_plug(&plug);
1497 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1498 f2fs_bug_on(sbi, dc->state != D_PREP);
1499
1500 if (dpolicy->timeout != 0 &&
1501 f2fs_time_over(sbi, dpolicy->timeout))
1502 break;
1503
1504 if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1505 !is_idle(sbi, DISCARD_TIME)) {
1506 io_interrupted = true;
1507 break;
1508 }
1509
1510 __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1511
1512 if (issued >= dpolicy->max_requests)
1513 break;
1514 }
1515 blk_finish_plug(&plug);
1516next:
1517 mutex_unlock(&dcc->cmd_lock);
1518
1519 if (issued >= dpolicy->max_requests || io_interrupted)
1520 break;
1521 }
1522
1523 if (!issued && io_interrupted)
1524 issued = -1;
1525
1526 return issued;
1527}
1528
1529static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1530{
1531 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1532 struct list_head *pend_list;
1533 struct discard_cmd *dc, *tmp;
1534 int i;
1535 bool dropped = false;
1536
1537 mutex_lock(&dcc->cmd_lock);
1538 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1539 pend_list = &dcc->pend_list[i];
1540 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1541 f2fs_bug_on(sbi, dc->state != D_PREP);
1542 __remove_discard_cmd(sbi, dc);
1543 dropped = true;
1544 }
1545 }
1546 mutex_unlock(&dcc->cmd_lock);
1547
1548 return dropped;
1549}
1550
/*
 * Exported wrapper around __drop_discard_cmd(); the "anything dropped?"
 * result is intentionally not reported to the caller.
 */
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	(void)__drop_discard_cmd(sbi);
}
1555
1556static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1557 struct discard_cmd *dc)
1558{
1559 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1560 unsigned int len = 0;
1561
1562 wait_for_completion_io(&dc->wait);
1563 mutex_lock(&dcc->cmd_lock);
1564 f2fs_bug_on(sbi, dc->state != D_DONE);
1565 dc->ref--;
1566 if (!dc->ref) {
1567 if (!dc->error)
1568 len = dc->len;
1569 __remove_discard_cmd(sbi, dc);
1570 }
1571 mutex_unlock(&dcc->cmd_lock);
1572
1573 return len;
1574}
1575
1576static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1577 struct discard_policy *dpolicy,
1578 block_t start, block_t end)
1579{
1580 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1581 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1582 &(dcc->fstrim_list) : &(dcc->wait_list);
1583 struct discard_cmd *dc, *tmp;
1584 bool need_wait;
1585 unsigned int trimmed = 0;
1586
1587next:
1588 need_wait = false;
1589
1590 mutex_lock(&dcc->cmd_lock);
1591 list_for_each_entry_safe(dc, tmp, wait_list, list) {
1592 if (dc->lstart + dc->len <= start || end <= dc->lstart)
1593 continue;
1594 if (dc->len < dpolicy->granularity)
1595 continue;
1596 if (dc->state == D_DONE && !dc->ref) {
1597 wait_for_completion_io(&dc->wait);
1598 if (!dc->error)
1599 trimmed += dc->len;
1600 __remove_discard_cmd(sbi, dc);
1601 } else {
1602 dc->ref++;
1603 need_wait = true;
1604 break;
1605 }
1606 }
1607 mutex_unlock(&dcc->cmd_lock);
1608
1609 if (need_wait) {
1610 trimmed += __wait_one_discard_bio(sbi, dc);
1611 goto next;
1612 }
1613
1614 return trimmed;
1615}
1616
1617static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1618 struct discard_policy *dpolicy)
1619{
1620 struct discard_policy dp;
1621 unsigned int discard_blks;
1622
1623 if (dpolicy)
1624 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1625
1626
1627 __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1628 discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1629 __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1630 discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1631
1632 return discard_blks;
1633}
1634
1635
1636static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1637{
1638 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1639 struct discard_cmd *dc;
1640 bool need_wait = false;
1641
1642 mutex_lock(&dcc->cmd_lock);
1643 dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1644 NULL, blkaddr);
1645 if (dc) {
1646 if (dc->state == D_PREP) {
1647 __punch_discard_cmd(sbi, dc, blkaddr);
1648 } else {
1649 dc->ref++;
1650 need_wait = true;
1651 }
1652 }
1653 mutex_unlock(&dcc->cmd_lock);
1654
1655 if (need_wait)
1656 __wait_one_discard_bio(sbi, dc);
1657}
1658
1659void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1660{
1661 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1662
1663 if (dcc && dcc->f2fs_issue_discard) {
1664 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1665
1666 dcc->f2fs_issue_discard = NULL;
1667 kthread_stop(discard_thread);
1668 }
1669}
1670
1671
1672bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1673{
1674 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1675 struct discard_policy dpolicy;
1676 bool dropped;
1677
1678 __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1679 dcc->discard_granularity);
1680 dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
1681 __issue_discard_cmd(sbi, &dpolicy);
1682 dropped = __drop_discard_cmd(sbi);
1683
1684
1685 __wait_all_discard_cmd(sbi, NULL);
1686
1687 f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1688 return dropped;
1689}
1690
/*
 * Main loop of the discard kthread.
 *
 * Each round sleeps for up to wait_ms (or until woken through
 * discard_wait_queue), waits for discards that are already queued, and then
 * issues a new batch with a DPOLICY_BG policy (DPOLICY_FORCE while gc_mode
 * is GC_URGENT).  The next sleep time is chosen from the policy intervals
 * depending on what the batch achieved.
 *
 * @data: the struct f2fs_sb_info of the filesystem
 *
 * Returns 0 when the thread is asked to stop.
 */
static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
	struct discard_policy dpolicy;
	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
	int issued;

	set_freezable();

	do {
		/* re-read the granularity every round */
		__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
					dcc->discard_granularity);

		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
				dcc->discard_wake,
				msecs_to_jiffies(wait_ms));

		/* consume the wake-up request */
		if (dcc->discard_wake)
			dcc->discard_wake = 0;

		/* reap discards that are already in flight */
		if (atomic_read(&dcc->queued_discard))
			__wait_all_discard_cmd(sbi, NULL);

		if (try_to_freeze())
			continue;
		if (f2fs_readonly(sbi->sb))
			continue;
		if (kthread_should_stop())
			return 0;
		/* filesystem flagged for fsck: issue nothing, sleep long */
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}

		if (sbi->gc_mode == GC_URGENT)
			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);

		sb_start_intwrite(sbi->sb);

		issued = __issue_discard_cmd(sbi, &dpolicy);
		if (issued > 0) {
			/* something was issued: wait for it, come back soon */
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
		} else if (issued == -1){
			/* -1: nothing issued because the filesystem was busy */
			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
			if (!wait_ms)
				wait_ms = dpolicy.mid_interval;
		} else {
			/* nothing to issue */
			wait_ms = dpolicy.max_interval;
		}

		sb_end_intwrite(sbi->sb);

	} while (!kthread_should_stop());
	return 0;
}
1751
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Discard a range on a zoned block device.
 *
 * If f2fs_blkz_is_seq() reports the start block as lying in a sequential
 * zone, the range must start on a zone boundary and be exactly one zone
 * long; the zone is then reset.  Any other range is queued as a regular
 * discard.
 *
 * Returns 0 or the result of the reset on success paths, -EIO when the block
 * lies outside its device or the reset request is not zone-aligned.
 */
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;
	sector_t sector, nr_sects;
	int devi = 0;

	if (f2fs_is_multi_device(sbi)) {
		devi = f2fs_target_device_index(sbi, blkstart);
		if (blkstart < FDEV(devi).start_blk ||
		    blkstart > FDEV(devi).end_blk) {
			f2fs_err(sbi, "Invalid block %x", blkstart);
			return -EIO;
		}
		/* from here on @blkstart is relative to its device */
		blkstart -= FDEV(devi).start_blk;
	}

	/* not a sequential zone: handle it like any other discard */
	if (!f2fs_blkz_is_seq(sbi, devi, blkstart))
		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);

	sector = SECTOR_FROM_BLOCK(blkstart);
	nr_sects = SECTOR_FROM_BLOCK(blklen);

	if (sector & (bdev_zone_sectors(bdev) - 1) ||
	    nr_sects != bdev_zone_sectors(bdev)) {
		f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
			 devi, sbi->s_ndevs ? FDEV(devi).path : "",
			 blkstart, blklen);
		return -EIO;
	}

	trace_f2fs_issue_reset_zone(bdev, blkstart);
	return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
}
#endif
1790
1791static int __issue_discard_async(struct f2fs_sb_info *sbi,
1792 struct block_device *bdev, block_t blkstart, block_t blklen)
1793{
1794#ifdef CONFIG_BLK_DEV_ZONED
1795 if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1796 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1797#endif
1798 return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1799}
1800
1801static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1802 block_t blkstart, block_t blklen)
1803{
1804 sector_t start = blkstart, len = 0;
1805 struct block_device *bdev;
1806 struct seg_entry *se;
1807 unsigned int offset;
1808 block_t i;
1809 int err = 0;
1810
1811 bdev = f2fs_target_device(sbi, blkstart, NULL);
1812
1813 for (i = blkstart; i < blkstart + blklen; i++, len++) {
1814 if (i != start) {
1815 struct block_device *bdev2 =
1816 f2fs_target_device(sbi, i, NULL);
1817
1818 if (bdev2 != bdev) {
1819 err = __issue_discard_async(sbi, bdev,
1820 start, len);
1821 if (err)
1822 return err;
1823 bdev = bdev2;
1824 start = i;
1825 len = 0;
1826 }
1827 }
1828
1829 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1830 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1831
1832 if (!f2fs_test_and_set_bit(offset, se->discard_map))
1833 sbi->discard_blks--;
1834 }
1835
1836 if (len)
1837 err = __issue_discard_async(sbi, bdev, start, len);
1838 return err;
1839}
1840
/*
 * Collect the discardable block ranges of segment cpc->trim_start into a
 * discard_entry on dcc_info->entry_list.
 *
 * @cpc:        checkpoint control; supplies the segment number, the reason
 *              flags and the minimum trim length
 * @check_only: only report whether a candidate range exists, record nothing
 *
 * Returns true only when @check_only is set and a candidate range was
 * found; false in every other case, including after ranges were recorded.
 */
static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
							bool check_only)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	/* end starts at -1 so that the first search begins at bit 0 */
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason & CP_DISCARD);
	struct discard_entry *de = NULL;
	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
	int i;

	/* a fully valid segment has nothing to discard */
	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
		return false;

	if (!force) {
		/*
		 * Without CP_DISCARD: skip when realtime discard is off, the
		 * segment has no valid blocks, or nr_discards already
		 * reached max_discards.
		 */
		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
			SM_I(sbi)->dcc_info->nr_discards >=
				SM_I(sbi)->dcc_info->max_discards)
			return false;
	}

	/*
	 * Build the candidate bitmap:
	 *  - forced: bits clear in both the checkpoint map and the discard map
	 *  - otherwise: bits set in the checkpoint map but clear in the
	 *    current map
	 */
	for (i = 0; i < entries; i++)
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	while (force || SM_I(sbi)->dcc_info->nr_discards <=
				SM_I(sbi)->dcc_info->max_discards) {
		/* next run of set bits is [start, end) */
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
		/*
		 * Forced mode skips runs shorter than trim_minlen, unless the
		 * run touches the beginning or the end of the segment.
		 */
		if (force && start && end != max_blocks
					&& (end - start) < cpc->trim_minlen)
			continue;

		if (check_only)
			return true;

		/* allocate the entry lazily, on the first accepted run */
		if (!de) {
			de = f2fs_kmem_cache_alloc(discard_entry_slab,
								GFP_F2FS_ZERO);
			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
			list_add_tail(&de->list, head);
		}

		for (i = start; i < end; i++)
			__set_bit_le(i, (void *)de->discard_map);

		SM_I(sbi)->dcc_info->nr_discards += end - start;
	}
	return false;
}
1900
1901static void release_discard_addr(struct discard_entry *entry)
1902{
1903 list_del(&entry->list);
1904 kmem_cache_free(discard_entry_slab, entry);
1905}
1906
1907void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1908{
1909 struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1910 struct discard_entry *entry, *this;
1911
1912
1913 list_for_each_entry_safe(entry, this, head, list)
1914 release_discard_addr(entry);
1915}
1916
1917
1918
1919
1920static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1921{
1922 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1923 unsigned int segno;
1924
1925 mutex_lock(&dirty_i->seglist_lock);
1926 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
1927 __set_test_and_free(sbi, segno);
1928 mutex_unlock(&dirty_i->seglist_lock);
1929}
1930
/*
 * Clear the PRE (prefree) dirty bitmap and issue the discards that go with
 * it.
 *
 * Phase 1 walks runs of set bits in the prefree map, clears them, and (when
 * realtime discard is enabled) issues a discard for each run, or per
 * section in LFS mode with large sections.
 * Phase 2 walks dcc->entry_list, issues discards for the block runs
 * recorded in each entry, and frees the entry.
 * Finally the discard thread is woken up.
 */
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *head = &dcc->entry_list;
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	/* end starts at -1 so that the first search begins at bit 0 */
	unsigned int start = 0, end = -1;
	unsigned int secno, start_segno;
	bool force = (cpc->reason & CP_DISCARD);
	bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);

	mutex_lock(&dirty_i->seglist_lock);

	while (1) {
		int i;

		/*
		 * With alignment, end was rounded up past the run; step back
		 * one so that the search below restarts exactly at end.
		 */
		if (need_align && end != -1)
			end--;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		/* widen the run [start, end) to whole sections */
		if (need_align) {
			start = rounddown(start, sbi->segs_per_sec);
			end = roundup(end, sbi->segs_per_sec);
		}

		for (i = start; i < end; i++) {
			if (test_and_clear_bit(i, prefree_map))
				dirty_i->nr_dirty[PRE]--;
		}

		if (!f2fs_realtime_discard_enable(sbi))
			continue;

		/* forced trim: runs fully inside the trim window are skipped */
		if (force && start >= cpc->trim_start &&
					(end - 1) <= cpc->trim_end)
				continue;

		/* simple case: discard the whole run of segments at once */
		if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				(end - start) << sbi->log_blocks_per_seg);
			continue;
		}
next:
		/*
		 * LFS with large sections: go section by section and discard
		 * only sections that are not current and hold no valid blocks.
		 */
		secno = GET_SEC_FROM_SEG(sbi, start);
		start_segno = GET_SEG_FROM_SEC(sbi, secno);
		if (!IS_CURSEC(sbi, secno) &&
			!get_valid_blocks(sbi, start, true))
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
				sbi->segs_per_sec << sbi->log_blocks_per_seg);

		start = start_segno + sbi->segs_per_sec;
		if (start < end)
			goto next;
		else
			end = start - 1;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	/* phase 2: block-granular discards recorded in the entry list */
	list_for_each_entry_safe(entry, this, head, list) {
		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
		/* is_valid: the run starting at cur_pos consists of set bits */
		bool is_valid = test_bit_le(0, entry->discard_map);

find_next:
		if (is_valid) {
			next_pos = find_next_zero_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
			len = next_pos - cur_pos;

			/* skipped on blkzoned, or when shorter than trim_minlen */
			if (f2fs_sb_has_blkzoned(sbi) ||
			    (force && len < cpc->trim_minlen))
				goto skip;

			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
									len);
			total_len += len;
		} else {
			next_pos = find_next_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
		}
skip:
		/* runs of set and clear bits alternate */
		cur_pos = next_pos;
		is_valid = !is_valid;

		if (cur_pos < sbi->blocks_per_seg)
			goto find_next;

		release_discard_addr(entry);
		dcc->nr_discards -= total_len;
	}

	wake_up_discard_thread(sbi, false);
}
2030
2031static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2032{
2033 dev_t dev = sbi->sb->s_bdev->bd_dev;
2034 struct discard_cmd_control *dcc;
2035 int err = 0, i;
2036
2037 if (SM_I(sbi)->dcc_info) {
2038 dcc = SM_I(sbi)->dcc_info;
2039 goto init_thread;
2040 }
2041
2042 dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2043 if (!dcc)
2044 return -ENOMEM;
2045
2046 dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2047 INIT_LIST_HEAD(&dcc->entry_list);
2048 for (i = 0; i < MAX_PLIST_NUM; i++)
2049 INIT_LIST_HEAD(&dcc->pend_list[i]);
2050 INIT_LIST_HEAD(&dcc->wait_list);
2051 INIT_LIST_HEAD(&dcc->fstrim_list);
2052 mutex_init(&dcc->cmd_lock);
2053 atomic_set(&dcc->issued_discard, 0);
2054 atomic_set(&dcc->queued_discard, 0);
2055 atomic_set(&dcc->discard_cmd_cnt, 0);
2056 dcc->nr_discards = 0;
2057 dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2058 dcc->undiscard_blks = 0;
2059 dcc->next_pos = 0;
2060 dcc->root = RB_ROOT_CACHED;
2061 dcc->rbtree_check = false;
2062
2063 init_waitqueue_head(&dcc->discard_wait_queue);
2064 SM_I(sbi)->dcc_info = dcc;
2065init_thread:
2066 dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2067 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2068 if (IS_ERR(dcc->f2fs_issue_discard)) {
2069 err = PTR_ERR(dcc->f2fs_issue_discard);
2070 kvfree(dcc);
2071 SM_I(sbi)->dcc_info = NULL;
2072 return err;
2073 }
2074
2075 return err;
2076}
2077
2078static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2079{
2080 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2081
2082 if (!dcc)
2083 return;
2084
2085 f2fs_stop_discard_thread(sbi);
2086
2087 kvfree(dcc);
2088 SM_I(sbi)->dcc_info = NULL;
2089}
2090
2091static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2092{
2093 struct sit_info *sit_i = SIT_I(sbi);
2094
2095 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2096 sit_i->dirty_sentries++;
2097 return false;
2098 }
2099
2100 return true;
2101}
2102
2103static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2104 unsigned int segno, int modified)
2105{
2106 struct seg_entry *se = get_seg_entry(sbi, segno);
2107 se->type = type;
2108 if (modified)
2109 __mark_sit_entry_dirty(sbi, segno);
2110}
2111
/*
 * Account one block becoming valid (@del > 0) or invalid (otherwise) in the
 * segment entry that contains @blkaddr.
 *
 * Updates the segment's valid block count, mtime, current valid bitmap and
 * discard bitmap, plus the checkpoint bitmap and count where applicable,
 * marks the entry dirty, and adjusts the filesystem-wide and per-section
 * valid block counters.
 *
 * @del: the delta applied to the valid block counters; the only caller in
 *       view passes -1
 */
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;
	bool exist;
#ifdef CONFIG_F2FS_CHECK_FS
	bool mir_exist;
#endif

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	/* the new count must fit 16 bits and not exceed blocks_per_seg */
	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	se->mtime = get_mtime(sbi, false);
	if (se->mtime > SIT_I(sbi)->max_mtime)
		SIT_I(sbi)->max_mtime = se->mtime;

	/* update the bitmaps */
	if (del > 0) {
		exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
		/* the mirror bitmap must agree with the primary one */
		mir_exist = f2fs_test_and_set_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
				 blkaddr, exist);
			f2fs_bug_on(sbi, 1);
		}
#endif
		if (unlikely(exist)) {
			/* bit was already set: undo the count change */
			f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
				 blkaddr);
			f2fs_bug_on(sbi, 1);
			se->valid_blocks--;
			del = 0;
		}

		/* a block in use is no longer a discard candidate */
		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;

		/*
		 * For node segments, while checkpointing is not disabled,
		 * the block is also set in the checkpoint bitmap right away.
		 */
		if (IS_NODESEG(se->type) &&
				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
				se->ckpt_valid_blocks++;
		}
	} else {
		exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
		/* the mirror bitmap must agree with the primary one */
		mir_exist = f2fs_test_and_clear_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
				 blkaddr, exist);
			f2fs_bug_on(sbi, 1);
		}
#endif
		if (unlikely(!exist)) {
			/* bit was already clear: undo the count change */
			f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
				 blkaddr);
			f2fs_bug_on(sbi, 1);
			se->valid_blocks++;
			del = 0;
		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
			/*
			 * Checkpointing is disabled and the block is set in
			 * the checkpoint bitmap: count it as unusable.
			 * NOTE(review): presumably because the block cannot
			 * be reused until the next checkpoint - confirm.
			 */
			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
				spin_lock(&sbi->stat_lock);
				sbi->unusable_block_count++;
				spin_unlock(&sbi->stat_lock);
			}
		}

		/* a freed block becomes a discard candidate again */
		if (f2fs_test_and_clear_bit(offset, se->discard_map))
			sbi->discard_blks++;
	}
	/* blocks not set in the checkpoint bitmap also adjust its count */
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* filesystem-wide count of valid blocks */
	SIT_I(sbi)->written_valid_blocks += del;

	if (__is_large_section(sbi))
		get_sec_entry(sbi, segno)->valid_blocks += del;
}
2210
2211void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2212{
2213 unsigned int segno = GET_SEGNO(sbi, addr);
2214 struct sit_info *sit_i = SIT_I(sbi);
2215
2216 f2fs_bug_on(sbi, addr == NULL_ADDR);
2217 if (addr == NEW_ADDR)
2218 return;
2219
2220 invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2221
2222
2223 down_write(&sit_i->sentry_lock);
2224
2225 update_sit_entry(sbi, addr, -1);
2226
2227
2228 locate_dirty_segment(sbi, segno);
2229
2230 up_write(&sit_i->sentry_lock);
2231}
2232
2233bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2234{
2235 struct sit_info *sit_i = SIT_I(sbi);
2236 unsigned int segno, offset;
2237 struct seg_entry *se;
2238 bool is_cp = false;
2239
2240 if (!__is_valid_data_blkaddr(blkaddr))
2241 return true;
2242
2243 down_read(&sit_i->sentry_lock);
2244
2245 segno = GET_SEGNO(sbi, blkaddr);
2246 se = get_seg_entry(sbi, segno);
2247 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2248
2249 if (f2fs_test_bit(offset, se->ckpt_valid_map))
2250 is_cp = true;
2251
2252 up_read(&sit_i->sentry_lock);
2253
2254 return is_cp;
2255}
2256
2257
2258
2259
2260static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2261 struct f2fs_summary *sum)
2262{
2263 struct curseg_info *curseg = CURSEG_I(sbi, type);
2264 void *addr = curseg->sum_blk;
2265 addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2266 memcpy(addr, sum, sizeof(struct f2fs_summary));
2267}
2268
2269
2270
2271
2272int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2273{
2274 int valid_sum_count = 0;
2275 int i, sum_in_page;
2276
2277 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2278 if (sbi->ckpt->alloc_type[i] == SSR)
2279 valid_sum_count += sbi->blocks_per_seg;
2280 else {
2281 if (for_ra)
2282 valid_sum_count += le16_to_cpu(
2283 F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2284 else
2285 valid_sum_count += curseg_blkoff(sbi, i);
2286 }
2287 }
2288
2289 sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2290 SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2291 if (valid_sum_count <= sum_in_page)
2292 return 1;
2293 else if ((valid_sum_count - sum_in_page) <=
2294 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2295 return 2;
2296 return 3;
2297}
2298
2299
2300
2301
2302struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2303{
2304 return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
2305}
2306
2307void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2308 void *src, block_t blk_addr)
2309{
2310 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2311
2312 memcpy(page_address(page), src, PAGE_SIZE);
2313 set_page_dirty(page);
2314 f2fs_put_page(page, 1);
2315}
2316
2317static void write_sum_page(struct f2fs_sb_info *sbi,
2318 struct f2fs_summary_block *sum_blk, block_t blk_addr)
2319{
2320 f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2321}
2322
2323static void write_current_sum_page(struct f2fs_sb_info *sbi,
2324 int type, block_t blk_addr)
2325{
2326 struct curseg_info *curseg = CURSEG_I(sbi, type);
2327 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2328 struct f2fs_summary_block *src = curseg->sum_blk;
2329 struct f2fs_summary_block *dst;
2330
2331 dst = (struct f2fs_summary_block *)page_address(page);
2332 memset(dst, 0, PAGE_SIZE);
2333
2334 mutex_lock(&curseg->curseg_mutex);
2335
2336 down_read(&curseg->journal_rwsem);
2337 memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2338 up_read(&curseg->journal_rwsem);
2339
2340 memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2341 memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2342
2343 mutex_unlock(&curseg->curseg_mutex);
2344
2345 set_page_dirty(page);
2346 f2fs_put_page(page, 1);
2347}
2348
2349static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
2350{
2351 struct curseg_info *curseg = CURSEG_I(sbi, type);
2352 unsigned int segno = curseg->segno + 1;
2353 struct free_segmap_info *free_i = FREE_I(sbi);
2354
2355 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2356 return !test_bit(segno, free_i->free_segmap);
2357 return 0;
2358}
2359
2360
2361
2362
2363
/*
 * Pick a segment whose bit is clear in the free maps, mark it in use and
 * return it through @newseg.
 *
 * @newseg:  in: segment used as the search hint; out: the chosen segment
 * @new_sec: when false, a segment inside the hint's own section is tried
 *           first
 * @dir:     ALLOC_RIGHT wraps the section search around to 0 when nothing
 *           is found at or after the hint; any other value searches
 *           towards lower section numbers instead
 *
 * When a zone holds more than one section, the first candidate is rejected
 * if its zone differs from the hint's zone and is the zone of one of the
 * current segments; the search is then repeated once from a neighbouring
 * zone and that result is taken unconditionally.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
	unsigned int left_start = hint;
	/* init: still in the first search pass */
	bool init = true;
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	/* try the following segments of the same section first */
	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
			goto got_it;
	}
find_other_zone:
	/* look for a section with a clear bit at or after the hint */
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			/* wrap around and search from the start */
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	/* walk towards section 0 until a clear bit is found */
	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		/* reached section 0: fall back to a search from the start */
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	segno = GET_SEG_FROM_SEC(sbi, secno);
	zoneno = GET_ZONE_FROM_SEC(sbi, secno);

	/* conditions under which the candidate is accepted as is */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	/* is the candidate's zone the zone of a current segment? */
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* yes: move the hint to a neighbouring zone and retry once */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* the chosen segment must not be marked in free_segmap yet */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}
2450
2451static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2452{
2453 struct curseg_info *curseg = CURSEG_I(sbi, type);
2454 struct summary_footer *sum_footer;
2455
2456 curseg->segno = curseg->next_segno;
2457 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2458 curseg->next_blkoff = 0;
2459 curseg->next_segno = NULL_SEGNO;
2460
2461 sum_footer = &(curseg->sum_blk->footer);
2462 memset(sum_footer, 0, sizeof(struct summary_footer));
2463 if (IS_DATASEG(type))
2464 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2465 if (IS_NODESEG(type))
2466 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2467 __set_sit_entry_type(sbi, type, curseg->segno, modified);
2468}
2469
2470static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2471{
2472
2473 if (__is_large_section(sbi))
2474 return CURSEG_I(sbi, type)->segno;
2475
2476 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2477 return 0;
2478
2479 if (test_opt(sbi, NOHEAP) &&
2480 (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
2481 return 0;
2482
2483 if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2484 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2485
2486
2487 if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2488 return 0;
2489
2490 return CURSEG_I(sbi, type)->segno;
2491}
2492
2493
2494
2495
2496
2497static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2498{
2499 struct curseg_info *curseg = CURSEG_I(sbi, type);
2500 unsigned int segno = curseg->segno;
2501 int dir = ALLOC_LEFT;
2502
2503 write_sum_page(sbi, curseg->sum_blk,
2504 GET_SUM_BLOCK(sbi, segno));
2505 if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
2506 dir = ALLOC_RIGHT;
2507
2508 if (test_opt(sbi, NOHEAP))
2509 dir = ALLOC_RIGHT;
2510
2511 segno = __get_next_segno(sbi, type);
2512 get_new_segment(sbi, &segno, new_sec, dir);
2513 curseg->next_segno = segno;
2514 reset_curseg(sbi, type, 1);
2515 curseg->alloc_type = LFS;
2516}
2517
2518static void __next_free_blkoff(struct f2fs_sb_info *sbi,
2519 struct curseg_info *seg, block_t start)
2520{
2521 struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2522 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2523 unsigned long *target_map = SIT_I(sbi)->tmp_map;
2524 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2525 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2526 int i, pos;
2527
2528 for (i = 0; i < entries; i++)
2529 target_map[i] = ckpt_map[i] | cur_map[i];
2530
2531 pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2532
2533 seg->next_blkoff = pos;
2534}
2535
2536
2537
2538
2539
2540
2541static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2542 struct curseg_info *seg)
2543{
2544 if (seg->alloc_type == SSR)
2545 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
2546 else
2547 seg->next_blkoff++;
2548}
2549
2550
2551
2552
2553
2554static void change_curseg(struct f2fs_sb_info *sbi, int type)
2555{
2556 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2557 struct curseg_info *curseg = CURSEG_I(sbi, type);
2558 unsigned int new_segno = curseg->next_segno;
2559 struct f2fs_summary_block *sum_node;
2560 struct page *sum_page;
2561
2562 write_sum_page(sbi, curseg->sum_blk,
2563 GET_SUM_BLOCK(sbi, curseg->segno));
2564 __set_test_and_inuse(sbi, new_segno);
2565
2566 mutex_lock(&dirty_i->seglist_lock);
2567 __remove_dirty_segment(sbi, new_segno, PRE);
2568 __remove_dirty_segment(sbi, new_segno, DIRTY);
2569 mutex_unlock(&dirty_i->seglist_lock);
2570
2571 reset_curseg(sbi, type, 1);
2572 curseg->alloc_type = SSR;
2573 __next_free_blkoff(sbi, curseg, 0);
2574
2575 sum_page = f2fs_get_sum_page(sbi, new_segno);
2576 f2fs_bug_on(sbi, IS_ERR(sum_page));
2577 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2578 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2579 f2fs_put_page(sum_page, 1);
2580}
2581
2582static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
2583{
2584 struct curseg_info *curseg = CURSEG_I(sbi, type);
2585 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2586 unsigned segno = NULL_SEGNO;
2587 int i, cnt;
2588 bool reversed = false;
2589
2590
2591 if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
2592 curseg->next_segno = segno;
2593 return 1;
2594 }
2595
2596
2597 if (IS_NODESEG(type)) {
2598 if (type >= CURSEG_WARM_NODE) {
2599 reversed = true;
2600 i = CURSEG_COLD_NODE;
2601 } else {
2602 i = CURSEG_HOT_NODE;
2603 }
2604 cnt = NR_CURSEG_NODE_TYPE;
2605 } else {
2606 if (type >= CURSEG_WARM_DATA) {
2607 reversed = true;
2608 i = CURSEG_COLD_DATA;
2609 } else {
2610 i = CURSEG_HOT_DATA;
2611 }
2612 cnt = NR_CURSEG_DATA_TYPE;
2613 }
2614
2615 for (; cnt-- > 0; reversed ? i-- : i++) {
2616 if (i == type)
2617 continue;
2618 if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
2619 curseg->next_segno = segno;
2620 return 1;
2621 }
2622 }
2623
2624
2625 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2626 segno = get_free_segment(sbi);
2627 if (segno != NULL_SEGNO) {
2628 curseg->next_segno = segno;
2629 return 1;
2630 }
2631 }
2632 return 0;
2633}
2634
2635
2636
2637
2638
2639static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
2640 int type, bool force)
2641{
2642 struct curseg_info *curseg = CURSEG_I(sbi, type);
2643
2644 if (force)
2645 new_curseg(sbi, type, true);
2646 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2647 type == CURSEG_WARM_NODE)
2648 new_curseg(sbi, type, false);
2649 else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
2650 likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2651 new_curseg(sbi, type, false);
2652 else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2653 change_curseg(sbi, type);
2654 else
2655 new_curseg(sbi, type, false);
2656
2657 stat_inc_seg_type(sbi, curseg);
2658}
2659
2660void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
2661 unsigned int start, unsigned int end)
2662{
2663 struct curseg_info *curseg = CURSEG_I(sbi, type);
2664 unsigned int segno;
2665
2666 down_read(&SM_I(sbi)->curseg_lock);
2667 mutex_lock(&curseg->curseg_mutex);
2668 down_write(&SIT_I(sbi)->sentry_lock);
2669
2670 segno = CURSEG_I(sbi, type)->segno;
2671 if (segno < start || segno > end)
2672 goto unlock;
2673
2674 if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2675 change_curseg(sbi, type);
2676 else
2677 new_curseg(sbi, type, true);
2678
2679 stat_inc_seg_type(sbi, curseg);
2680
2681 locate_dirty_segment(sbi, segno);
2682unlock:
2683 up_write(&SIT_I(sbi)->sentry_lock);
2684
2685 if (segno != curseg->segno)
2686 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
2687 type, segno, curseg->segno);
2688
2689 mutex_unlock(&curseg->curseg_mutex);
2690 up_read(&SM_I(sbi)->curseg_lock);
2691}
2692
2693void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
2694{
2695 struct curseg_info *curseg;
2696 unsigned int old_segno;
2697 int i;
2698
2699 down_write(&SIT_I(sbi)->sentry_lock);
2700
2701 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2702 curseg = CURSEG_I(sbi, i);
2703 old_segno = curseg->segno;
2704 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
2705 locate_dirty_segment(sbi, old_segno);
2706 }
2707
2708 up_write(&SIT_I(sbi)->sentry_lock);
2709}
2710
/*
 * Segment allocation operations table whose ->allocate_segment hook is
 * allocate_segment_by_default().
 */
static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};
2714
2715bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
2716 struct cp_control *cpc)
2717{
2718 __u64 trim_start = cpc->trim_start;
2719 bool has_candidate = false;
2720
2721 down_write(&SIT_I(sbi)->sentry_lock);
2722 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2723 if (add_discard_addrs(sbi, cpc, true)) {
2724 has_candidate = true;
2725 break;
2726 }
2727 }
2728 up_write(&SIT_I(sbi)->sentry_lock);
2729
2730 cpc->trim_start = trim_start;
2731 return has_candidate;
2732}
2733
2734static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
2735 struct discard_policy *dpolicy,
2736 unsigned int start, unsigned int end)
2737{
2738 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2739 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
2740 struct rb_node **insert_p = NULL, *insert_parent = NULL;
2741 struct discard_cmd *dc;
2742 struct blk_plug plug;
2743 int issued;
2744 unsigned int trimmed = 0;
2745
2746next:
2747 issued = 0;
2748
2749 mutex_lock(&dcc->cmd_lock);
2750 if (unlikely(dcc->rbtree_check))
2751 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
2752 &dcc->root));
2753
2754 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
2755 NULL, start,
2756 (struct rb_entry **)&prev_dc,
2757 (struct rb_entry **)&next_dc,
2758 &insert_p, &insert_parent, true, NULL);
2759 if (!dc)
2760 dc = next_dc;
2761
2762 blk_start_plug(&plug);
2763
2764 while (dc && dc->lstart <= end) {
2765 struct rb_node *node;
2766 int err = 0;
2767
2768 if (dc->len < dpolicy->granularity)
2769 goto skip;
2770
2771 if (dc->state != D_PREP) {
2772 list_move_tail(&dc->list, &dcc->fstrim_list);
2773 goto skip;
2774 }
2775
2776 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
2777
2778 if (issued >= dpolicy->max_requests) {
2779 start = dc->lstart + dc->len;
2780
2781 if (err)
2782 __remove_discard_cmd(sbi, dc);
2783
2784 blk_finish_plug(&plug);
2785 mutex_unlock(&dcc->cmd_lock);
2786 trimmed += __wait_all_discard_cmd(sbi, NULL);
2787 congestion_wait(BLK_RW_ASYNC, HZ/50);
2788 goto next;
2789 }
2790skip:
2791 node = rb_next(&dc->rb_node);
2792 if (err)
2793 __remove_discard_cmd(sbi, dc);
2794 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
2795
2796 if (fatal_signal_pending(current))
2797 break;
2798 }
2799
2800 blk_finish_plug(&plug);
2801 mutex_unlock(&dcc->cmd_lock);
2802
2803 return trimmed;
2804}
2805
2806int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2807{
2808 __u64 start = F2FS_BYTES_TO_BLK(range->start);
2809 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
2810 unsigned int start_segno, end_segno;
2811 block_t start_block, end_block;
2812 struct cp_control cpc;
2813 struct discard_policy dpolicy;
2814 unsigned long long trimmed = 0;
2815 int err = 0;
2816 bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
2817
2818 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2819 return -EINVAL;
2820
2821 if (end < MAIN_BLKADDR(sbi))
2822 goto out;
2823
2824 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
2825 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
2826 return -EFSCORRUPTED;
2827 }
2828
2829
2830 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
2831 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
2832 GET_SEGNO(sbi, end);
2833 if (need_align) {
2834 start_segno = rounddown(start_segno, sbi->segs_per_sec);
2835 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
2836 }
2837
2838 cpc.reason = CP_DISCARD;
2839 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
2840 cpc.trim_start = start_segno;
2841 cpc.trim_end = end_segno;
2842
2843 if (sbi->discard_blks == 0)
2844 goto out;
2845
2846 mutex_lock(&sbi->gc_mutex);
2847 err = f2fs_write_checkpoint(sbi, &cpc);
2848 mutex_unlock(&sbi->gc_mutex);
2849 if (err)
2850 goto out;
2851
2852
2853
2854
2855
2856
2857
2858 if (f2fs_realtime_discard_enable(sbi))
2859 goto out;
2860
2861 start_block = START_BLOCK(sbi, start_segno);
2862 end_block = START_BLOCK(sbi, end_segno + 1);
2863
2864 __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2865 trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
2866 start_block, end_block);
2867
2868 trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
2869 start_block, end_block);
2870out:
2871 if (!err)
2872 range->len = F2FS_BLK_TO_BYTES(trimmed);
2873 return err;
2874}
2875
2876static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
2877{
2878 struct curseg_info *curseg = CURSEG_I(sbi, type);
2879 if (curseg->next_blkoff < sbi->blocks_per_seg)
2880 return true;
2881 return false;
2882}
2883
2884int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2885{
2886 switch (hint) {
2887 case WRITE_LIFE_SHORT:
2888 return CURSEG_HOT_DATA;
2889 case WRITE_LIFE_EXTREME:
2890 return CURSEG_COLD_DATA;
2891 default:
2892 return CURSEG_WARM_DATA;
2893 }
2894}
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
2958 enum page_type type, enum temp_type temp)
2959{
2960 if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
2961 if (type == DATA) {
2962 if (temp == WARM)
2963 return WRITE_LIFE_NOT_SET;
2964 else if (temp == HOT)
2965 return WRITE_LIFE_SHORT;
2966 else if (temp == COLD)
2967 return WRITE_LIFE_EXTREME;
2968 } else {
2969 return WRITE_LIFE_NOT_SET;
2970 }
2971 } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
2972 if (type == DATA) {
2973 if (temp == WARM)
2974 return WRITE_LIFE_LONG;
2975 else if (temp == HOT)
2976 return WRITE_LIFE_SHORT;
2977 else if (temp == COLD)
2978 return WRITE_LIFE_EXTREME;
2979 } else if (type == NODE) {
2980 if (temp == WARM || temp == HOT)
2981 return WRITE_LIFE_NOT_SET;
2982 else if (temp == COLD)
2983 return WRITE_LIFE_NONE;
2984 } else if (type == META) {
2985 return WRITE_LIFE_MEDIUM;
2986 }
2987 }
2988 return WRITE_LIFE_NOT_SET;
2989}
2990
2991static int __get_segment_type_2(struct f2fs_io_info *fio)
2992{
2993 if (fio->type == DATA)
2994 return CURSEG_HOT_DATA;
2995 else
2996 return CURSEG_HOT_NODE;
2997}
2998
2999static int __get_segment_type_4(struct f2fs_io_info *fio)
3000{
3001 if (fio->type == DATA) {
3002 struct inode *inode = fio->page->mapping->host;
3003
3004 if (S_ISDIR(inode->i_mode))
3005 return CURSEG_HOT_DATA;
3006 else
3007 return CURSEG_COLD_DATA;
3008 } else {
3009 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3010 return CURSEG_WARM_NODE;
3011 else
3012 return CURSEG_COLD_NODE;
3013 }
3014}
3015
3016static int __get_segment_type_6(struct f2fs_io_info *fio)
3017{
3018 if (fio->type == DATA) {
3019 struct inode *inode = fio->page->mapping->host;
3020
3021 if (is_cold_data(fio->page) || file_is_cold(inode))
3022 return CURSEG_COLD_DATA;
3023 if (file_is_hot(inode) ||
3024 is_inode_flag_set(inode, FI_HOT_DATA) ||
3025 f2fs_is_atomic_file(inode) ||
3026 f2fs_is_volatile_file(inode))
3027 return CURSEG_HOT_DATA;
3028 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3029 } else {
3030 if (IS_DNODE(fio->page))
3031 return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3032 CURSEG_HOT_NODE;
3033 return CURSEG_COLD_NODE;
3034 }
3035}
3036
3037static int __get_segment_type(struct f2fs_io_info *fio)
3038{
3039 int type = 0;
3040
3041 switch (F2FS_OPTION(fio->sbi).active_logs) {
3042 case 2:
3043 type = __get_segment_type_2(fio);
3044 break;
3045 case 4:
3046 type = __get_segment_type_4(fio);
3047 break;
3048 case 6:
3049 type = __get_segment_type_6(fio);
3050 break;
3051 default:
3052 f2fs_bug_on(fio->sbi, true);
3053 }
3054
3055 if (IS_HOT(type))
3056 fio->temp = HOT;
3057 else if (IS_WARM(type))
3058 fio->temp = WARM;
3059 else
3060 fio->temp = COLD;
3061 return type;
3062}
3063
3064void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3065 block_t old_blkaddr, block_t *new_blkaddr,
3066 struct f2fs_summary *sum, int type,
3067 struct f2fs_io_info *fio, bool add_list)
3068{
3069 struct sit_info *sit_i = SIT_I(sbi);
3070 struct curseg_info *curseg = CURSEG_I(sbi, type);
3071
3072 down_read(&SM_I(sbi)->curseg_lock);
3073
3074 mutex_lock(&curseg->curseg_mutex);
3075 down_write(&sit_i->sentry_lock);
3076
3077 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3078
3079 f2fs_wait_discard_bio(sbi, *new_blkaddr);
3080
3081
3082
3083
3084
3085
3086 __add_sum_entry(sbi, type, sum);
3087
3088 __refresh_next_blkoff(sbi, curseg);
3089
3090 stat_inc_block_count(sbi, curseg);
3091
3092
3093
3094
3095
3096 update_sit_entry(sbi, *new_blkaddr, 1);
3097 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3098 update_sit_entry(sbi, old_blkaddr, -1);
3099
3100 if (!__has_curseg_space(sbi, type))
3101 sit_i->s_ops->allocate_segment(sbi, type, false);
3102
3103
3104
3105
3106
3107
3108 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3109 locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3110
3111 up_write(&sit_i->sentry_lock);
3112
3113 if (page && IS_NODESEG(type)) {
3114 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3115
3116 f2fs_inode_chksum_set(sbi, page);
3117 }
3118
3119 if (add_list) {
3120 struct f2fs_bio_info *io;
3121
3122 INIT_LIST_HEAD(&fio->list);
3123 fio->in_list = true;
3124 fio->retry = false;
3125 io = sbi->write_io[fio->type] + fio->temp;
3126 spin_lock(&io->io_lock);
3127 list_add_tail(&fio->list, &io->io_list);
3128 spin_unlock(&io->io_lock);
3129 }
3130
3131 mutex_unlock(&curseg->curseg_mutex);
3132
3133 up_read(&SM_I(sbi)->curseg_lock);
3134}
3135
3136static void update_device_state(struct f2fs_io_info *fio)
3137{
3138 struct f2fs_sb_info *sbi = fio->sbi;
3139 unsigned int devidx;
3140
3141 if (!f2fs_is_multi_device(sbi))
3142 return;
3143
3144 devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
3145
3146
3147 f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3148
3149
3150 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3151 spin_lock(&sbi->dev_lock);
3152 f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3153 spin_unlock(&sbi->dev_lock);
3154 }
3155}
3156
3157static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3158{
3159 int type = __get_segment_type(fio);
3160 bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
3161
3162 if (keep_order)
3163 down_read(&fio->sbi->io_order_lock);
3164reallocate:
3165 f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3166 &fio->new_blkaddr, sum, type, fio, true);
3167 if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3168 invalidate_mapping_pages(META_MAPPING(fio->sbi),
3169 fio->old_blkaddr, fio->old_blkaddr);
3170
3171
3172 f2fs_submit_page_write(fio);
3173 if (fio->retry) {
3174 fio->old_blkaddr = fio->new_blkaddr;
3175 goto reallocate;
3176 }
3177
3178 update_device_state(fio);
3179
3180 if (keep_order)
3181 up_read(&fio->sbi->io_order_lock);
3182}
3183
3184void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3185 enum iostat_type io_type)
3186{
3187 struct f2fs_io_info fio = {
3188 .sbi = sbi,
3189 .type = META,
3190 .temp = HOT,
3191 .op = REQ_OP_WRITE,
3192 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3193 .old_blkaddr = page->index,
3194 .new_blkaddr = page->index,
3195 .page = page,
3196 .encrypted_page = NULL,
3197 .in_list = false,
3198 };
3199
3200 if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3201 fio.op_flags &= ~REQ_META;
3202
3203 set_page_writeback(page);
3204 ClearPageError(page);
3205 f2fs_submit_page_write(&fio);
3206
3207 stat_inc_meta_count(sbi, page->index);
3208 f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
3209}
3210
3211void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3212{
3213 struct f2fs_summary sum;
3214
3215 set_summary(&sum, nid, 0, 0);
3216 do_write_page(&sum, fio);
3217
3218 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3219}
3220
3221void f2fs_outplace_write_data(struct dnode_of_data *dn,
3222 struct f2fs_io_info *fio)
3223{
3224 struct f2fs_sb_info *sbi = fio->sbi;
3225 struct f2fs_summary sum;
3226
3227 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3228 set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3229 do_write_page(&sum, fio);
3230 f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3231
3232 f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
3233}
3234
3235int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3236{
3237 int err;
3238 struct f2fs_sb_info *sbi = fio->sbi;
3239 unsigned int segno;
3240
3241 fio->new_blkaddr = fio->old_blkaddr;
3242
3243 __get_segment_type(fio);
3244
3245 segno = GET_SEGNO(sbi, fio->new_blkaddr);
3246
3247 if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3248 set_sbi_flag(sbi, SBI_NEED_FSCK);
3249 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3250 __func__, segno);
3251 return -EFSCORRUPTED;
3252 }
3253
3254 stat_inc_inplace_blocks(fio->sbi);
3255
3256 if (fio->bio)
3257 err = f2fs_merge_page_bio(fio);
3258 else
3259 err = f2fs_submit_page_bio(fio);
3260 if (!err) {
3261 update_device_state(fio);
3262 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3263 }
3264
3265 return err;
3266}
3267
3268static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3269 unsigned int segno)
3270{
3271 int i;
3272
3273 for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3274 if (CURSEG_I(sbi, i)->segno == segno)
3275 break;
3276 }
3277 return i;
3278}
3279
3280void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3281 block_t old_blkaddr, block_t new_blkaddr,
3282 bool recover_curseg, bool recover_newaddr)
3283{
3284 struct sit_info *sit_i = SIT_I(sbi);
3285 struct curseg_info *curseg;
3286 unsigned int segno, old_cursegno;
3287 struct seg_entry *se;
3288 int type;
3289 unsigned short old_blkoff;
3290
3291 segno = GET_SEGNO(sbi, new_blkaddr);
3292 se = get_seg_entry(sbi, segno);
3293 type = se->type;
3294
3295 down_write(&SM_I(sbi)->curseg_lock);
3296
3297 if (!recover_curseg) {
3298
3299 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3300 if (old_blkaddr == NULL_ADDR)
3301 type = CURSEG_COLD_DATA;
3302 else
3303 type = CURSEG_WARM_DATA;
3304 }
3305 } else {
3306 if (IS_CURSEG(sbi, segno)) {
3307
3308 type = __f2fs_get_curseg(sbi, segno);
3309 f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3310 } else {
3311 type = CURSEG_WARM_DATA;
3312 }
3313 }
3314
3315 f2fs_bug_on(sbi, !IS_DATASEG(type));
3316 curseg = CURSEG_I(sbi, type);
3317
3318 mutex_lock(&curseg->curseg_mutex);
3319 down_write(&sit_i->sentry_lock);
3320
3321 old_cursegno = curseg->segno;
3322 old_blkoff = curseg->next_blkoff;
3323
3324
3325 if (segno != curseg->segno) {
3326 curseg->next_segno = segno;
3327 change_curseg(sbi, type);
3328 }
3329
3330 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3331 __add_sum_entry(sbi, type, sum);
3332
3333 if (!recover_curseg || recover_newaddr)
3334 update_sit_entry(sbi, new_blkaddr, 1);
3335 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3336 invalidate_mapping_pages(META_MAPPING(sbi),
3337 old_blkaddr, old_blkaddr);
3338 update_sit_entry(sbi, old_blkaddr, -1);
3339 }
3340
3341 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3342 locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3343
3344 locate_dirty_segment(sbi, old_cursegno);
3345
3346 if (recover_curseg) {
3347 if (old_cursegno != curseg->segno) {
3348 curseg->next_segno = old_cursegno;
3349 change_curseg(sbi, type);
3350 }
3351 curseg->next_blkoff = old_blkoff;
3352 }
3353
3354 up_write(&sit_i->sentry_lock);
3355 mutex_unlock(&curseg->curseg_mutex);
3356 up_write(&SM_I(sbi)->curseg_lock);
3357}
3358
3359void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3360 block_t old_addr, block_t new_addr,
3361 unsigned char version, bool recover_curseg,
3362 bool recover_newaddr)
3363{
3364 struct f2fs_summary sum;
3365
3366 set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3367
3368 f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3369 recover_curseg, recover_newaddr);
3370
3371 f2fs_update_data_blkaddr(dn, new_addr);
3372}
3373
3374void f2fs_wait_on_page_writeback(struct page *page,
3375 enum page_type type, bool ordered, bool locked)
3376{
3377 if (PageWriteback(page)) {
3378 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3379
3380 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3381 if (ordered) {
3382 wait_on_page_writeback(page);
3383 f2fs_bug_on(sbi, locked && PageWriteback(page));
3384 } else {
3385 wait_for_stable_page(page);
3386 }
3387 }
3388}
3389
3390void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3391{
3392 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3393 struct page *cpage;
3394
3395 if (!f2fs_post_read_required(inode))
3396 return;
3397
3398 if (!__is_valid_data_blkaddr(blkaddr))
3399 return;
3400
3401 cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3402 if (cpage) {
3403 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3404 f2fs_put_page(cpage, 1);
3405 }
3406}
3407
3408void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3409 block_t len)
3410{
3411 block_t i;
3412
3413 for (i = 0; i < len; i++)
3414 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3415}
3416
3417static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3418{
3419 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3420 struct curseg_info *seg_i;
3421 unsigned char *kaddr;
3422 struct page *page;
3423 block_t start;
3424 int i, j, offset;
3425
3426 start = start_sum_block(sbi);
3427
3428 page = f2fs_get_meta_page(sbi, start++);
3429 if (IS_ERR(page))
3430 return PTR_ERR(page);
3431 kaddr = (unsigned char *)page_address(page);
3432
3433
3434 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3435 memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3436
3437
3438 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3439 memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3440 offset = 2 * SUM_JOURNAL_SIZE;
3441
3442
3443 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3444 unsigned short blk_off;
3445 unsigned int segno;
3446
3447 seg_i = CURSEG_I(sbi, i);
3448 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3449 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3450 if (blk_off > ENTRIES_IN_SUM) {
3451 f2fs_bug_on(sbi, 1);
3452 f2fs_put_page(page, 1);
3453 return -EFAULT;
3454 }
3455 seg_i->next_segno = segno;
3456 reset_curseg(sbi, i, 0);
3457 seg_i->alloc_type = ckpt->alloc_type[i];
3458 seg_i->next_blkoff = blk_off;
3459
3460 if (seg_i->alloc_type == SSR)
3461 blk_off = sbi->blocks_per_seg;
3462
3463 for (j = 0; j < blk_off; j++) {
3464 struct f2fs_summary *s;
3465 s = (struct f2fs_summary *)(kaddr + offset);
3466 seg_i->sum_blk->entries[j] = *s;
3467 offset += SUMMARY_SIZE;
3468 if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3469 SUM_FOOTER_SIZE)
3470 continue;
3471
3472 f2fs_put_page(page, 1);
3473 page = NULL;
3474
3475 page = f2fs_get_meta_page(sbi, start++);
3476 if (IS_ERR(page))
3477 return PTR_ERR(page);
3478 kaddr = (unsigned char *)page_address(page);
3479 offset = 0;
3480 }
3481 }
3482 f2fs_put_page(page, 1);
3483 return 0;
3484}
3485
3486static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3487{
3488 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3489 struct f2fs_summary_block *sum;
3490 struct curseg_info *curseg;
3491 struct page *new;
3492 unsigned short blk_off;
3493 unsigned int segno = 0;
3494 block_t blk_addr = 0;
3495 int err = 0;
3496
3497
3498 if (IS_DATASEG(type)) {
3499 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3500 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3501 CURSEG_HOT_DATA]);
3502 if (__exist_node_summaries(sbi))
3503 blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
3504 else
3505 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3506 } else {
3507 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3508 CURSEG_HOT_NODE]);
3509 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3510 CURSEG_HOT_NODE]);
3511 if (__exist_node_summaries(sbi))
3512 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3513 type - CURSEG_HOT_NODE);
3514 else
3515 blk_addr = GET_SUM_BLOCK(sbi, segno);
3516 }
3517
3518 new = f2fs_get_meta_page(sbi, blk_addr);
3519 if (IS_ERR(new))
3520 return PTR_ERR(new);
3521 sum = (struct f2fs_summary_block *)page_address(new);
3522
3523 if (IS_NODESEG(type)) {
3524 if (__exist_node_summaries(sbi)) {
3525 struct f2fs_summary *ns = &sum->entries[0];
3526 int i;
3527 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3528 ns->version = 0;
3529 ns->ofs_in_node = 0;
3530 }
3531 } else {
3532 err = f2fs_restore_node_summary(sbi, segno, sum);
3533 if (err)
3534 goto out;
3535 }
3536 }
3537
3538
3539 curseg = CURSEG_I(sbi, type);
3540 mutex_lock(&curseg->curseg_mutex);
3541
3542
3543 down_write(&curseg->journal_rwsem);
3544 memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3545 up_write(&curseg->journal_rwsem);
3546
3547 memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3548 memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3549 curseg->next_segno = segno;
3550 reset_curseg(sbi, type, 0);
3551 curseg->alloc_type = ckpt->alloc_type[type];
3552 curseg->next_blkoff = blk_off;
3553 mutex_unlock(&curseg->curseg_mutex);
3554out:
3555 f2fs_put_page(new, 1);
3556 return err;
3557}
3558
3559static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3560{
3561 struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3562 struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3563 int type = CURSEG_HOT_DATA;
3564 int err;
3565
3566 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3567 int npages = f2fs_npages_for_summary_flush(sbi, true);
3568
3569 if (npages >= 2)
3570 f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3571 META_CP, true);
3572
3573
3574 err = read_compacted_summaries(sbi);
3575 if (err)
3576 return err;
3577 type = CURSEG_HOT_NODE;
3578 }
3579
3580 if (__exist_node_summaries(sbi))
3581 f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3582 NR_CURSEG_TYPE - type, META_CP, true);
3583
3584 for (; type <= CURSEG_COLD_NODE; type++) {
3585 err = read_normal_summaries(sbi, type);
3586 if (err)
3587 return err;
3588 }
3589
3590
3591 if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
3592 sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
3593 f2fs_err(sbi, "invalid journal entries nats %u sits %u\n",
3594 nats_in_cursum(nat_j), sits_in_cursum(sit_j));
3595 return -EINVAL;
3596 }
3597
3598 return 0;
3599}
3600
3601static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3602{
3603 struct page *page;
3604 unsigned char *kaddr;
3605 struct f2fs_summary *summary;
3606 struct curseg_info *seg_i;
3607 int written_size = 0;
3608 int i, j;
3609
3610 page = f2fs_grab_meta_page(sbi, blkaddr++);
3611 kaddr = (unsigned char *)page_address(page);
3612 memset(kaddr, 0, PAGE_SIZE);
3613
3614
3615 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3616 memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
3617 written_size += SUM_JOURNAL_SIZE;
3618
3619
3620 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3621 memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
3622 written_size += SUM_JOURNAL_SIZE;
3623
3624
3625 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3626 unsigned short blkoff;
3627 seg_i = CURSEG_I(sbi, i);
3628 if (sbi->ckpt->alloc_type[i] == SSR)
3629 blkoff = sbi->blocks_per_seg;
3630 else
3631 blkoff = curseg_blkoff(sbi, i);
3632
3633 for (j = 0; j < blkoff; j++) {
3634 if (!page) {
3635 page = f2fs_grab_meta_page(sbi, blkaddr++);
3636 kaddr = (unsigned char *)page_address(page);
3637 memset(kaddr, 0, PAGE_SIZE);
3638 written_size = 0;
3639 }
3640 summary = (struct f2fs_summary *)(kaddr + written_size);
3641 *summary = seg_i->sum_blk->entries[j];
3642 written_size += SUMMARY_SIZE;
3643
3644 if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
3645 SUM_FOOTER_SIZE)
3646 continue;
3647
3648 set_page_dirty(page);
3649 f2fs_put_page(page, 1);
3650 page = NULL;
3651 }
3652 }
3653 if (page) {
3654 set_page_dirty(page);
3655 f2fs_put_page(page, 1);
3656 }
3657}
3658
3659static void write_normal_summaries(struct f2fs_sb_info *sbi,
3660 block_t blkaddr, int type)
3661{
3662 int i, end;
3663 if (IS_DATASEG(type))
3664 end = type + NR_CURSEG_DATA_TYPE;
3665 else
3666 end = type + NR_CURSEG_NODE_TYPE;
3667
3668 for (i = type; i < end; i++)
3669 write_current_sum_page(sbi, i, blkaddr + (i - type));
3670}
3671
3672void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3673{
3674 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
3675 write_compacted_summaries(sbi, start_blk);
3676 else
3677 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
3678}
3679
3680void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3681{
3682 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
3683}
3684
3685int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
3686 unsigned int val, int alloc)
3687{
3688 int i;
3689
3690 if (type == NAT_JOURNAL) {
3691 for (i = 0; i < nats_in_cursum(journal); i++) {
3692 if (le32_to_cpu(nid_in_journal(journal, i)) == val)
3693 return i;
3694 }
3695 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
3696 return update_nats_in_cursum(journal, 1);
3697 } else if (type == SIT_JOURNAL) {
3698 for (i = 0; i < sits_in_cursum(journal); i++)
3699 if (le32_to_cpu(segno_in_journal(journal, i)) == val)
3700 return i;
3701 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
3702 return update_sits_in_cursum(journal, 1);
3703 }
3704 return -1;
3705}
3706
3707static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
3708 unsigned int segno)
3709{
3710 return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
3711}
3712
3713static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
3714 unsigned int start)
3715{
3716 struct sit_info *sit_i = SIT_I(sbi);
3717 struct page *page;
3718 pgoff_t src_off, dst_off;
3719
3720 src_off = current_sit_addr(sbi, start);
3721 dst_off = next_sit_addr(sbi, src_off);
3722
3723 page = f2fs_grab_meta_page(sbi, dst_off);
3724 seg_info_to_sit_page(sbi, page, start);
3725
3726 set_page_dirty(page);
3727 set_to_next_sit(sit_i, start);
3728
3729 return page;
3730}
3731
3732static struct sit_entry_set *grab_sit_entry_set(void)
3733{
3734 struct sit_entry_set *ses =
3735 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3736
3737 ses->entry_cnt = 0;
3738 INIT_LIST_HEAD(&ses->set_list);
3739 return ses;
3740}
3741
3742static void release_sit_entry_set(struct sit_entry_set *ses)
3743{
3744 list_del(&ses->set_list);
3745 kmem_cache_free(sit_entry_set_slab, ses);
3746}
3747
3748static void adjust_sit_entry_set(struct sit_entry_set *ses,
3749 struct list_head *head)
3750{
3751 struct sit_entry_set *next = ses;
3752
3753 if (list_is_last(&ses->set_list, head))
3754 return;
3755
3756 list_for_each_entry_continue(next, head, set_list)
3757 if (ses->entry_cnt <= next->entry_cnt)
3758 break;
3759
3760 list_move_tail(&ses->set_list, &next->set_list);
3761}
3762
3763static void add_sit_entry(unsigned int segno, struct list_head *head)
3764{
3765 struct sit_entry_set *ses;
3766 unsigned int start_segno = START_SEGNO(segno);
3767
3768 list_for_each_entry(ses, head, set_list) {
3769 if (ses->start_segno == start_segno) {
3770 ses->entry_cnt++;
3771 adjust_sit_entry_set(ses, head);
3772 return;
3773 }
3774 }
3775
3776 ses = grab_sit_entry_set();
3777
3778 ses->start_segno = start_segno;
3779 ses->entry_cnt++;
3780 list_add(&ses->set_list, head);
3781}
3782
3783static void add_sits_in_set(struct f2fs_sb_info *sbi)
3784{
3785 struct f2fs_sm_info *sm_info = SM_I(sbi);
3786 struct list_head *set_list = &sm_info->sit_entry_set;
3787 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
3788 unsigned int segno;
3789
3790 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3791 add_sit_entry(segno, set_list);
3792}
3793
3794static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
3795{
3796 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3797 struct f2fs_journal *journal = curseg->journal;
3798 int i;
3799
3800 down_write(&curseg->journal_rwsem);
3801 for (i = 0; i < sits_in_cursum(journal); i++) {
3802 unsigned int segno;
3803 bool dirtied;
3804
3805 segno = le32_to_cpu(segno_in_journal(journal, i));
3806 dirtied = __mark_sit_entry_dirty(sbi, segno);
3807
3808 if (!dirtied)
3809 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
3810 }
3811 update_sits_in_cursum(journal, -i);
3812 up_write(&curseg->journal_rwsem);
3813}
3814
3815
3816
3817
3818
/*
 * Flush all dirty SIT entries and finish with
 * set_prefree_as_free_segments().
 *
 * Dirty entries are grouped into sets (one per START_SEGNO() value).  Each
 * set is written either into the SIT journal kept in the cold-data current
 * segment, or into the next copy of the SIT block returned by
 * get_next_sit_page().
 *
 * @sbi: filesystem instance
 * @cpc: checkpoint control.  When CP_DISCARD is not set in ->reason,
 *       ->trim_start is overwritten with each flushed segment number before
 *       add_discard_addrs() is called.  When it is set, the range
 *       ->trim_start..->trim_end is walked at the end and ->trim_start is
 *       restored afterwards.
 *
 * The whole flush runs with sit_i->sentry_lock held for write.
 */
void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	/* the journal is never used as a target while SBI_IS_RESIZEFS is set */
	bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
	struct seg_entry *se;

	down_write(&sit_i->sentry_lock);

	if (!sit_i->dirty_sentries)
		goto out;

	/*
	 * Account every segment that is set in the dirty bitmap in the
	 * sit entry set list.
	 */
	add_sits_in_set(sbi);

	/*
	 * If the journal cannot take all dirty entries, or must not be used
	 * at all, pull the entries it currently holds back out so that they
	 * are dirtied and accounted in the sit entry sets as well.
	 */
	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
								!to_journal)
		remove_sits_in_journal(sbi);

	/*
	 * Flush set by set.  A set goes to the journal while there is room
	 * for all of its entries; once one set does not fit, to_journal is
	 * cleared and this and every later set is written to a SIT page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
		struct page *page = NULL;
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
						(unsigned long)MAIN_SEGS(sbi));
		unsigned int segno = start_segno;

		if (to_journal &&
			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
			to_journal = false;

		if (to_journal) {
			down_write(&curseg->journal_rwsem);
		} else {
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
		}

		/* flush the dirty entries that fall into [start_segno, end) */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;

			se = get_seg_entry(sbi, segno);
#ifdef CONFIG_F2FS_CHECK_FS
			/* the valid map and its mirror must be identical */
			if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
						SIT_VBLOCK_MAP_SIZE))
				f2fs_bug_on(sbi, 1);
#endif

			/* outside of a CP_DISCARD run, collect discards per segment */
			if (!(cpc->reason & CP_DISCARD)) {
				cpc->trim_start = segno;
				add_discard_addrs(sbi, cpc, false);
			}

			if (to_journal) {
				/* find or allocate (alloc == 1) the journal slot */
				offset = f2fs_lookup_journal_in_cursum(journal,
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
				segno_in_journal(journal, offset) =
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
					&sit_in_journal(journal, offset));
				check_block_count(sbi, segno,
					&sit_in_journal(journal, offset));
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
				check_block_count(sbi, segno,
						&raw_sit->entries[sit_offset]);
			}

			/* entry is clean now: drop it from all three counters */
			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
		}

		if (to_journal)
			up_write(&curseg->journal_rwsem);
		else
			f2fs_put_page(page, 1);

		/* every entry accounted in the set must have been flushed */
		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
	}

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
	if (cpc->reason & CP_DISCARD) {
		/* remember the start so the caller's range is left untouched */
		__u64 trim_start = cpc->trim_start;

		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
			add_discard_addrs(sbi, cpc, false);

		cpc->trim_start = trim_start;
	}
	up_write(&sit_i->sentry_lock);

	set_prefree_as_free_segments(sbi);
}
3938
3939static int build_sit_info(struct f2fs_sb_info *sbi)
3940{
3941 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
3942 struct sit_info *sit_i;
3943 unsigned int sit_segs, start;
3944 char *src_bitmap;
3945 unsigned int bitmap_size;
3946
3947
3948 sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
3949 if (!sit_i)
3950 return -ENOMEM;
3951
3952 SM_I(sbi)->sit_info = sit_i;
3953
3954 sit_i->sentries =
3955 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
3956 MAIN_SEGS(sbi)),
3957 GFP_KERNEL);
3958 if (!sit_i->sentries)
3959 return -ENOMEM;
3960
3961 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3962 sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
3963 GFP_KERNEL);
3964 if (!sit_i->dirty_sentries_bitmap)
3965 return -ENOMEM;
3966
3967 for (start = 0; start < MAIN_SEGS(sbi); start++) {
3968 sit_i->sentries[start].cur_valid_map
3969 = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3970 sit_i->sentries[start].ckpt_valid_map
3971 = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3972 if (!sit_i->sentries[start].cur_valid_map ||
3973 !sit_i->sentries[start].ckpt_valid_map)
3974 return -ENOMEM;
3975
3976#ifdef CONFIG_F2FS_CHECK_FS
3977 sit_i->sentries[start].cur_valid_map_mir
3978 = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3979 if (!sit_i->sentries[start].cur_valid_map_mir)
3980 return -ENOMEM;
3981#endif
3982
3983 sit_i->sentries[start].discard_map
3984 = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
3985 GFP_KERNEL);
3986 if (!sit_i->sentries[start].discard_map)
3987 return -ENOMEM;
3988 }
3989
3990 sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3991 if (!sit_i->tmp_map)
3992 return -ENOMEM;
3993
3994 if (__is_large_section(sbi)) {
3995 sit_i->sec_entries =
3996 f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
3997 MAIN_SECS(sbi)),
3998 GFP_KERNEL);
3999 if (!sit_i->sec_entries)
4000 return -ENOMEM;
4001 }
4002
4003
4004 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4005
4006
4007 bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4008 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4009
4010 sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
4011 if (!sit_i->sit_bitmap)
4012 return -ENOMEM;
4013
4014#ifdef CONFIG_F2FS_CHECK_FS
4015 sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
4016 if (!sit_i->sit_bitmap_mir)
4017 return -ENOMEM;
4018#endif
4019
4020
4021 sit_i->s_ops = &default_salloc_ops;
4022
4023 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4024 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4025 sit_i->written_valid_blocks = 0;
4026 sit_i->bitmap_size = bitmap_size;
4027 sit_i->dirty_sentries = 0;
4028 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4029 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4030 sit_i->mounted_time = ktime_get_real_seconds();
4031 init_rwsem(&sit_i->sentry_lock);
4032 return 0;
4033}
4034
4035static int build_free_segmap(struct f2fs_sb_info *sbi)
4036{
4037 struct free_segmap_info *free_i;
4038 unsigned int bitmap_size, sec_bitmap_size;
4039
4040
4041 free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4042 if (!free_i)
4043 return -ENOMEM;
4044
4045 SM_I(sbi)->free_info = free_i;
4046
4047 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4048 free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4049 if (!free_i->free_segmap)
4050 return -ENOMEM;
4051
4052 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4053 free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4054 if (!free_i->free_secmap)
4055 return -ENOMEM;
4056
4057
4058 memset(free_i->free_segmap, 0xff, bitmap_size);
4059 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4060
4061
4062 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4063 free_i->free_segments = 0;
4064 free_i->free_sections = 0;
4065 spin_lock_init(&free_i->segmap_lock);
4066 return 0;
4067}
4068
4069static int build_curseg(struct f2fs_sb_info *sbi)
4070{
4071 struct curseg_info *array;
4072 int i;
4073
4074 array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
4075 GFP_KERNEL);
4076 if (!array)
4077 return -ENOMEM;
4078
4079 SM_I(sbi)->curseg_array = array;
4080
4081 for (i = 0; i < NR_CURSEG_TYPE; i++) {
4082 mutex_init(&array[i].curseg_mutex);
4083 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4084 if (!array[i].sum_blk)
4085 return -ENOMEM;
4086 init_rwsem(&array[i].journal_rwsem);
4087 array[i].journal = f2fs_kzalloc(sbi,
4088 sizeof(struct f2fs_journal), GFP_KERNEL);
4089 if (!array[i].journal)
4090 return -ENOMEM;
4091 array[i].segno = NULL_SEGNO;
4092 array[i].next_blkoff = 0;
4093 }
4094 return restore_curseg_summaries(sbi);
4095}
4096
/*
 * Populate the in-memory segment entries from the on-disk SIT.
 *
 * Pass 1 reads every SIT block (with readahead) and loads each segment's
 * raw entry.  Pass 2 replays the SIT journal kept in the cold-data current
 * segment on top of that, undoing the accounting of the entry it replaces.
 * Along the way the discard maps, sbi->discard_blks, the per-section valid
 * block counts (large sections only) and the total number of valid node
 * blocks are maintained.
 *
 * Returns 0 on success, the error from reading a SIT page or from
 * check_block_count(), or -EFSCORRUPTED when a journal entry names a
 * segment beyond MAIN_SEGS() or the node block total disagrees with
 * valid_node_count().
 */
static int build_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct seg_entry *se;
	struct f2fs_sit_entry sit;
	int sit_blk_cnt = SIT_BLK_CNT(sbi);
	unsigned int i, start, end;
	unsigned int readed, start_blk = 0;
	int err = 0;
	block_t total_node_blocks = 0;

	do {
		/* read ahead up to BIO_MAX_PAGES SIT blocks from start_blk */
		readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
							META_SIT, true);

		/* segment range covered by the blocks just read */
		start = start_blk * sit_i->sents_per_block;
		end = (start_blk + readed) * sit_i->sents_per_block;

		for (; start < end && start < MAIN_SEGS(sbi); start++) {
			struct f2fs_sit_block *sit_blk;
			struct page *page;

			se = &sit_i->sentries[start];
			page = get_current_sit_page(sbi, start);
			if (IS_ERR(page))
				return PTR_ERR(page);
			sit_blk = (struct f2fs_sit_block *)page_address(page);
			/* copy the raw entry out before the page is released */
			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
			f2fs_put_page(page, 1);

			err = check_block_count(sbi, start, &sit);
			if (err)
				return err;
			seg_info_from_raw_sit(se, &sit);
			if (IS_NODESEG(se->type))
				total_node_blocks += se->valid_blocks;

			/*
			 * Build the discard map: all bits set when the
			 * checkpoint has CP_TRIMMED_FLAG, otherwise a copy of
			 * the valid map, with the non-valid blocks of the
			 * segment added to discard_blks.
			 */
			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
				memset(se->discard_map, 0xff,
					SIT_VBLOCK_MAP_SIZE);
			} else {
				memcpy(se->discard_map,
					se->cur_valid_map,
					SIT_VBLOCK_MAP_SIZE);
				sbi->discard_blks +=
					sbi->blocks_per_seg -
					se->valid_blocks;
			}

			if (__is_large_section(sbi))
				get_sec_entry(sbi, start)->valid_blocks +=
							se->valid_blocks;
		}
		start_blk += readed;
	} while (start_blk < sit_blk_cnt);

	/*
	 * Replay the SIT journal.  Errors break out of the loop instead of
	 * returning so that the journal rwsem is always released.
	 */
	down_read(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int old_valid_blocks;

		start = le32_to_cpu(segno_in_journal(journal, i));
		if (start >= MAIN_SEGS(sbi)) {
			f2fs_err(sbi, "Wrong journal entry on segno %u",
				 start);
			set_sbi_flag(sbi, SBI_NEED_FSCK);
			err = -EFSCORRUPTED;
			break;
		}

		se = &sit_i->sentries[start];
		sit = sit_in_journal(journal, i);

		/* take back what pass 1 accounted for this segment */
		old_valid_blocks = se->valid_blocks;
		if (IS_NODESEG(se->type))
			total_node_blocks -= old_valid_blocks;

		err = check_block_count(sbi, start, &sit);
		if (err)
			break;
		seg_info_from_raw_sit(se, &sit);
		if (IS_NODESEG(se->type))
			total_node_blocks += se->valid_blocks;

		if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
			memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
		} else {
			memcpy(se->discard_map, se->cur_valid_map,
						SIT_VBLOCK_MAP_SIZE);
			/* replace the old contribution by the new one */
			sbi->discard_blks += old_valid_blocks;
			sbi->discard_blks -= se->valid_blocks;
		}

		if (__is_large_section(sbi)) {
			get_sec_entry(sbi, start)->valid_blocks +=
							se->valid_blocks;
			get_sec_entry(sbi, start)->valid_blocks -=
							old_valid_blocks;
		}
	}
	up_read(&curseg->journal_rwsem);

	/* cross-check the node block total, unless an error is pending */
	if (!err && total_node_blocks != valid_node_count(sbi)) {
		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
			 total_node_blocks, valid_node_count(sbi));
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		err = -EFSCORRUPTED;
	}

	return err;
}
4210
4211static void init_free_segmap(struct f2fs_sb_info *sbi)
4212{
4213 unsigned int start;
4214 int type;
4215
4216 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4217 struct seg_entry *sentry = get_seg_entry(sbi, start);
4218 if (!sentry->valid_blocks)
4219 __set_free(sbi, start);
4220 else
4221 SIT_I(sbi)->written_valid_blocks +=
4222 sentry->valid_blocks;
4223 }
4224
4225
4226 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4227 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4228 __set_test_and_inuse(sbi, curseg_t->segno);
4229 }
4230}
4231
4232static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4233{
4234 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4235 struct free_segmap_info *free_i = FREE_I(sbi);
4236 unsigned int segno = 0, offset = 0;
4237 unsigned short valid_blocks;
4238
4239 while (1) {
4240
4241 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4242 if (segno >= MAIN_SEGS(sbi))
4243 break;
4244 offset = segno + 1;
4245 valid_blocks = get_valid_blocks(sbi, segno, false);
4246 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
4247 continue;
4248 if (valid_blocks > sbi->blocks_per_seg) {
4249 f2fs_bug_on(sbi, 1);
4250 continue;
4251 }
4252 mutex_lock(&dirty_i->seglist_lock);
4253 __locate_dirty_segment(sbi, segno, DIRTY);
4254 mutex_unlock(&dirty_i->seglist_lock);
4255 }
4256}
4257
4258static int init_victim_secmap(struct f2fs_sb_info *sbi)
4259{
4260 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4261 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4262
4263 dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4264 if (!dirty_i->victim_secmap)
4265 return -ENOMEM;
4266 return 0;
4267}
4268
4269static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4270{
4271 struct dirty_seglist_info *dirty_i;
4272 unsigned int bitmap_size, i;
4273
4274
4275 dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4276 GFP_KERNEL);
4277 if (!dirty_i)
4278 return -ENOMEM;
4279
4280 SM_I(sbi)->dirty_info = dirty_i;
4281 mutex_init(&dirty_i->seglist_lock);
4282
4283 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4284
4285 for (i = 0; i < NR_DIRTY_TYPE; i++) {
4286 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4287 GFP_KERNEL);
4288 if (!dirty_i->dirty_segmap[i])
4289 return -ENOMEM;
4290 }
4291
4292 init_dirty_segmap(sbi);
4293 return init_victim_secmap(sbi);
4294}
4295
4296static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4297{
4298 int i;
4299
4300
4301
4302
4303
4304 for (i = 0; i < NO_CHECK_TYPE; i++) {
4305 struct curseg_info *curseg = CURSEG_I(sbi, i);
4306 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4307 unsigned int blkofs = curseg->next_blkoff;
4308
4309 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4310 goto out;
4311
4312 if (curseg->alloc_type == SSR)
4313 continue;
4314
4315 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4316 if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4317 continue;
4318out:
4319 f2fs_err(sbi,
4320 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4321 i, curseg->segno, curseg->alloc_type,
4322 curseg->next_blkoff, blkofs);
4323 return -EFSCORRUPTED;
4324 }
4325 }
4326 return 0;
4327}
4328
4329
4330
4331
4332static void init_min_max_mtime(struct f2fs_sb_info *sbi)
4333{
4334 struct sit_info *sit_i = SIT_I(sbi);
4335 unsigned int segno;
4336
4337 down_write(&sit_i->sentry_lock);
4338
4339 sit_i->min_mtime = ULLONG_MAX;
4340
4341 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4342 unsigned int i;
4343 unsigned long long mtime = 0;
4344
4345 for (i = 0; i < sbi->segs_per_sec; i++)
4346 mtime += get_seg_entry(sbi, segno + i)->mtime;
4347
4348 mtime = div_u64(mtime, sbi->segs_per_sec);
4349
4350 if (sit_i->min_mtime > mtime)
4351 sit_i->min_mtime = mtime;
4352 }
4353 sit_i->max_mtime = get_mtime(sbi, false);
4354 up_write(&sit_i->sentry_lock);
4355}
4356
/*
 * Allocate and initialise the segment manager of @sbi.
 *
 * Geometry is copied from the raw superblock and checkpoint, tunables get
 * their defaults, and the sub-structures are then built in order: flush
 * command control (skipped on read-only mounts), discard command control,
 * SIT info, free segment map, current segments, SIT entries, dirty segment
 * map; finally the current segments are sanity-checked and the min/max
 * mtimes initialised.
 *
 * Returns 0 on success or the first error encountered.  Nothing is torn
 * down here on failure.
 * NOTE(review): presumably the caller runs f2fs_destroy_segment_manager()
 * on failure - confirm.
 */
int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_sm_info *sm_info;
	int err;

	sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
	if (!sm_info)
		return -ENOMEM;

	/* attach first, then fill in the on-disk geometry */
	sbi->sm_info = sm_info;
	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
	/* a percentage of the main segments, capped at a fixed maximum */
	sm_info->rec_prefree_segments = sm_info->main_segments *
					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;

	/* with the LFS option ipu_policy keeps its zero-initialised value */
	if (!test_opt(sbi, LFS))
		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
	sm_info->min_ssr_sections = reserved_sections(sbi);

	INIT_LIST_HEAD(&sm_info->sit_entry_set);

	init_rwsem(&sm_info->curseg_lock);

	/* flush command control is only created for writable mounts */
	if (!f2fs_readonly(sbi->sb)) {
		err = f2fs_create_flush_cmd_control(sbi);
		if (err)
			return err;
	}

	err = create_discard_cmd_control(sbi);
	if (err)
		return err;

	err = build_sit_info(sbi);
	if (err)
		return err;
	err = build_free_segmap(sbi);
	if (err)
		return err;
	err = build_curseg(sbi);
	if (err)
		return err;

	/* load the segment entries; this reads the cold-data log's journal */
	err = build_sit_entries(sbi);
	if (err)
		return err;

	init_free_segmap(sbi);
	err = build_dirty_segmap(sbi);
	if (err)
		return err;

	err = sanity_check_curseg(sbi);
	if (err)
		return err;

	init_min_max_mtime(sbi);
	return 0;
}
4431
4432static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
4433 enum dirty_type dirty_type)
4434{
4435 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4436
4437 mutex_lock(&dirty_i->seglist_lock);
4438 kvfree(dirty_i->dirty_segmap[dirty_type]);
4439 dirty_i->nr_dirty[dirty_type] = 0;
4440 mutex_unlock(&dirty_i->seglist_lock);
4441}
4442
4443static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
4444{
4445 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4446 kvfree(dirty_i->victim_secmap);
4447}
4448
4449static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
4450{
4451 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4452 int i;
4453
4454 if (!dirty_i)
4455 return;
4456
4457
4458 for (i = 0; i < NR_DIRTY_TYPE; i++)
4459 discard_dirty_segmap(sbi, i);
4460
4461 destroy_victim_secmap(sbi);
4462 SM_I(sbi)->dirty_info = NULL;
4463 kvfree(dirty_i);
4464}
4465
4466static void destroy_curseg(struct f2fs_sb_info *sbi)
4467{
4468 struct curseg_info *array = SM_I(sbi)->curseg_array;
4469 int i;
4470
4471 if (!array)
4472 return;
4473 SM_I(sbi)->curseg_array = NULL;
4474 for (i = 0; i < NR_CURSEG_TYPE; i++) {
4475 kvfree(array[i].sum_blk);
4476 kvfree(array[i].journal);
4477 }
4478 kvfree(array);
4479}
4480
4481static void destroy_free_segmap(struct f2fs_sb_info *sbi)
4482{
4483 struct free_segmap_info *free_i = SM_I(sbi)->free_info;
4484 if (!free_i)
4485 return;
4486 SM_I(sbi)->free_info = NULL;
4487 kvfree(free_i->free_segmap);
4488 kvfree(free_i->free_secmap);
4489 kvfree(free_i);
4490}
4491
4492static void destroy_sit_info(struct f2fs_sb_info *sbi)
4493{
4494 struct sit_info *sit_i = SIT_I(sbi);
4495 unsigned int start;
4496
4497 if (!sit_i)
4498 return;
4499
4500 if (sit_i->sentries) {
4501 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4502 kvfree(sit_i->sentries[start].cur_valid_map);
4503#ifdef CONFIG_F2FS_CHECK_FS
4504 kvfree(sit_i->sentries[start].cur_valid_map_mir);
4505#endif
4506 kvfree(sit_i->sentries[start].ckpt_valid_map);
4507 kvfree(sit_i->sentries[start].discard_map);
4508 }
4509 }
4510 kvfree(sit_i->tmp_map);
4511
4512 kvfree(sit_i->sentries);
4513 kvfree(sit_i->sec_entries);
4514 kvfree(sit_i->dirty_sentries_bitmap);
4515
4516 SM_I(sbi)->sit_info = NULL;
4517 kvfree(sit_i->sit_bitmap);
4518#ifdef CONFIG_F2FS_CHECK_FS
4519 kvfree(sit_i->sit_bitmap_mir);
4520#endif
4521 kvfree(sit_i);
4522}
4523
4524void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
4525{
4526 struct f2fs_sm_info *sm_info = SM_I(sbi);
4527
4528 if (!sm_info)
4529 return;
4530 f2fs_destroy_flush_cmd_control(sbi, true);
4531 destroy_discard_cmd_control(sbi);
4532 destroy_dirty_segmap(sbi);
4533 destroy_curseg(sbi);
4534 destroy_free_segmap(sbi);
4535 destroy_sit_info(sbi);
4536 sbi->sm_info = NULL;
4537 kvfree(sm_info);
4538}
4539
4540int __init f2fs_create_segment_manager_caches(void)
4541{
4542 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
4543 sizeof(struct discard_entry));
4544 if (!discard_entry_slab)
4545 goto fail;
4546
4547 discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
4548 sizeof(struct discard_cmd));
4549 if (!discard_cmd_slab)
4550 goto destroy_discard_entry;
4551
4552 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
4553 sizeof(struct sit_entry_set));
4554 if (!sit_entry_set_slab)
4555 goto destroy_discard_cmd;
4556
4557 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
4558 sizeof(struct inmem_pages));
4559 if (!inmem_entry_slab)
4560 goto destroy_sit_entry_set;
4561 return 0;
4562
4563destroy_sit_entry_set:
4564 kmem_cache_destroy(sit_entry_set_slab);
4565destroy_discard_cmd:
4566 kmem_cache_destroy(discard_cmd_slab);
4567destroy_discard_entry:
4568 kmem_cache_destroy(discard_entry_slab);
4569fail:
4570 return -ENOMEM;
4571}
4572
4573void f2fs_destroy_segment_manager_caches(void)
4574{
4575 kmem_cache_destroy(sit_entry_set_slab);
4576 kmem_cache_destroy(discard_cmd_slab);
4577 kmem_cache_destroy(discard_entry_slab);
4578 kmem_cache_destroy(inmem_entry_slab);
4579}
4580