1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22#include <linux/module.h>
23#include <linux/time.h>
24#include <linux/fs.h>
25#include <linux/jbd2.h>
26#include <linux/errno.h>
27#include <linux/slab.h>
28#include <linux/init.h>
29#include <linux/mm.h>
30#include <linux/freezer.h>
31#include <linux/pagemap.h>
32#include <linux/kthread.h>
33#include <linux/poison.h>
34#include <linux/proc_fs.h>
35#include <linux/seq_file.h>
36#include <linux/math64.h>
37#include <linux/hash.h>
38#include <linux/log2.h>
39#include <linux/vmalloc.h>
40#include <linux/backing-dev.h>
41#include <linux/bitops.h>
42#include <linux/ratelimit.h>
43#include <linux/sched/mm.h>
44
45#define CREATE_TRACE_POINTS
46#include <trace/events/jbd2.h>
47
48#include <linux/uaccess.h>
49#include <asm/page.h>
50
#ifdef CONFIG_JBD2_DEBUG
/*
 * Debug verbosity threshold.  __jbd2_debug() discards any message whose
 * level is greater than this value.
 */
ushort jbd2_journal_enable_debug __read_mostly;
EXPORT_SYMBOL(jbd2_journal_enable_debug);

/* The threshold is exposed as the "jbd2_debug" module parameter. */
module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2");
#endif
58
59EXPORT_SYMBOL(jbd2_journal_extend);
60EXPORT_SYMBOL(jbd2_journal_stop);
61EXPORT_SYMBOL(jbd2_journal_lock_updates);
62EXPORT_SYMBOL(jbd2_journal_unlock_updates);
63EXPORT_SYMBOL(jbd2_journal_get_write_access);
64EXPORT_SYMBOL(jbd2_journal_get_create_access);
65EXPORT_SYMBOL(jbd2_journal_get_undo_access);
66EXPORT_SYMBOL(jbd2_journal_set_triggers);
67EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
68EXPORT_SYMBOL(jbd2_journal_forget);
69EXPORT_SYMBOL(jbd2_journal_flush);
70EXPORT_SYMBOL(jbd2_journal_revoke);
71
72EXPORT_SYMBOL(jbd2_journal_init_dev);
73EXPORT_SYMBOL(jbd2_journal_init_inode);
74EXPORT_SYMBOL(jbd2_journal_check_used_features);
75EXPORT_SYMBOL(jbd2_journal_check_available_features);
76EXPORT_SYMBOL(jbd2_journal_set_features);
77EXPORT_SYMBOL(jbd2_journal_load);
78EXPORT_SYMBOL(jbd2_journal_destroy);
79EXPORT_SYMBOL(jbd2_journal_abort);
80EXPORT_SYMBOL(jbd2_journal_errno);
81EXPORT_SYMBOL(jbd2_journal_ack_err);
82EXPORT_SYMBOL(jbd2_journal_clear_err);
83EXPORT_SYMBOL(jbd2_log_wait_commit);
84EXPORT_SYMBOL(jbd2_log_start_commit);
85EXPORT_SYMBOL(jbd2_journal_start_commit);
86EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
87EXPORT_SYMBOL(jbd2_journal_wipe);
88EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
89EXPORT_SYMBOL(jbd2_journal_invalidatepage);
90EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
91EXPORT_SYMBOL(jbd2_journal_force_commit);
92EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
93EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
94EXPORT_SYMBOL(jbd2_journal_submit_inode_data_buffers);
95EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers);
96EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
97EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
98EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
99EXPORT_SYMBOL(jbd2_inode_cache);
100
101static int jbd2_journal_create_slab(size_t slab_size);
102
#ifdef CONFIG_JBD2_DEBUG
/*
 * Print a KERN_DEBUG message prefixed with "file: (func, line): ".
 * Nothing is printed when @level is above jbd2_journal_enable_debug.
 */
void __jbd2_debug(int level, const char *file, const char *func,
		  unsigned int line, const char *fmt, ...)
{
	va_list ap;
	struct va_format vaf;

	if (level > jbd2_journal_enable_debug)
		return;

	va_start(ap, fmt);
	vaf.fmt = fmt;
	vaf.va = &ap;
	printk(KERN_DEBUG "%s: (%s, %u): %pV", file, func, line, &vaf);
	va_end(ap);
}
EXPORT_SYMBOL(__jbd2_debug);
#endif
120
121
122static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
123{
124 if (!jbd2_journal_has_csum_v2or3_feature(j))
125 return 1;
126
127 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
128}
129
130static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
131{
132 __u32 csum;
133 __be32 old_csum;
134
135 old_csum = sb->s_checksum;
136 sb->s_checksum = 0;
137 csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
138 sb->s_checksum = old_csum;
139
140 return cpu_to_be32(csum);
141}
142
143
144
145
146
147static void commit_timeout(struct timer_list *t)
148{
149 journal_t *journal = from_timer(journal, t, j_commit_timer);
150
151 wake_up_process(journal->j_task);
152}
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172static int kjournald2(void *arg)
173{
174 journal_t *journal = arg;
175 transaction_t *transaction;
176
177
178
179
180
181 timer_setup(&journal->j_commit_timer, commit_timeout, 0);
182
183 set_freezable();
184
185
186 journal->j_task = current;
187 wake_up(&journal->j_wait_done_commit);
188
189
190
191
192
193
194
195 memalloc_nofs_save();
196
197
198
199
200 write_lock(&journal->j_state_lock);
201
202loop:
203 if (journal->j_flags & JBD2_UNMOUNT)
204 goto end_loop;
205
206 jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
207 journal->j_commit_sequence, journal->j_commit_request);
208
209 if (journal->j_commit_sequence != journal->j_commit_request) {
210 jbd_debug(1, "OK, requests differ\n");
211 write_unlock(&journal->j_state_lock);
212 del_timer_sync(&journal->j_commit_timer);
213 jbd2_journal_commit_transaction(journal);
214 write_lock(&journal->j_state_lock);
215 goto loop;
216 }
217
218 wake_up(&journal->j_wait_done_commit);
219 if (freezing(current)) {
220
221
222
223
224
225 jbd_debug(1, "Now suspending kjournald2\n");
226 write_unlock(&journal->j_state_lock);
227 try_to_freeze();
228 write_lock(&journal->j_state_lock);
229 } else {
230
231
232
233
234 DEFINE_WAIT(wait);
235 int should_sleep = 1;
236
237 prepare_to_wait(&journal->j_wait_commit, &wait,
238 TASK_INTERRUPTIBLE);
239 if (journal->j_commit_sequence != journal->j_commit_request)
240 should_sleep = 0;
241 transaction = journal->j_running_transaction;
242 if (transaction && time_after_eq(jiffies,
243 transaction->t_expires))
244 should_sleep = 0;
245 if (journal->j_flags & JBD2_UNMOUNT)
246 should_sleep = 0;
247 if (should_sleep) {
248 write_unlock(&journal->j_state_lock);
249 schedule();
250 write_lock(&journal->j_state_lock);
251 }
252 finish_wait(&journal->j_wait_commit, &wait);
253 }
254
255 jbd_debug(1, "kjournald2 wakes\n");
256
257
258
259
260 transaction = journal->j_running_transaction;
261 if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
262 journal->j_commit_request = transaction->t_tid;
263 jbd_debug(1, "woke because of timeout\n");
264 }
265 goto loop;
266
267end_loop:
268 del_timer_sync(&journal->j_commit_timer);
269 journal->j_task = NULL;
270 wake_up(&journal->j_wait_done_commit);
271 jbd_debug(1, "Journal thread exiting.\n");
272 write_unlock(&journal->j_state_lock);
273 return 0;
274}
275
276static int jbd2_journal_start_thread(journal_t *journal)
277{
278 struct task_struct *t;
279
280 t = kthread_run(kjournald2, journal, "jbd2/%s",
281 journal->j_devname);
282 if (IS_ERR(t))
283 return PTR_ERR(t);
284
285 wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
286 return 0;
287}
288
289static void journal_kill_thread(journal_t *journal)
290{
291 write_lock(&journal->j_state_lock);
292 journal->j_flags |= JBD2_UNMOUNT;
293
294 while (journal->j_task) {
295 write_unlock(&journal->j_state_lock);
296 wake_up(&journal->j_wait_commit);
297 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
298 write_lock(&journal->j_state_lock);
299 }
300 write_unlock(&journal->j_state_lock);
301}
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
339 struct journal_head *jh_in,
340 struct buffer_head **bh_out,
341 sector_t blocknr)
342{
343 int need_copy_out = 0;
344 int done_copy_out = 0;
345 int do_escape = 0;
346 char *mapped_data;
347 struct buffer_head *new_bh;
348 struct page *new_page;
349 unsigned int new_offset;
350 struct buffer_head *bh_in = jh2bh(jh_in);
351 journal_t *journal = transaction->t_journal;
352
353
354
355
356
357
358
359
360
361
362 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
363
364 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
365
366
367 atomic_set(&new_bh->b_count, 1);
368
369 spin_lock(&jh_in->b_state_lock);
370repeat:
371
372
373
374
375 if (jh_in->b_frozen_data) {
376 done_copy_out = 1;
377 new_page = virt_to_page(jh_in->b_frozen_data);
378 new_offset = offset_in_page(jh_in->b_frozen_data);
379 } else {
380 new_page = jh2bh(jh_in)->b_page;
381 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
382 }
383
384 mapped_data = kmap_atomic(new_page);
385
386
387
388
389
390
391 if (!done_copy_out)
392 jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
393 jh_in->b_triggers);
394
395
396
397
398 if (*((__be32 *)(mapped_data + new_offset)) ==
399 cpu_to_be32(JBD2_MAGIC_NUMBER)) {
400 need_copy_out = 1;
401 do_escape = 1;
402 }
403 kunmap_atomic(mapped_data);
404
405
406
407
408 if (need_copy_out && !done_copy_out) {
409 char *tmp;
410
411 spin_unlock(&jh_in->b_state_lock);
412 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
413 if (!tmp) {
414 brelse(new_bh);
415 return -ENOMEM;
416 }
417 spin_lock(&jh_in->b_state_lock);
418 if (jh_in->b_frozen_data) {
419 jbd2_free(tmp, bh_in->b_size);
420 goto repeat;
421 }
422
423 jh_in->b_frozen_data = tmp;
424 mapped_data = kmap_atomic(new_page);
425 memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
426 kunmap_atomic(mapped_data);
427
428 new_page = virt_to_page(tmp);
429 new_offset = offset_in_page(tmp);
430 done_copy_out = 1;
431
432
433
434
435
436
437 jh_in->b_frozen_triggers = jh_in->b_triggers;
438 }
439
440
441
442
443
444 if (do_escape) {
445 mapped_data = kmap_atomic(new_page);
446 *((unsigned int *)(mapped_data + new_offset)) = 0;
447 kunmap_atomic(mapped_data);
448 }
449
450 set_bh_page(new_bh, new_page, new_offset);
451 new_bh->b_size = bh_in->b_size;
452 new_bh->b_bdev = journal->j_dev;
453 new_bh->b_blocknr = blocknr;
454 new_bh->b_private = bh_in;
455 set_buffer_mapped(new_bh);
456 set_buffer_dirty(new_bh);
457
458 *bh_out = new_bh;
459
460
461
462
463
464
465 JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
466 spin_lock(&journal->j_list_lock);
467 __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
468 spin_unlock(&journal->j_list_lock);
469 set_buffer_shadow(bh_in);
470 spin_unlock(&jh_in->b_state_lock);
471
472 return do_escape | (done_copy_out << 1);
473}
474
475
476
477
478
479
480
481
482
483
484int __jbd2_log_start_commit(journal_t *journal, tid_t target)
485{
486
487 if (journal->j_commit_request == target)
488 return 0;
489
490
491
492
493
494
495 if (journal->j_running_transaction &&
496 journal->j_running_transaction->t_tid == target) {
497
498
499
500
501
502 journal->j_commit_request = target;
503 jbd_debug(1, "JBD2: requesting commit %u/%u\n",
504 journal->j_commit_request,
505 journal->j_commit_sequence);
506 journal->j_running_transaction->t_requested = jiffies;
507 wake_up(&journal->j_wait_commit);
508 return 1;
509 } else if (!tid_geq(journal->j_commit_request, target))
510
511
512
513 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
514 journal->j_commit_request,
515 journal->j_commit_sequence,
516 target, journal->j_running_transaction ?
517 journal->j_running_transaction->t_tid : 0);
518 return 0;
519}
520
521int jbd2_log_start_commit(journal_t *journal, tid_t tid)
522{
523 int ret;
524
525 write_lock(&journal->j_state_lock);
526 ret = __jbd2_log_start_commit(journal, tid);
527 write_unlock(&journal->j_state_lock);
528 return ret;
529}
530
531
532
533
534
535
536
537
538static int __jbd2_journal_force_commit(journal_t *journal)
539{
540 transaction_t *transaction = NULL;
541 tid_t tid;
542 int need_to_start = 0, ret = 0;
543
544 read_lock(&journal->j_state_lock);
545 if (journal->j_running_transaction && !current->journal_info) {
546 transaction = journal->j_running_transaction;
547 if (!tid_geq(journal->j_commit_request, transaction->t_tid))
548 need_to_start = 1;
549 } else if (journal->j_committing_transaction)
550 transaction = journal->j_committing_transaction;
551
552 if (!transaction) {
553
554 read_unlock(&journal->j_state_lock);
555 return 0;
556 }
557 tid = transaction->t_tid;
558 read_unlock(&journal->j_state_lock);
559 if (need_to_start)
560 jbd2_log_start_commit(journal, tid);
561 ret = jbd2_log_wait_commit(journal, tid);
562 if (!ret)
563 ret = 1;
564
565 return ret;
566}
567
568
569
570
571
572
573
574
575
576
577
578int jbd2_journal_force_commit_nested(journal_t *journal)
579{
580 int ret;
581
582 ret = __jbd2_journal_force_commit(journal);
583 return ret > 0;
584}
585
586
587
588
589
590
591
592
593int jbd2_journal_force_commit(journal_t *journal)
594{
595 int ret;
596
597 J_ASSERT(!current->journal_info);
598 ret = __jbd2_journal_force_commit(journal);
599 if (ret > 0)
600 ret = 0;
601 return ret;
602}
603
604
605
606
607
608
609int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
610{
611 int ret = 0;
612
613 write_lock(&journal->j_state_lock);
614 if (journal->j_running_transaction) {
615 tid_t tid = journal->j_running_transaction->t_tid;
616
617 __jbd2_log_start_commit(journal, tid);
618
619
620 if (ptid)
621 *ptid = tid;
622 ret = 1;
623 } else if (journal->j_committing_transaction) {
624
625
626
627
628 if (ptid)
629 *ptid = journal->j_committing_transaction->t_tid;
630 ret = 1;
631 }
632 write_unlock(&journal->j_state_lock);
633 return ret;
634}
635
636
637
638
639
640
641
642int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
643{
644 int ret = 0;
645 transaction_t *commit_trans;
646
647 if (!(journal->j_flags & JBD2_BARRIER))
648 return 0;
649 read_lock(&journal->j_state_lock);
650
651 if (tid_geq(journal->j_commit_sequence, tid))
652 goto out;
653 commit_trans = journal->j_committing_transaction;
654 if (!commit_trans || commit_trans->t_tid != tid) {
655 ret = 1;
656 goto out;
657 }
658
659
660
661
662 if (journal->j_fs_dev != journal->j_dev) {
663 if (!commit_trans->t_need_data_flush ||
664 commit_trans->t_state >= T_COMMIT_DFLUSH)
665 goto out;
666 } else {
667 if (commit_trans->t_state >= T_COMMIT_JFLUSH)
668 goto out;
669 }
670 ret = 1;
671out:
672 read_unlock(&journal->j_state_lock);
673 return ret;
674}
675EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
676
677
678
679
680
681int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
682{
683 int err = 0;
684
685 read_lock(&journal->j_state_lock);
686#ifdef CONFIG_PROVE_LOCKING
687
688
689
690
691
692 if (tid_gt(tid, journal->j_commit_sequence) &&
693 (!journal->j_committing_transaction ||
694 journal->j_committing_transaction->t_tid != tid)) {
695 read_unlock(&journal->j_state_lock);
696 jbd2_might_wait_for_commit(journal);
697 read_lock(&journal->j_state_lock);
698 }
699#endif
700#ifdef CONFIG_JBD2_DEBUG
701 if (!tid_geq(journal->j_commit_request, tid)) {
702 printk(KERN_ERR
703 "%s: error: j_commit_request=%u, tid=%u\n",
704 __func__, journal->j_commit_request, tid);
705 }
706#endif
707 while (tid_gt(tid, journal->j_commit_sequence)) {
708 jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
709 tid, journal->j_commit_sequence);
710 read_unlock(&journal->j_state_lock);
711 wake_up(&journal->j_wait_commit);
712 wait_event(journal->j_wait_done_commit,
713 !tid_gt(tid, journal->j_commit_sequence));
714 read_lock(&journal->j_state_lock);
715 }
716 read_unlock(&journal->j_state_lock);
717
718 if (unlikely(is_journal_aborted(journal)))
719 err = -EIO;
720 return err;
721}
722
723
724
725
726
727
728
729
730int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
731{
732 if (unlikely(is_journal_aborted(journal)))
733 return -EIO;
734
735
736
737
738 if (!journal->j_stats.ts_tid)
739 return -EINVAL;
740
741 write_lock(&journal->j_state_lock);
742 if (tid <= journal->j_commit_sequence) {
743 write_unlock(&journal->j_state_lock);
744 return -EALREADY;
745 }
746
747 if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
748 (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
749 DEFINE_WAIT(wait);
750
751 prepare_to_wait(&journal->j_fc_wait, &wait,
752 TASK_UNINTERRUPTIBLE);
753 write_unlock(&journal->j_state_lock);
754 schedule();
755 finish_wait(&journal->j_fc_wait, &wait);
756 return -EALREADY;
757 }
758 journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
759 write_unlock(&journal->j_state_lock);
760
761 return 0;
762}
763EXPORT_SYMBOL(jbd2_fc_begin_commit);
764
765
766
767
768
769static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
770{
771 if (journal->j_fc_cleanup_callback)
772 journal->j_fc_cleanup_callback(journal, 0);
773 write_lock(&journal->j_state_lock);
774 journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
775 if (fallback)
776 journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
777 write_unlock(&journal->j_state_lock);
778 wake_up(&journal->j_fc_wait);
779 if (fallback)
780 return jbd2_complete_transaction(journal, tid);
781 return 0;
782}
783
784int jbd2_fc_end_commit(journal_t *journal)
785{
786 return __jbd2_fc_end_commit(journal, 0, false);
787}
788EXPORT_SYMBOL(jbd2_fc_end_commit);
789
790int jbd2_fc_end_commit_fallback(journal_t *journal)
791{
792 tid_t tid;
793
794 read_lock(&journal->j_state_lock);
795 tid = journal->j_running_transaction ?
796 journal->j_running_transaction->t_tid : 0;
797 read_unlock(&journal->j_state_lock);
798 return __jbd2_fc_end_commit(journal, tid, true);
799}
800EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
801
802
803int jbd2_transaction_committed(journal_t *journal, tid_t tid)
804{
805 int ret = 1;
806
807 read_lock(&journal->j_state_lock);
808 if (journal->j_running_transaction &&
809 journal->j_running_transaction->t_tid == tid)
810 ret = 0;
811 if (journal->j_committing_transaction &&
812 journal->j_committing_transaction->t_tid == tid)
813 ret = 0;
814 read_unlock(&journal->j_state_lock);
815 return ret;
816}
817EXPORT_SYMBOL(jbd2_transaction_committed);
818
819
820
821
822
823
824
825
826int jbd2_complete_transaction(journal_t *journal, tid_t tid)
827{
828 int need_to_wait = 1;
829
830 read_lock(&journal->j_state_lock);
831 if (journal->j_running_transaction &&
832 journal->j_running_transaction->t_tid == tid) {
833 if (journal->j_commit_request != tid) {
834
835 read_unlock(&journal->j_state_lock);
836 jbd2_log_start_commit(journal, tid);
837 goto wait_commit;
838 }
839 } else if (!(journal->j_committing_transaction &&
840 journal->j_committing_transaction->t_tid == tid))
841 need_to_wait = 0;
842 read_unlock(&journal->j_state_lock);
843 if (!need_to_wait)
844 return 0;
845wait_commit:
846 return jbd2_log_wait_commit(journal, tid);
847}
848EXPORT_SYMBOL(jbd2_complete_transaction);
849
850
851
852
853
854int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
855{
856 unsigned long blocknr;
857
858 write_lock(&journal->j_state_lock);
859 J_ASSERT(journal->j_free > 1);
860
861 blocknr = journal->j_head;
862 journal->j_head++;
863 journal->j_free--;
864 if (journal->j_head == journal->j_last)
865 journal->j_head = journal->j_first;
866 write_unlock(&journal->j_state_lock);
867 return jbd2_journal_bmap(journal, blocknr, retp);
868}
869
870
871int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
872{
873 unsigned long long pblock;
874 unsigned long blocknr;
875 int ret = 0;
876 struct buffer_head *bh;
877 int fc_off;
878
879 *bh_out = NULL;
880
881 if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
882 fc_off = journal->j_fc_off;
883 blocknr = journal->j_fc_first + fc_off;
884 journal->j_fc_off++;
885 } else {
886 ret = -EINVAL;
887 }
888
889 if (ret)
890 return ret;
891
892 ret = jbd2_journal_bmap(journal, blocknr, &pblock);
893 if (ret)
894 return ret;
895
896 bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
897 if (!bh)
898 return -ENOMEM;
899
900
901 journal->j_fc_wbuf[fc_off] = bh;
902
903 *bh_out = bh;
904
905 return 0;
906}
907EXPORT_SYMBOL(jbd2_fc_get_buf);
908
909
910
911
912
913int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
914{
915 struct buffer_head *bh;
916 int i, j_fc_off;
917
918 j_fc_off = journal->j_fc_off;
919
920
921
922
923
924 for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
925 bh = journal->j_fc_wbuf[i];
926 wait_on_buffer(bh);
927 put_bh(bh);
928 journal->j_fc_wbuf[i] = NULL;
929 if (unlikely(!buffer_uptodate(bh)))
930 return -EIO;
931 }
932
933 return 0;
934}
935EXPORT_SYMBOL(jbd2_fc_wait_bufs);
936
937int jbd2_fc_release_bufs(journal_t *journal)
938{
939 struct buffer_head *bh;
940 int i, j_fc_off;
941
942 j_fc_off = journal->j_fc_off;
943
944 for (i = j_fc_off - 1; i >= 0; i--) {
945 bh = journal->j_fc_wbuf[i];
946 if (!bh)
947 break;
948 put_bh(bh);
949 journal->j_fc_wbuf[i] = NULL;
950 }
951
952 return 0;
953}
954EXPORT_SYMBOL(jbd2_fc_release_bufs);
955
956
957
958
959
960
961
962
963int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
964 unsigned long long *retp)
965{
966 int err = 0;
967 unsigned long long ret;
968 sector_t block = 0;
969
970 if (journal->j_inode) {
971 block = blocknr;
972 ret = bmap(journal->j_inode, &block);
973
974 if (ret || !block) {
975 printk(KERN_ALERT "%s: journal block not found "
976 "at offset %lu on %s\n",
977 __func__, blocknr, journal->j_devname);
978 err = -EIO;
979 jbd2_journal_abort(journal, err);
980 } else {
981 *retp = block;
982 }
983
984 } else {
985 *retp = blocknr;
986 }
987 return err;
988}
989
990
991
992
993
994
995
996
997
998
999
1000struct buffer_head *
1001jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
1002{
1003 journal_t *journal = transaction->t_journal;
1004 struct buffer_head *bh;
1005 unsigned long long blocknr;
1006 journal_header_t *header;
1007 int err;
1008
1009 err = jbd2_journal_next_log_block(journal, &blocknr);
1010
1011 if (err)
1012 return NULL;
1013
1014 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
1015 if (!bh)
1016 return NULL;
1017 atomic_dec(&transaction->t_outstanding_credits);
1018 lock_buffer(bh);
1019 memset(bh->b_data, 0, journal->j_blocksize);
1020 header = (journal_header_t *)bh->b_data;
1021 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
1022 header->h_blocktype = cpu_to_be32(type);
1023 header->h_sequence = cpu_to_be32(transaction->t_tid);
1024 set_buffer_uptodate(bh);
1025 unlock_buffer(bh);
1026 BUFFER_TRACE(bh, "return this buffer");
1027 return bh;
1028}
1029
1030void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
1031{
1032 struct jbd2_journal_block_tail *tail;
1033 __u32 csum;
1034
1035 if (!jbd2_journal_has_csum_v2or3(j))
1036 return;
1037
1038 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
1039 sizeof(struct jbd2_journal_block_tail));
1040 tail->t_checksum = 0;
1041 csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
1042 tail->t_checksum = cpu_to_be32(csum);
1043}
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
1056 unsigned long *block)
1057{
1058 transaction_t *transaction;
1059 int ret;
1060
1061 read_lock(&journal->j_state_lock);
1062 spin_lock(&journal->j_list_lock);
1063 transaction = journal->j_checkpoint_transactions;
1064 if (transaction) {
1065 *tid = transaction->t_tid;
1066 *block = transaction->t_log_start;
1067 } else if ((transaction = journal->j_committing_transaction) != NULL) {
1068 *tid = transaction->t_tid;
1069 *block = transaction->t_log_start;
1070 } else if ((transaction = journal->j_running_transaction) != NULL) {
1071 *tid = transaction->t_tid;
1072 *block = journal->j_head;
1073 } else {
1074 *tid = journal->j_transaction_sequence;
1075 *block = journal->j_head;
1076 }
1077 ret = tid_gt(*tid, journal->j_tail_sequence);
1078 spin_unlock(&journal->j_list_lock);
1079 read_unlock(&journal->j_state_lock);
1080
1081 return ret;
1082}
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
1095{
1096 unsigned long freed;
1097 int ret;
1098
1099 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1100
1101
1102
1103
1104
1105
1106
1107 ret = jbd2_journal_update_sb_log_tail(journal, tid, block,
1108 REQ_SYNC | REQ_FUA);
1109 if (ret)
1110 goto out;
1111
1112 write_lock(&journal->j_state_lock);
1113 freed = block - journal->j_tail;
1114 if (block < journal->j_tail)
1115 freed += journal->j_last - journal->j_first;
1116
1117 trace_jbd2_update_log_tail(journal, tid, block, freed);
1118 jbd_debug(1,
1119 "Cleaning journal tail from %u to %u (offset %lu), "
1120 "freeing %lu\n",
1121 journal->j_tail_sequence, tid, block, freed);
1122
1123 journal->j_free += freed;
1124 journal->j_tail_sequence = tid;
1125 journal->j_tail = block;
1126 write_unlock(&journal->j_state_lock);
1127
1128out:
1129 return ret;
1130}
1131
1132
1133
1134
1135
1136
1137void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
1138{
1139 mutex_lock_io(&journal->j_checkpoint_mutex);
1140 if (tid_gt(tid, journal->j_tail_sequence))
1141 __jbd2_update_log_tail(journal, tid, block);
1142 mutex_unlock(&journal->j_checkpoint_mutex);
1143}
1144
1145struct jbd2_stats_proc_session {
1146 journal_t *journal;
1147 struct transaction_stats_s *stats;
1148 int start;
1149 int max;
1150};
1151
1152static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
1153{
1154 return *pos ? NULL : SEQ_START_TOKEN;
1155}
1156
1157static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
1158{
1159 (*pos)++;
1160 return NULL;
1161}
1162
1163static int jbd2_seq_info_show(struct seq_file *seq, void *v)
1164{
1165 struct jbd2_stats_proc_session *s = seq->private;
1166
1167 if (v != SEQ_START_TOKEN)
1168 return 0;
1169 seq_printf(seq, "%lu transactions (%lu requested), "
1170 "each up to %u blocks\n",
1171 s->stats->ts_tid, s->stats->ts_requested,
1172 s->journal->j_max_transaction_buffers);
1173 if (s->stats->ts_tid == 0)
1174 return 0;
1175 seq_printf(seq, "average: \n %ums waiting for transaction\n",
1176 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid));
1177 seq_printf(seq, " %ums request delay\n",
1178 (s->stats->ts_requested == 0) ? 0 :
1179 jiffies_to_msecs(s->stats->run.rs_request_delay /
1180 s->stats->ts_requested));
1181 seq_printf(seq, " %ums running transaction\n",
1182 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid));
1183 seq_printf(seq, " %ums transaction was being locked\n",
1184 jiffies_to_msecs(s->stats->run.rs_locked / s->stats->ts_tid));
1185 seq_printf(seq, " %ums flushing data (in ordered mode)\n",
1186 jiffies_to_msecs(s->stats->run.rs_flushing / s->stats->ts_tid));
1187 seq_printf(seq, " %ums logging transaction\n",
1188 jiffies_to_msecs(s->stats->run.rs_logging / s->stats->ts_tid));
1189 seq_printf(seq, " %lluus average transaction commit time\n",
1190 div_u64(s->journal->j_average_commit_time, 1000));
1191 seq_printf(seq, " %lu handles per transaction\n",
1192 s->stats->run.rs_handle_count / s->stats->ts_tid);
1193 seq_printf(seq, " %lu blocks per transaction\n",
1194 s->stats->run.rs_blocks / s->stats->ts_tid);
1195 seq_printf(seq, " %lu logged blocks per transaction\n",
1196 s->stats->run.rs_blocks_logged / s->stats->ts_tid);
1197 return 0;
1198}
1199
/* seq_file stop: nothing to release between iterations. */
static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
{
	/* intentionally empty */
}
1203
1204static const struct seq_operations jbd2_seq_info_ops = {
1205 .start = jbd2_seq_info_start,
1206 .next = jbd2_seq_info_next,
1207 .stop = jbd2_seq_info_stop,
1208 .show = jbd2_seq_info_show,
1209};
1210
1211static int jbd2_seq_info_open(struct inode *inode, struct file *file)
1212{
1213 journal_t *journal = PDE_DATA(inode);
1214 struct jbd2_stats_proc_session *s;
1215 int rc, size;
1216
1217 s = kmalloc(sizeof(*s), GFP_KERNEL);
1218 if (s == NULL)
1219 return -ENOMEM;
1220 size = sizeof(struct transaction_stats_s);
1221 s->stats = kmalloc(size, GFP_KERNEL);
1222 if (s->stats == NULL) {
1223 kfree(s);
1224 return -ENOMEM;
1225 }
1226 spin_lock(&journal->j_history_lock);
1227 memcpy(s->stats, &journal->j_stats, size);
1228 s->journal = journal;
1229 spin_unlock(&journal->j_history_lock);
1230
1231 rc = seq_open(file, &jbd2_seq_info_ops);
1232 if (rc == 0) {
1233 struct seq_file *m = file->private_data;
1234 m->private = s;
1235 } else {
1236 kfree(s->stats);
1237 kfree(s);
1238 }
1239 return rc;
1240
1241}
1242
1243static int jbd2_seq_info_release(struct inode *inode, struct file *file)
1244{
1245 struct seq_file *seq = file->private_data;
1246 struct jbd2_stats_proc_session *s = seq->private;
1247 kfree(s->stats);
1248 kfree(s);
1249 return seq_release(inode, file);
1250}
1251
1252static const struct proc_ops jbd2_info_proc_ops = {
1253 .proc_open = jbd2_seq_info_open,
1254 .proc_read = seq_read,
1255 .proc_lseek = seq_lseek,
1256 .proc_release = jbd2_seq_info_release,
1257};
1258
/* Parent proc directory under which each journal's directory is created. */
static struct proc_dir_entry *proc_jbd2_stats;
1260
1261static void jbd2_stats_proc_init(journal_t *journal)
1262{
1263 journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
1264 if (journal->j_proc_entry) {
1265 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
1266 &jbd2_info_proc_ops, journal);
1267 }
1268}
1269
1270static void jbd2_stats_proc_exit(journal_t *journal)
1271{
1272 remove_proc_entry("info", journal->j_proc_entry);
1273 remove_proc_entry(journal->j_devname, proc_jbd2_stats);
1274}
1275
1276
1277static int jbd2_min_tag_size(void)
1278{
1279
1280
1281
1282
1283 return sizeof(journal_block_tag_t) - 4;
1284}
1285
1286
1287
1288
1289
1290
1291
1292static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
1293 struct shrink_control *sc)
1294{
1295 journal_t *journal = container_of(shrink, journal_t, j_shrinker);
1296 unsigned long nr_to_scan = sc->nr_to_scan;
1297 unsigned long nr_shrunk;
1298 unsigned long count;
1299
1300 count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
1301 trace_jbd2_shrink_scan_enter(journal, sc->nr_to_scan, count);
1302
1303 nr_shrunk = jbd2_journal_shrink_checkpoint_list(journal, &nr_to_scan);
1304
1305 count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
1306 trace_jbd2_shrink_scan_exit(journal, nr_to_scan, nr_shrunk, count);
1307
1308 return nr_shrunk;
1309}
1310
1311
1312
1313
1314
1315
1316static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink,
1317 struct shrink_control *sc)
1318{
1319 journal_t *journal = container_of(shrink, journal_t, j_shrinker);
1320 unsigned long count;
1321
1322 count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
1323 trace_jbd2_shrink_count(journal, sc->nr_to_scan, count);
1324
1325 return count;
1326}
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337static journal_t *journal_init_common(struct block_device *bdev,
1338 struct block_device *fs_dev,
1339 unsigned long long start, int len, int blocksize)
1340{
1341 static struct lock_class_key jbd2_trans_commit_key;
1342 journal_t *journal;
1343 int err;
1344 struct buffer_head *bh;
1345 int n;
1346
1347 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
1348 if (!journal)
1349 return NULL;
1350
1351 init_waitqueue_head(&journal->j_wait_transaction_locked);
1352 init_waitqueue_head(&journal->j_wait_done_commit);
1353 init_waitqueue_head(&journal->j_wait_commit);
1354 init_waitqueue_head(&journal->j_wait_updates);
1355 init_waitqueue_head(&journal->j_wait_reserved);
1356 init_waitqueue_head(&journal->j_fc_wait);
1357 mutex_init(&journal->j_abort_mutex);
1358 mutex_init(&journal->j_barrier);
1359 mutex_init(&journal->j_checkpoint_mutex);
1360 spin_lock_init(&journal->j_revoke_lock);
1361 spin_lock_init(&journal->j_list_lock);
1362 rwlock_init(&journal->j_state_lock);
1363
1364 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
1365 journal->j_min_batch_time = 0;
1366 journal->j_max_batch_time = 15000;
1367 atomic_set(&journal->j_reserved_credits, 0);
1368
1369
1370 journal->j_flags = JBD2_ABORT;
1371
1372
1373 err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
1374 if (err)
1375 goto err_cleanup;
1376
1377 spin_lock_init(&journal->j_history_lock);
1378
1379 lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
1380 &jbd2_trans_commit_key, 0);
1381
1382
1383 journal->j_blocksize = blocksize;
1384 journal->j_dev = bdev;
1385 journal->j_fs_dev = fs_dev;
1386 journal->j_blk_offset = start;
1387 journal->j_total_len = len;
1388
1389 n = journal->j_blocksize / jbd2_min_tag_size();
1390 journal->j_wbufsize = n;
1391 journal->j_fc_wbuf = NULL;
1392 journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
1393 GFP_KERNEL);
1394 if (!journal->j_wbuf)
1395 goto err_cleanup;
1396
1397 bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
1398 if (!bh) {
1399 pr_err("%s: Cannot get buffer for journal superblock\n",
1400 __func__);
1401 goto err_cleanup;
1402 }
1403 journal->j_sb_buffer = bh;
1404 journal->j_superblock = (journal_superblock_t *)bh->b_data;
1405
1406 journal->j_shrink_transaction = NULL;
1407 journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan;
1408 journal->j_shrinker.count_objects = jbd2_journal_shrink_count;
1409 journal->j_shrinker.seeks = DEFAULT_SEEKS;
1410 journal->j_shrinker.batch = journal->j_max_transaction_buffers;
1411
1412 if (percpu_counter_init(&journal->j_checkpoint_jh_count, 0, GFP_KERNEL))
1413 goto err_cleanup;
1414
1415 if (register_shrinker(&journal->j_shrinker)) {
1416 percpu_counter_destroy(&journal->j_checkpoint_jh_count);
1417 goto err_cleanup;
1418 }
1419 return journal;
1420
1421err_cleanup:
1422 brelse(journal->j_sb_buffer);
1423 kfree(journal->j_wbuf);
1424 jbd2_journal_destroy_revoke(journal);
1425 kfree(journal);
1426 return NULL;
1427}
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452journal_t *jbd2_journal_init_dev(struct block_device *bdev,
1453 struct block_device *fs_dev,
1454 unsigned long long start, int len, int blocksize)
1455{
1456 journal_t *journal;
1457
1458 journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
1459 if (!journal)
1460 return NULL;
1461
1462 bdevname(journal->j_dev, journal->j_devname);
1463 strreplace(journal->j_devname, '/', '!');
1464 jbd2_stats_proc_init(journal);
1465
1466 return journal;
1467}
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477journal_t *jbd2_journal_init_inode(struct inode *inode)
1478{
1479 journal_t *journal;
1480 sector_t blocknr;
1481 char *p;
1482 int err = 0;
1483
1484 blocknr = 0;
1485 err = bmap(inode, &blocknr);
1486
1487 if (err || !blocknr) {
1488 pr_err("%s: Cannot locate journal superblock\n",
1489 __func__);
1490 return NULL;
1491 }
1492
1493 jbd_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
1494 inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
1495 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
1496
1497 journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
1498 blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
1499 inode->i_sb->s_blocksize);
1500 if (!journal)
1501 return NULL;
1502
1503 journal->j_inode = inode;
1504 bdevname(journal->j_dev, journal->j_devname);
1505 p = strreplace(journal->j_devname, '/', '!');
1506 sprintf(p, "-%lu", journal->j_inode->i_ino);
1507 jbd2_stats_proc_init(journal);
1508
1509 return journal;
1510}
1511
1512
1513
1514
1515
1516
1517static void journal_fail_superblock(journal_t *journal)
1518{
1519 struct buffer_head *bh = journal->j_sb_buffer;
1520 brelse(bh);
1521 journal->j_sb_buffer = NULL;
1522}
1523
1524
1525
1526
1527
1528
1529
1530
1531static int journal_reset(journal_t *journal)
1532{
1533 journal_superblock_t *sb = journal->j_superblock;
1534 unsigned long long first, last;
1535
1536 first = be32_to_cpu(sb->s_first);
1537 last = be32_to_cpu(sb->s_maxlen);
1538 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
1539 printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
1540 first, last);
1541 journal_fail_superblock(journal);
1542 return -EINVAL;
1543 }
1544
1545 journal->j_first = first;
1546 journal->j_last = last;
1547
1548 journal->j_head = journal->j_first;
1549 journal->j_tail = journal->j_first;
1550 journal->j_free = journal->j_last - journal->j_first;
1551
1552 journal->j_tail_sequence = journal->j_transaction_sequence;
1553 journal->j_commit_sequence = journal->j_transaction_sequence - 1;
1554 journal->j_commit_request = journal->j_commit_sequence;
1555
1556 journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
1557
1558
1559
1560
1561
1562
1563 jbd2_clear_feature_fast_commit(journal);
1564
1565
1566
1567
1568
1569
1570
1571 if (sb->s_start == 0) {
1572 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
1573 "(start %ld, seq %u, errno %d)\n",
1574 journal->j_tail, journal->j_tail_sequence,
1575 journal->j_errno);
1576 journal->j_flags |= JBD2_FLUSHED;
1577 } else {
1578
1579 mutex_lock_io(&journal->j_checkpoint_mutex);
1580
1581
1582
1583
1584
1585
1586 jbd2_journal_update_sb_log_tail(journal,
1587 journal->j_tail_sequence,
1588 journal->j_tail,
1589 REQ_SYNC | REQ_FUA);
1590 mutex_unlock(&journal->j_checkpoint_mutex);
1591 }
1592 return jbd2_journal_start_thread(journal);
1593}
1594
1595
1596
1597
1598
/*
 * Write the in-memory journal superblock to disk and wait for the write
 * to finish.
 *
 * The callers in this file take the buffer lock on j_sb_buffer with
 * lock_buffer() before calling.  @write_flags are the request flags for
 * the write; REQ_FUA and REQ_PREFLUSH are dropped when the journal does
 * not have JBD2_BARRIER set.
 *
 * Returns 0 on success or a negative error.  A failed write also aborts
 * the journal if it is not aborted already.
 */
static int jbd2_write_superblock(journal_t *journal, int write_flags)
{
	struct buffer_head *bh = journal->j_sb_buffer;
	journal_superblock_t *sb = journal->j_superblock;
	int ret;

	/* The buffer is no longer mapped: release the lock and give up. */
	if (!buffer_mapped(bh)) {
		unlock_buffer(bh);
		return -EIO;
	}

	trace_jbd2_write_superblock(journal, write_flags);
	if (!(journal->j_flags & JBD2_BARRIER))
		write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
	if (buffer_write_io_error(bh)) {
		/*
		 * An earlier write of this buffer failed.  Report it, then
		 * clear the error and mark the buffer up to date again so
		 * that this write is attempted with a clean state.
		 */
		printk(KERN_ERR "JBD2: previous I/O error detected "
		       "for journal superblock update for %s.\n",
		       journal->j_devname);
		clear_buffer_write_io_error(bh);
		set_buffer_uptodate(bh);
	}
	/* Refresh the superblock checksum when v2/v3 checksums are in use. */
	if (jbd2_journal_has_csum_v2or3(journal))
		sb->s_checksum = jbd2_superblock_csum(journal, sb);
	/* Extra reference for the I/O; presumably dropped by b_end_io. */
	get_bh(bh);
	bh->b_end_io = end_buffer_write_sync;
	ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
	wait_on_buffer(bh);
	if (buffer_write_io_error(bh)) {
		/* This write failed: reset the buffer state and report -EIO. */
		clear_buffer_write_io_error(bh);
		set_buffer_uptodate(bh);
		ret = -EIO;
	}
	if (ret) {
		printk(KERN_ERR "JBD2: Error %d detected when updating "
		       "journal superblock for %s.\n", ret,
		       journal->j_devname);
		if (!is_journal_aborted(journal))
			jbd2_journal_abort(journal, ret);
	}

	return ret;
}
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
1662 unsigned long tail_block, int write_op)
1663{
1664 journal_superblock_t *sb = journal->j_superblock;
1665 int ret;
1666
1667 if (is_journal_aborted(journal))
1668 return -EIO;
1669 if (test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags)) {
1670 jbd2_journal_abort(journal, -EIO);
1671 return -EIO;
1672 }
1673
1674 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1675 jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
1676 tail_block, tail_tid);
1677
1678 lock_buffer(journal->j_sb_buffer);
1679 sb->s_sequence = cpu_to_be32(tail_tid);
1680 sb->s_start = cpu_to_be32(tail_block);
1681
1682 ret = jbd2_write_superblock(journal, write_op);
1683 if (ret)
1684 goto out;
1685
1686
1687 write_lock(&journal->j_state_lock);
1688 WARN_ON(!sb->s_sequence);
1689 journal->j_flags &= ~JBD2_FLUSHED;
1690 write_unlock(&journal->j_state_lock);
1691
1692out:
1693 return ret;
1694}
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
1705{
1706 journal_superblock_t *sb = journal->j_superblock;
1707 bool had_fast_commit = false;
1708
1709 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1710 lock_buffer(journal->j_sb_buffer);
1711 if (sb->s_start == 0) {
1712 unlock_buffer(journal->j_sb_buffer);
1713 return;
1714 }
1715
1716 jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
1717 journal->j_tail_sequence);
1718
1719 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1720 sb->s_start = cpu_to_be32(0);
1721 if (jbd2_has_feature_fast_commit(journal)) {
1722
1723
1724
1725
1726 jbd2_clear_feature_fast_commit(journal);
1727 had_fast_commit = true;
1728 }
1729
1730 jbd2_write_superblock(journal, write_op);
1731
1732 if (had_fast_commit)
1733 jbd2_set_feature_fast_commit(journal);
1734
1735
1736 write_lock(&journal->j_state_lock);
1737 journal->j_flags |= JBD2_FLUSHED;
1738 write_unlock(&journal->j_state_lock);
1739}
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
1754{
1755 int err = 0;
1756 unsigned long block, log_offset;
1757 unsigned long long phys_block, block_start, block_stop;
1758 loff_t byte_start, byte_stop, byte_count;
1759 struct request_queue *q = bdev_get_queue(journal->j_dev);
1760
1761
1762 if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
1763 ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
1764 (flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
1765 return -EINVAL;
1766
1767 if (!q)
1768 return -ENXIO;
1769
1770 if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
1771 return -EOPNOTSUPP;
1772
1773
1774
1775
1776
1777 log_offset = be32_to_cpu(journal->j_superblock->s_first);
1778 block_start = ~0ULL;
1779 for (block = log_offset; block < journal->j_total_len; block++) {
1780 err = jbd2_journal_bmap(journal, block, &phys_block);
1781 if (err) {
1782 pr_err("JBD2: bad block at offset %lu", block);
1783 return err;
1784 }
1785
1786 if (block_start == ~0ULL) {
1787 block_start = phys_block;
1788 block_stop = block_start - 1;
1789 }
1790
1791
1792
1793
1794
1795
1796 if (phys_block != block_stop + 1) {
1797 block--;
1798 } else {
1799 block_stop++;
1800
1801
1802
1803
1804
1805 if (block != journal->j_total_len - 1)
1806 continue;
1807 }
1808
1809
1810
1811
1812
1813 byte_start = block_start * journal->j_blocksize;
1814 byte_stop = block_stop * journal->j_blocksize;
1815 byte_count = (block_stop - block_start + 1) *
1816 journal->j_blocksize;
1817
1818 truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
1819 byte_start, byte_stop);
1820
1821 if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
1822 err = blkdev_issue_discard(journal->j_dev,
1823 byte_start >> SECTOR_SHIFT,
1824 byte_count >> SECTOR_SHIFT,
1825 GFP_NOFS, 0);
1826 } else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
1827 err = blkdev_issue_zeroout(journal->j_dev,
1828 byte_start >> SECTOR_SHIFT,
1829 byte_count >> SECTOR_SHIFT,
1830 GFP_NOFS, 0);
1831 }
1832
1833 if (unlikely(err != 0)) {
1834 pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu",
1835 err, block_start, block_stop);
1836 return err;
1837 }
1838
1839
1840 block_start = ~0ULL;
1841 }
1842
1843 return blkdev_issue_flush(journal->j_dev);
1844}
1845
1846
1847
1848
1849
1850
1851
1852
1853void jbd2_journal_update_sb_errno(journal_t *journal)
1854{
1855 journal_superblock_t *sb = journal->j_superblock;
1856 int errcode;
1857
1858 lock_buffer(journal->j_sb_buffer);
1859 errcode = journal->j_errno;
1860 if (errcode == -ESHUTDOWN)
1861 errcode = 0;
1862 jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
1863 sb->s_errno = cpu_to_be32(errcode);
1864
1865 jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
1866}
1867EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
1868
1869static int journal_revoke_records_per_block(journal_t *journal)
1870{
1871 int record_size;
1872 int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
1873
1874 if (jbd2_has_feature_64bit(journal))
1875 record_size = 8;
1876 else
1877 record_size = 4;
1878
1879 if (jbd2_journal_has_csum_v2or3(journal))
1880 space -= sizeof(struct jbd2_journal_block_tail);
1881 return space / record_size;
1882}
1883
1884
1885
1886
1887
/*
 * Read the journal superblock from disk if it is not already up to date
 * and validate it: magic number, block size, format version, journal
 * length, first log block, checksum feature combination and, when v2/v3
 * checksums are enabled, the superblock checksum itself.
 *
 * On success the buffer is marked verified so later calls return early.
 * On any failure the superblock buffer is released through
 * journal_fail_superblock() and a negative error is returned.
 */
static int journal_get_superblock(journal_t *journal)
{
	struct buffer_head *bh;
	journal_superblock_t *sb;
	int err = -EIO;

	bh = journal->j_sb_buffer;

	J_ASSERT(bh != NULL);
	if (!buffer_uptodate(bh)) {
		/* Not in memory yet: read it and wait for completion. */
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			printk(KERN_ERR
				"JBD2: IO error reading journal superblock\n");
			goto out;
		}
	}

	/* Already validated by an earlier call. */
	if (buffer_verified(bh))
		return 0;

	sb = journal->j_superblock;

	/* From here on, failures are reported as -EINVAL by default. */
	err = -EINVAL;

	if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
	    sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
		printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
		goto out;
	}

	/* The block type of the superblock selects the format version. */
	switch(be32_to_cpu(sb->s_header.h_blocktype)) {
	case JBD2_SUPERBLOCK_V1:
		journal->j_format_version = 1;
		break;
	case JBD2_SUPERBLOCK_V2:
		journal->j_format_version = 2;
		break;
	default:
		printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
		goto out;
	}

	/*
	 * The superblock may describe a journal shorter than the space we
	 * were given (shrink j_total_len to match), but not a longer one.
	 */
	if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
		journal->j_total_len = be32_to_cpu(sb->s_maxlen);
	else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
		printk(KERN_WARNING "JBD2: journal file too short\n");
		goto out;
	}

	/* The first log block must be non-zero and inside the journal. */
	if (be32_to_cpu(sb->s_first) == 0 ||
	    be32_to_cpu(sb->s_first) >= journal->j_total_len) {
		printk(KERN_WARNING
			"JBD2: Invalid start block of journal: %u\n",
			be32_to_cpu(sb->s_first));
		goto out;
	}

	if (jbd2_has_feature_csum2(journal) &&
	    jbd2_has_feature_csum3(journal)) {
		/* The v2 and v3 checksum features are mutually exclusive. */
		printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
		       "at the same time!\n");
		goto out;
	}

	if (jbd2_journal_has_csum_v2or3_feature(journal) &&
	    jbd2_has_feature_checksum(journal)) {
		/* Nor can v1 checksums be combined with v2/v3. */
		printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
		       "at the same time!\n");
		goto out;
	}

	if (!jbd2_verify_csum_type(journal, sb)) {
		printk(KERN_ERR "JBD2: Unknown checksum type\n");
		goto out;
	}

	/* Load the crc32c transform needed for v2/v3 checksums. */
	if (jbd2_journal_has_csum_v2or3_feature(journal)) {
		journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
		if (IS_ERR(journal->j_chksum_driver)) {
			printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
			err = PTR_ERR(journal->j_chksum_driver);
			journal->j_chksum_driver = NULL;
			goto out;
		}
	}

	if (jbd2_journal_has_csum_v2or3(journal)) {
		/* Verify the superblock's own checksum. */
		if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
			printk(KERN_ERR "JBD2: journal checksum error\n");
			err = -EFSBADCRC;
			goto out;
		}

		/* Derive the checksum seed from the journal UUID. */
		journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
						   sizeof(sb->s_uuid));
	}

	journal->j_revoke_records_per_block =
				journal_revoke_records_per_block(journal);
	set_buffer_verified(bh);

	return 0;

out:
	/* Release the superblock buffer; j_sb_buffer becomes NULL. */
	journal_fail_superblock(journal);
	return err;
}
2002
2003
2004
2005
2006
2007
2008static int load_superblock(journal_t *journal)
2009{
2010 int err;
2011 journal_superblock_t *sb;
2012 int num_fc_blocks;
2013
2014 err = journal_get_superblock(journal);
2015 if (err)
2016 return err;
2017
2018 sb = journal->j_superblock;
2019
2020 journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
2021 journal->j_tail = be32_to_cpu(sb->s_start);
2022 journal->j_first = be32_to_cpu(sb->s_first);
2023 journal->j_errno = be32_to_cpu(sb->s_errno);
2024 journal->j_last = be32_to_cpu(sb->s_maxlen);
2025
2026 if (jbd2_has_feature_fast_commit(journal)) {
2027 journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
2028 num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
2029 if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
2030 journal->j_last = journal->j_fc_last - num_fc_blocks;
2031 journal->j_fc_first = journal->j_last + 1;
2032 journal->j_fc_off = 0;
2033 }
2034
2035 return 0;
2036}
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047int jbd2_journal_load(journal_t *journal)
2048{
2049 int err;
2050 journal_superblock_t *sb;
2051
2052 err = load_superblock(journal);
2053 if (err)
2054 return err;
2055
2056 sb = journal->j_superblock;
2057
2058
2059
2060 if (journal->j_format_version >= 2) {
2061 if ((sb->s_feature_ro_compat &
2062 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
2063 (sb->s_feature_incompat &
2064 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
2065 printk(KERN_WARNING
2066 "JBD2: Unrecognised features on journal\n");
2067 return -EINVAL;
2068 }
2069 }
2070
2071
2072
2073
2074 err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
2075 if (err)
2076 return err;
2077
2078
2079
2080 if (jbd2_journal_recover(journal))
2081 goto recovery_error;
2082
2083 if (journal->j_failed_commit) {
2084 printk(KERN_ERR "JBD2: journal transaction %u on %s "
2085 "is corrupt.\n", journal->j_failed_commit,
2086 journal->j_devname);
2087 return -EFSCORRUPTED;
2088 }
2089
2090
2091
2092
2093 journal->j_flags &= ~JBD2_ABORT;
2094
2095
2096
2097
2098 if (journal_reset(journal))
2099 goto recovery_error;
2100
2101 journal->j_flags |= JBD2_LOADED;
2102 return 0;
2103
2104recovery_error:
2105 printk(KERN_WARNING "JBD2: recovery failed\n");
2106 return -EIO;
2107}
2108
2109
2110
2111
2112
2113
2114
2115
2116
/*
 * Tear down a journal: stop the commit thread, commit any running
 * transaction, checkpoint everything that is still outstanding, mark the
 * on-disk journal empty (unless the journal is aborted) and free all
 * memory and resources owned by the journal.
 *
 * Returns 0 on success, -EIO if the journal was aborted, or the error
 * from the last checkpoint attempt.
 */
int jbd2_journal_destroy(journal_t *journal)
{
	int err = 0;

	/* Stop the commit thread before touching transactions. */
	journal_kill_thread(journal);

	/* Commit whatever is still running. */
	if (journal->j_running_transaction)
		jbd2_journal_commit_transaction(journal);

	/*
	 * Checkpoint until the checkpoint list is empty.  j_list_lock is
	 * dropped around each checkpoint call, which runs under
	 * j_checkpoint_mutex instead.
	 */
	spin_lock(&journal->j_list_lock);
	while (journal->j_checkpoint_transactions != NULL) {
		spin_unlock(&journal->j_list_lock);
		mutex_lock_io(&journal->j_checkpoint_mutex);
		err = jbd2_log_do_checkpoint(journal);
		mutex_unlock(&journal->j_checkpoint_mutex);
		/*
		 * If checkpointing failed, drop the remaining checkpoint
		 * state explicitly and stop looping.
		 */
		if (err) {
			jbd2_journal_destroy_checkpoint(journal);
			spin_lock(&journal->j_list_lock);
			break;
		}
		spin_lock(&journal->j_list_lock);
	}

	J_ASSERT(journal->j_running_transaction == NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);
	J_ASSERT(journal->j_checkpoint_transactions == NULL);
	spin_unlock(&journal->j_list_lock);

	/*
	 * A checkpoint I/O error that was flagged but has not yet aborted
	 * the journal aborts it now, so the journal is not marked empty
	 * below.
	 */
	if (!is_journal_aborted(journal) &&
	    test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags))
		jbd2_journal_abort(journal, -EIO);

	if (journal->j_sb_buffer) {
		if (!is_journal_aborted(journal)) {
			mutex_lock_io(&journal->j_checkpoint_mutex);

			/* Advance the sequence and make it the new tail. */
			write_lock(&journal->j_state_lock);
			journal->j_tail_sequence =
				++journal->j_transaction_sequence;
			write_unlock(&journal->j_state_lock);

			jbd2_mark_journal_empty(journal,
					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
			mutex_unlock(&journal->j_checkpoint_mutex);
		} else
			err = -EIO;
		brelse(journal->j_sb_buffer);
	}

	/* Only undo the shrinker setup if it was actually registered. */
	if (journal->j_shrinker.flags & SHRINKER_REGISTERED) {
		percpu_counter_destroy(&journal->j_checkpoint_jh_count);
		unregister_shrinker(&journal->j_shrinker);
	}
	if (journal->j_proc_entry)
		jbd2_stats_proc_exit(journal);
	iput(journal->j_inode);
	if (journal->j_revoke)
		jbd2_journal_destroy_revoke(journal);
	if (journal->j_chksum_driver)
		crypto_free_shash(journal->j_chksum_driver);
	kfree(journal->j_fc_wbuf);
	kfree(journal->j_wbuf);
	kfree(journal);

	return err;
}
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
2212 unsigned long ro, unsigned long incompat)
2213{
2214 journal_superblock_t *sb;
2215
2216 if (!compat && !ro && !incompat)
2217 return 1;
2218
2219 if (journal->j_format_version == 0 &&
2220 journal_get_superblock(journal) != 0)
2221 return 0;
2222 if (journal->j_format_version == 1)
2223 return 0;
2224
2225 sb = journal->j_superblock;
2226
2227 if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
2228 ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
2229 ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat))
2230 return 1;
2231
2232 return 0;
2233}
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat,
2247 unsigned long ro, unsigned long incompat)
2248{
2249 if (!compat && !ro && !incompat)
2250 return 1;
2251
2252
2253
2254
2255
2256 if (journal->j_format_version != 2)
2257 return 0;
2258
2259 if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat &&
2260 (ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro &&
2261 (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat)
2262 return 1;
2263
2264 return 0;
2265}
2266
2267static int
2268jbd2_journal_initialize_fast_commit(journal_t *journal)
2269{
2270 journal_superblock_t *sb = journal->j_superblock;
2271 unsigned long long num_fc_blks;
2272
2273 num_fc_blks = jbd2_journal_get_num_fc_blks(sb);
2274 if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
2275 return -ENOSPC;
2276
2277
2278 WARN_ON(journal->j_fc_wbuf != NULL);
2279 journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
2280 sizeof(struct buffer_head *), GFP_KERNEL);
2281 if (!journal->j_fc_wbuf)
2282 return -ENOMEM;
2283
2284 journal->j_fc_wbufsize = num_fc_blks;
2285 journal->j_fc_last = journal->j_last;
2286 journal->j_last = journal->j_fc_last - num_fc_blks;
2287 journal->j_fc_first = journal->j_last + 1;
2288 journal->j_fc_off = 0;
2289 journal->j_free = journal->j_last - journal->j_first;
2290 journal->j_max_transaction_buffers =
2291 jbd2_journal_get_max_txn_bufs(journal);
2292
2293 return 0;
2294}
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
/*
 * Turn on the given feature bits in the journal superblock.  @compat, @ro
 * and @incompat are masks for the compatible, read-only compatible and
 * incompatible feature words.
 *
 * Returns 1 if the features are (now) set, 0 if they are not supported or
 * could not be enabled.  The superblock is modified in memory only; this
 * function does not write it to disk.
 */
int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
			  unsigned long ro, unsigned long incompat)
{
/* True when feature f is requested and not yet set in the superblock. */
#define INCOMPAT_FEATURE_ON(f) \
		((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f)))
#define COMPAT_FEATURE_ON(f) \
		((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f)))
	journal_superblock_t *sb;

	/* Nothing to do if everything requested is already set. */
	if (jbd2_journal_check_used_features(journal, compat, ro, incompat))
		return 1;

	if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
		return 0;

	/* A request for checksum v2 is upgraded to checksum v3. */
	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
		incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
		incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
	}

	/* If both v3 and v1 checksums are requested, keep only v3. */
	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
	    compat & JBD2_FEATURE_COMPAT_CHECKSUM)
		compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;

	jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
		  compat, ro, incompat);

	sb = journal->j_superblock;

	/* Fast commit needs its area and buffers set up first. */
	if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
		if (jbd2_journal_initialize_fast_commit(journal)) {
			pr_err("JBD2: Cannot enable fast commits.\n");
			return 0;
		}
	}

	/* Load the crc32c driver if v3 checksums are being switched on. */
	if ((journal->j_chksum_driver == NULL) &&
	    INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
		journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
		if (IS_ERR(journal->j_chksum_driver)) {
			printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
			journal->j_chksum_driver = NULL;
			return 0;
		}
		/* Derive the checksum seed from the journal UUID. */
		journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
						   sizeof(sb->s_uuid));
	}

	/* Superblock fields are changed under the buffer lock. */
	lock_buffer(journal->j_sb_buffer);

	/* Enabling v3 checksums: set the type and drop the v1 checksum bit. */
	if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
		sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
		sb->s_feature_compat &=
			~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
	}

	/* Enabling v1 checksums: drop the v2/v3 checksum bits. */
	if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
		sb->s_feature_incompat &=
			~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
				     JBD2_FEATURE_INCOMPAT_CSUM_V3);

	sb->s_feature_compat    |= cpu_to_be32(compat);
	sb->s_feature_ro_compat |= cpu_to_be32(ro);
	sb->s_feature_incompat  |= cpu_to_be32(incompat);
	unlock_buffer(journal->j_sb_buffer);
	/* Feature bits affect the revoke record size and block layout. */
	journal->j_revoke_records_per_block =
				journal_revoke_records_per_block(journal);

	return 1;
#undef COMPAT_FEATURE_ON
#undef INCOMPAT_FEATURE_ON
}
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
2399 unsigned long ro, unsigned long incompat)
2400{
2401 journal_superblock_t *sb;
2402
2403 jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
2404 compat, ro, incompat);
2405
2406 sb = journal->j_superblock;
2407
2408 sb->s_feature_compat &= ~cpu_to_be32(compat);
2409 sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
2410 sb->s_feature_incompat &= ~cpu_to_be32(incompat);
2411 journal->j_revoke_records_per_block =
2412 journal_revoke_records_per_block(journal);
2413}
2414EXPORT_SYMBOL(jbd2_journal_clear_features);
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
/*
 * Flush the journal: commit the current transaction, checkpoint all
 * committed transactions, and mark the on-disk journal as empty.  When
 * @flags is non-zero the log area is additionally erased via
 * __jbd2_journal_erase() using those flags.
 *
 * Returns 0 on success, -EIO if the journal is aborted, or a negative
 * error from checkpointing, tail cleanup or erasing.
 */
int jbd2_journal_flush(journal_t *journal, unsigned int flags)
{
	int err = 0;
	transaction_t *transaction = NULL;

	write_lock(&journal->j_state_lock);

	/* Start committing the running transaction, if there is one. */
	if (journal->j_running_transaction) {
		transaction = journal->j_running_transaction;
		__jbd2_log_start_commit(journal, transaction->t_tid);
	} else if (journal->j_committing_transaction)
		transaction = journal->j_committing_transaction;

	/* Wait for that transaction's commit with the state lock dropped. */
	if (transaction) {
		tid_t tid = transaction->t_tid;

		write_unlock(&journal->j_state_lock);
		jbd2_log_wait_commit(journal, tid);
	} else {
		write_unlock(&journal->j_state_lock);
	}

	/*
	 * Checkpoint until the list is empty or an error occurs.
	 * j_list_lock is dropped around each checkpoint call.
	 */
	spin_lock(&journal->j_list_lock);
	while (!err && journal->j_checkpoint_transactions != NULL) {
		spin_unlock(&journal->j_list_lock);
		mutex_lock_io(&journal->j_checkpoint_mutex);
		err = jbd2_log_do_checkpoint(journal);
		mutex_unlock(&journal->j_checkpoint_mutex);
		spin_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);

	if (is_journal_aborted(journal))
		return -EIO;

	mutex_lock_io(&journal->j_checkpoint_mutex);
	if (!err) {
		err = jbd2_cleanup_journal_tail(journal);
		if (err < 0) {
			mutex_unlock(&journal->j_checkpoint_mutex);
			goto out;
		}
		/* Non-negative results are not treated as errors. */
		err = 0;
	}

	/*
	 * Record on disk that the log is empty.  This is done even when
	 * checkpointing above returned an error.
	 */
	jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);

	if (flags)
		err = __jbd2_journal_erase(journal, flags);

	mutex_unlock(&journal->j_checkpoint_mutex);
	/* Sanity-check that the journal really is idle and empty. */
	write_lock(&journal->j_state_lock);
	J_ASSERT(!journal->j_running_transaction);
	J_ASSERT(!journal->j_committing_transaction);
	J_ASSERT(!journal->j_checkpoint_transactions);
	J_ASSERT(journal->j_head == journal->j_tail);
	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
	write_unlock(&journal->j_state_lock);
out:
	return err;
}
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513int jbd2_journal_wipe(journal_t *journal, int write)
2514{
2515 int err = 0;
2516
2517 J_ASSERT (!(journal->j_flags & JBD2_LOADED));
2518
2519 err = load_superblock(journal);
2520 if (err)
2521 return err;
2522
2523 if (!journal->j_tail)
2524 goto no_recovery;
2525
2526 printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
2527 write ? "Clearing" : "Ignoring");
2528
2529 err = jbd2_journal_skip_recovery(journal);
2530 if (write) {
2531
2532 mutex_lock_io(&journal->j_checkpoint_mutex);
2533 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
2534 mutex_unlock(&journal->j_checkpoint_mutex);
2535 }
2536
2537 no_recovery:
2538 return err;
2539}
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
/*
 * Abort the journal: set JBD2_ABORT, record @errno as the journal error,
 * kick off a commit of the running transaction, and write the error code
 * to the on-disk superblock.
 *
 * If the journal is already aborted, the only thing that can still change
 * is the stored error, and only when @errno is -ESHUTDOWN and the stored
 * value is not.
 */
void jbd2_journal_abort(journal_t *journal, int errno)
{
	transaction_t *transaction;

	/*
	 * j_abort_mutex is held across the whole function, so the flag
	 * update and the superblock write happen as one unit with respect
	 * to other callers of this function.
	 */
	mutex_lock(&journal->j_abort_mutex);
	/*
	 * j_state_lock covers the check and update of j_flags and j_errno
	 * below.
	 */
	write_lock(&journal->j_state_lock);
	if (journal->j_flags & JBD2_ABORT) {
		int old_errno = journal->j_errno;

		write_unlock(&journal->j_state_lock);
		/* Already aborted: only -ESHUTDOWN may replace the errno. */
		if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) {
			journal->j_errno = errno;
			jbd2_journal_update_sb_errno(journal);
		}
		mutex_unlock(&journal->j_abort_mutex);
		return;
	}

	/* First abort: announce it and record the state. */
	pr_err("Aborting journal on device %s.\n", journal->j_devname);

	journal->j_flags |= JBD2_ABORT;
	journal->j_errno = errno;
	transaction = journal->j_running_transaction;
	if (transaction)
		__jbd2_log_start_commit(journal, transaction->t_tid);
	write_unlock(&journal->j_state_lock);

	/* Write the error to disk after dropping the state lock. */
	jbd2_journal_update_sb_errno(journal);
	mutex_unlock(&journal->j_abort_mutex);
}
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643int jbd2_journal_errno(journal_t *journal)
2644{
2645 int err;
2646
2647 read_lock(&journal->j_state_lock);
2648 if (journal->j_flags & JBD2_ABORT)
2649 err = -EROFS;
2650 else
2651 err = journal->j_errno;
2652 read_unlock(&journal->j_state_lock);
2653 return err;
2654}
2655
2656
2657
2658
2659
2660
2661
2662
2663int jbd2_journal_clear_err(journal_t *journal)
2664{
2665 int err = 0;
2666
2667 write_lock(&journal->j_state_lock);
2668 if (journal->j_flags & JBD2_ABORT)
2669 err = -EROFS;
2670 else
2671 journal->j_errno = 0;
2672 write_unlock(&journal->j_state_lock);
2673 return err;
2674}
2675
2676
2677
2678
2679
2680
2681
2682
2683void jbd2_journal_ack_err(journal_t *journal)
2684{
2685 write_lock(&journal->j_state_lock);
2686 if (journal->j_errno)
2687 journal->j_flags |= JBD2_ACK_ERR;
2688 write_unlock(&journal->j_state_lock);
2689}
2690
2691int jbd2_journal_blocks_per_page(struct inode *inode)
2692{
2693 return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
2694}
2695
2696
2697
2698
2699size_t journal_tag_bytes(journal_t *journal)
2700{
2701 size_t sz;
2702
2703 if (jbd2_has_feature_csum3(journal))
2704 return sizeof(journal_block_tag3_t);
2705
2706 sz = sizeof(journal_block_tag_t);
2707
2708 if (jbd2_has_feature_csum2(journal))
2709 sz += sizeof(__u16);
2710
2711 if (jbd2_has_feature_64bit(journal))
2712 return sz;
2713 else
2714 return sz - sizeof(__u32);
2715}
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
/*
 * Slab caches for journal buffers smaller than a page, one per
 * power-of-two size.  Index i holds the cache for objects of
 * 1 << (i + 10) bytes, i.e. 1k at index 0 up to 128k at index 7.
 */
#define JBD2_MAX_SLABS 8
static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];

/* Cache names, indexed the same way as jbd2_slab[]. */
static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
	"jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
	"jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
};
2739
2740
2741static void jbd2_journal_destroy_slabs(void)
2742{
2743 int i;
2744
2745 for (i = 0; i < JBD2_MAX_SLABS; i++) {
2746 kmem_cache_destroy(jbd2_slab[i]);
2747 jbd2_slab[i] = NULL;
2748 }
2749}
2750
2751static int jbd2_journal_create_slab(size_t size)
2752{
2753 static DEFINE_MUTEX(jbd2_slab_create_mutex);
2754 int i = order_base_2(size) - 10;
2755 size_t slab_size;
2756
2757 if (size == PAGE_SIZE)
2758 return 0;
2759
2760 if (i >= JBD2_MAX_SLABS)
2761 return -EINVAL;
2762
2763 if (unlikely(i < 0))
2764 i = 0;
2765 mutex_lock(&jbd2_slab_create_mutex);
2766 if (jbd2_slab[i]) {
2767 mutex_unlock(&jbd2_slab_create_mutex);
2768 return 0;
2769 }
2770
2771 slab_size = 1 << (i+10);
2772 jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
2773 slab_size, 0, NULL);
2774 mutex_unlock(&jbd2_slab_create_mutex);
2775 if (!jbd2_slab[i]) {
2776 printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
2777 return -ENOMEM;
2778 }
2779 return 0;
2780}
2781
2782static struct kmem_cache *get_slab(size_t size)
2783{
2784 int i = order_base_2(size) - 10;
2785
2786 BUG_ON(i >= JBD2_MAX_SLABS);
2787 if (unlikely(i < 0))
2788 i = 0;
2789 BUG_ON(jbd2_slab[i] == NULL);
2790 return jbd2_slab[i];
2791}
2792
2793void *jbd2_alloc(size_t size, gfp_t flags)
2794{
2795 void *ptr;
2796
2797 BUG_ON(size & (size-1));
2798
2799 if (size < PAGE_SIZE)
2800 ptr = kmem_cache_alloc(get_slab(size), flags);
2801 else
2802 ptr = (void *)__get_free_pages(flags, get_order(size));
2803
2804
2805
2806 BUG_ON(((unsigned long) ptr) & (size-1));
2807
2808 return ptr;
2809}
2810
2811void jbd2_free(void *ptr, size_t size)
2812{
2813 if (size < PAGE_SIZE)
2814 kmem_cache_free(get_slab(size), ptr);
2815 else
2816 free_pages((unsigned long)ptr, get_order(size));
2817};
2818
2819
2820
2821
/* Slab cache from which every struct journal_head is allocated. */
static struct kmem_cache *jbd2_journal_head_cache;
#if defined(CONFIG_JBD2_DEBUG)
/* Debug-only count of journal_heads that are currently allocated. */
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
#endif
2826
2827static int __init jbd2_journal_init_journal_head_cache(void)
2828{
2829 J_ASSERT(!jbd2_journal_head_cache);
2830 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
2831 sizeof(struct journal_head),
2832 0,
2833 SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU,
2834 NULL);
2835 if (!jbd2_journal_head_cache) {
2836 printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
2837 return -ENOMEM;
2838 }
2839 return 0;
2840}
2841
2842static void jbd2_journal_destroy_journal_head_cache(void)
2843{
2844 kmem_cache_destroy(jbd2_journal_head_cache);
2845 jbd2_journal_head_cache = NULL;
2846}
2847
2848
2849
2850
2851static struct journal_head *journal_alloc_journal_head(void)
2852{
2853 struct journal_head *ret;
2854
2855#ifdef CONFIG_JBD2_DEBUG
2856 atomic_inc(&nr_journal_heads);
2857#endif
2858 ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
2859 if (!ret) {
2860 jbd_debug(1, "out of memory for journal_head\n");
2861 pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
2862 ret = kmem_cache_zalloc(jbd2_journal_head_cache,
2863 GFP_NOFS | __GFP_NOFAIL);
2864 }
2865 if (ret)
2866 spin_lock_init(&ret->b_state_lock);
2867 return ret;
2868}
2869
2870static void journal_free_journal_head(struct journal_head *jh)
2871{
2872#ifdef CONFIG_JBD2_DEBUG
2873 atomic_dec(&nr_journal_heads);
2874 memset(jh, JBD2_POISON_FREE, sizeof(*jh));
2875#endif
2876 kmem_cache_free(jbd2_journal_head_cache, jh);
2877}
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
2921{
2922 struct journal_head *jh;
2923 struct journal_head *new_jh = NULL;
2924
2925repeat:
2926 if (!buffer_jbd(bh))
2927 new_jh = journal_alloc_journal_head();
2928
2929 jbd_lock_bh_journal_head(bh);
2930 if (buffer_jbd(bh)) {
2931 jh = bh2jh(bh);
2932 } else {
2933 J_ASSERT_BH(bh,
2934 (atomic_read(&bh->b_count) > 0) ||
2935 (bh->b_page && bh->b_page->mapping));
2936
2937 if (!new_jh) {
2938 jbd_unlock_bh_journal_head(bh);
2939 goto repeat;
2940 }
2941
2942 jh = new_jh;
2943 new_jh = NULL;
2944 set_buffer_jbd(bh);
2945 bh->b_private = jh;
2946 jh->b_bh = bh;
2947 get_bh(bh);
2948 BUFFER_TRACE(bh, "added journal_head");
2949 }
2950 jh->b_jcount++;
2951 jbd_unlock_bh_journal_head(bh);
2952 if (new_jh)
2953 journal_free_journal_head(new_jh);
2954 return bh->b_private;
2955}
2956
2957
2958
2959
2960
2961struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh)
2962{
2963 struct journal_head *jh = NULL;
2964
2965 jbd_lock_bh_journal_head(bh);
2966 if (buffer_jbd(bh)) {
2967 jh = bh2jh(bh);
2968 jh->b_jcount++;
2969 }
2970 jbd_unlock_bh_journal_head(bh);
2971 return jh;
2972}
2973
2974static void __journal_remove_journal_head(struct buffer_head *bh)
2975{
2976 struct journal_head *jh = bh2jh(bh);
2977
2978 J_ASSERT_JH(jh, jh->b_transaction == NULL);
2979 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
2980 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
2981 J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
2982 J_ASSERT_BH(bh, buffer_jbd(bh));
2983 J_ASSERT_BH(bh, jh2bh(jh) == bh);
2984 BUFFER_TRACE(bh, "remove journal_head");
2985
2986
2987 bh->b_private = NULL;
2988 jh->b_bh = NULL;
2989 clear_buffer_jbd(bh);
2990}
2991
2992static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
2993{
2994 if (jh->b_frozen_data) {
2995 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
2996 jbd2_free(jh->b_frozen_data, b_size);
2997 }
2998 if (jh->b_committed_data) {
2999 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
3000 jbd2_free(jh->b_committed_data, b_size);
3001 }
3002 journal_free_journal_head(jh);
3003}
3004
3005
3006
3007
3008
3009void jbd2_journal_put_journal_head(struct journal_head *jh)
3010{
3011 struct buffer_head *bh = jh2bh(jh);
3012
3013 jbd_lock_bh_journal_head(bh);
3014 J_ASSERT_JH(jh, jh->b_jcount > 0);
3015 --jh->b_jcount;
3016 if (!jh->b_jcount) {
3017 __journal_remove_journal_head(bh);
3018 jbd_unlock_bh_journal_head(bh);
3019 journal_release_journal_head(jh, bh->b_size);
3020 __brelse(bh);
3021 } else {
3022 jbd_unlock_bh_journal_head(bh);
3023 }
3024}
3025
3026
3027
3028
3029void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
3030{
3031 jinode->i_transaction = NULL;
3032 jinode->i_next_transaction = NULL;
3033 jinode->i_vfs_inode = inode;
3034 jinode->i_flags = 0;
3035 jinode->i_dirty_start = 0;
3036 jinode->i_dirty_end = 0;
3037 INIT_LIST_HEAD(&jinode->i_list);
3038}
3039
3040
3041
3042
3043
3044
3045void jbd2_journal_release_jbd_inode(journal_t *journal,
3046 struct jbd2_inode *jinode)
3047{
3048 if (!journal)
3049 return;
3050restart:
3051 spin_lock(&journal->j_list_lock);
3052
3053 if (jinode->i_flags & JI_COMMIT_RUNNING) {
3054 wait_queue_head_t *wq;
3055 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
3056 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
3057 prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
3058 spin_unlock(&journal->j_list_lock);
3059 schedule();
3060 finish_wait(wq, &wait.wq_entry);
3061 goto restart;
3062 }
3063
3064 if (jinode->i_transaction) {
3065 list_del(&jinode->i_list);
3066 jinode->i_transaction = NULL;
3067 }
3068 spin_unlock(&journal->j_list_lock);
3069}
3070
3071
#ifdef CONFIG_PROC_FS

/* Name of the jbd2 statistics directory, passed with a NULL parent. */
#define JBD2_STATS_PROC_NAME "fs/jbd2"

/* Create the jbd2 statistics proc directory and remember its entry. */
static void __init jbd2_create_jbd_stats_proc_entry(void)
{
	proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
}

/* Remove the statistics proc directory, if one is recorded. */
static void __exit jbd2_remove_jbd_stats_proc_entry(void)
{
	if (!proc_jbd2_stats)
		return;

	remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
}

#else /* !CONFIG_PROC_FS */

/* Without procfs both helpers compile away to nothing. */
#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)

#endif /* CONFIG_PROC_FS */
3093
/* Slab cache for struct jbd2_journal_handle objects. */
struct kmem_cache *jbd2_handle_cache;
/* Slab cache for struct jbd2_inode objects. */
struct kmem_cache *jbd2_inode_cache;
3095
3096static int __init jbd2_journal_init_inode_cache(void)
3097{
3098 J_ASSERT(!jbd2_inode_cache);
3099 jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
3100 if (!jbd2_inode_cache) {
3101 pr_emerg("JBD2: failed to create inode cache\n");
3102 return -ENOMEM;
3103 }
3104 return 0;
3105}
3106
3107static int __init jbd2_journal_init_handle_cache(void)
3108{
3109 J_ASSERT(!jbd2_handle_cache);
3110 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
3111 if (!jbd2_handle_cache) {
3112 printk(KERN_EMERG "JBD2: failed to create handle cache\n");
3113 return -ENOMEM;
3114 }
3115 return 0;
3116}
3117
3118static void jbd2_journal_destroy_inode_cache(void)
3119{
3120 kmem_cache_destroy(jbd2_inode_cache);
3121 jbd2_inode_cache = NULL;
3122}
3123
3124static void jbd2_journal_destroy_handle_cache(void)
3125{
3126 kmem_cache_destroy(jbd2_handle_cache);
3127 jbd2_handle_cache = NULL;
3128}
3129
3130
3131
3132
3133
3134static int __init journal_init_caches(void)
3135{
3136 int ret;
3137
3138 ret = jbd2_journal_init_revoke_record_cache();
3139 if (ret == 0)
3140 ret = jbd2_journal_init_revoke_table_cache();
3141 if (ret == 0)
3142 ret = jbd2_journal_init_journal_head_cache();
3143 if (ret == 0)
3144 ret = jbd2_journal_init_handle_cache();
3145 if (ret == 0)
3146 ret = jbd2_journal_init_inode_cache();
3147 if (ret == 0)
3148 ret = jbd2_journal_init_transaction_cache();
3149 return ret;
3150}
3151
/*
 * Destroy every jbd2 cache, including the on-demand size-class slabs.
 * Used both on module exit and when journal_init() fails part-way.
 */
static void jbd2_journal_destroy_caches(void)
{
	/* Caches set up by journal_init_caches(), in the same order. */
	jbd2_journal_destroy_revoke_record_cache();
	jbd2_journal_destroy_revoke_table_cache();
	jbd2_journal_destroy_journal_head_cache();
	jbd2_journal_destroy_handle_cache();
	jbd2_journal_destroy_inode_cache();
	jbd2_journal_destroy_transaction_cache();

	/* Size-class slabs created by jbd2_journal_create_slab(). */
	jbd2_journal_destroy_slabs();
}
3162
3163static int __init journal_init(void)
3164{
3165 int ret;
3166
3167 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
3168
3169 ret = journal_init_caches();
3170 if (ret == 0) {
3171 jbd2_create_jbd_stats_proc_entry();
3172 } else {
3173 jbd2_journal_destroy_caches();
3174 }
3175 return ret;
3176}
3177
3178static void __exit journal_exit(void)
3179{
3180#ifdef CONFIG_JBD2_DEBUG
3181 int n = atomic_read(&nr_journal_heads);
3182 if (n)
3183 printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
3184#endif
3185 jbd2_remove_jbd_stats_proc_entry();
3186 jbd2_journal_destroy_caches();
3187}
3188
3189MODULE_LICENSE("GPL");
3190module_init(journal_init);
3191module_exit(journal_exit);
3192
3193