1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22#include <linux/module.h>
23#include <linux/time.h>
24#include <linux/fs.h>
25#include <linux/jbd2.h>
26#include <linux/errno.h>
27#include <linux/slab.h>
28#include <linux/init.h>
29#include <linux/mm.h>
30#include <linux/freezer.h>
31#include <linux/pagemap.h>
32#include <linux/kthread.h>
33#include <linux/poison.h>
34#include <linux/proc_fs.h>
35#include <linux/seq_file.h>
36#include <linux/math64.h>
37#include <linux/hash.h>
38#include <linux/log2.h>
39#include <linux/vmalloc.h>
40#include <linux/backing-dev.h>
41#include <linux/bitops.h>
42#include <linux/ratelimit.h>
43#include <linux/sched/mm.h>
44
45#define CREATE_TRACE_POINTS
46#include <trace/events/jbd2.h>
47
48#include <linux/uaccess.h>
49#include <asm/page.h>
50
#ifdef CONFIG_JBD2_DEBUG
/*
 * Current jbd2 debug verbosity.  Exported to other modules and exposed as
 * the "jbd2_debug" module parameter (mode 0644).  __jbd2_debug() drops any
 * message whose level is greater than this value.
 */
ushort jbd2_journal_enable_debug __read_mostly;
EXPORT_SYMBOL(jbd2_journal_enable_debug);

module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2");
#endif
58
/* Handle / buffer-access interfaces exported to other modules. */
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
EXPORT_SYMBOL(jbd2_journal_lock_updates);
EXPORT_SYMBOL(jbd2_journal_unlock_updates);
EXPORT_SYMBOL(jbd2_journal_get_write_access);
EXPORT_SYMBOL(jbd2_journal_get_create_access);
EXPORT_SYMBOL(jbd2_journal_get_undo_access);
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_forget);
EXPORT_SYMBOL(jbd2_journal_flush);
EXPORT_SYMBOL(jbd2_journal_revoke);

/* Journal setup, teardown, commit and miscellaneous exported interfaces. */
EXPORT_SYMBOL(jbd2_journal_init_dev);
EXPORT_SYMBOL(jbd2_journal_init_inode);
EXPORT_SYMBOL(jbd2_journal_check_used_features);
EXPORT_SYMBOL(jbd2_journal_check_available_features);
EXPORT_SYMBOL(jbd2_journal_set_features);
EXPORT_SYMBOL(jbd2_journal_load);
EXPORT_SYMBOL(jbd2_journal_destroy);
EXPORT_SYMBOL(jbd2_journal_abort);
EXPORT_SYMBOL(jbd2_journal_errno);
EXPORT_SYMBOL(jbd2_journal_ack_err);
EXPORT_SYMBOL(jbd2_journal_clear_err);
EXPORT_SYMBOL(jbd2_log_wait_commit);
EXPORT_SYMBOL(jbd2_log_start_commit);
EXPORT_SYMBOL(jbd2_journal_start_commit);
EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
EXPORT_SYMBOL(jbd2_journal_wipe);
EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
EXPORT_SYMBOL(jbd2_journal_invalidatepage);
EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
EXPORT_SYMBOL(jbd2_journal_submit_inode_data_buffers);
EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
EXPORT_SYMBOL(jbd2_inode_cache);

/* Forward declaration; the definition is not in this part of the file. */
static int jbd2_journal_create_slab(size_t slab_size);
102
#ifdef CONFIG_JBD2_DEBUG
/*
 * Print one jbd2 debug message at KERN_DEBUG, prefixed with the caller's
 * file, function and line.  Messages whose @level exceeds the current
 * jbd2_journal_enable_debug setting are silently dropped.
 */
void __jbd2_debug(int level, const char *file, const char *func,
		  unsigned int line, const char *fmt, ...)
{
	va_list ap;
	struct va_format msg;

	if (level > jbd2_journal_enable_debug)
		return;

	msg.fmt = fmt;
	msg.va = &ap;

	va_start(ap, fmt);
	printk(KERN_DEBUG "%s: (%s, %u): %pV", file, func, line, &msg);
	va_end(ap);
}
EXPORT_SYMBOL(__jbd2_debug);
#endif
120
121
122static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
123{
124 if (!jbd2_journal_has_csum_v2or3_feature(j))
125 return 1;
126
127 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
128}
129
130static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
131{
132 __u32 csum;
133 __be32 old_csum;
134
135 old_csum = sb->s_checksum;
136 sb->s_checksum = 0;
137 csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
138 sb->s_checksum = old_csum;
139
140 return cpu_to_be32(csum);
141}
142
143
144
145
146
147static void commit_timeout(struct timer_list *t)
148{
149 journal_t *journal = from_timer(journal, t, j_commit_timer);
150
151 wake_up_process(journal->j_task);
152}
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/*
 * kjournald2: main loop of the per-journal commit thread.
 *
 * @arg is the journal_t this thread serves.  The thread registers itself in
 * journal->j_task, then loops under j_state_lock:
 *   - exits when JBD2_UNMOUNT is set in j_flags;
 *   - runs jbd2_journal_commit_transaction() whenever j_commit_sequence
 *     differs from j_commit_request;
 *   - otherwise freezes or sleeps on j_wait_commit until there is work;
 *   - after waking, requests a commit of the running transaction if its
 *     t_expires time has been reached.
 *
 * Always returns 0.
 */
static int kjournald2(void *arg)
{
	journal_t *journal = arg;
	transaction_t *transaction;

	/*
	 * Set up the commit timer; commit_timeout() wakes this thread when
	 * it fires.
	 */
	timer_setup(&journal->j_commit_timer, commit_timeout, 0);

	set_freezable();

	/*
	 * Publish ourselves and wake j_wait_done_commit, where
	 * jbd2_journal_start_thread() waits for j_task to become non-NULL.
	 */
	journal->j_task = current;
	wake_up(&journal->j_wait_done_commit);

	/*
	 * Enter a NOFS allocation scope.  The returned saved flags are
	 * discarded, so the scope is never restored in this function.
	 */
	memalloc_nofs_save();

	/*
	 * j_state_lock is held at the top of every loop iteration and is
	 * only dropped around blocking operations below.
	 */
	write_lock(&journal->j_state_lock);

loop:
	if (journal->j_flags & JBD2_UNMOUNT)
		goto end_loop;

	jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
		journal->j_commit_sequence, journal->j_commit_request);

	if (journal->j_commit_sequence != journal->j_commit_request) {
		/* A commit has been requested: run it with the lock dropped. */
		jbd_debug(1, "OK, requests differ\n");
		write_unlock(&journal->j_state_lock);
		del_timer_sync(&journal->j_commit_timer);
		jbd2_journal_commit_transaction(journal);
		write_lock(&journal->j_state_lock);
		goto loop;
	}

	/* Nothing to commit right now; let waiters re-check their condition. */
	wake_up(&journal->j_wait_done_commit);
	if (freezing(current)) {
		/*
		 * The freezer wants this task: drop the lock and let
		 * try_to_freeze() park us.
		 */
		jbd_debug(1, "Now suspending kjournald2\n");
		write_unlock(&journal->j_state_lock);
		try_to_freeze();
		write_lock(&journal->j_state_lock);
	} else {
		/*
		 * Queue on j_wait_commit first, then re-check every wake-up
		 * condition while still holding the lock, and only sleep if
		 * none of them is already true.
		 */
		DEFINE_WAIT(wait);
		int should_sleep = 1;

		prepare_to_wait(&journal->j_wait_commit, &wait,
				TASK_INTERRUPTIBLE);
		if (journal->j_commit_sequence != journal->j_commit_request)
			should_sleep = 0;
		transaction = journal->j_running_transaction;
		if (transaction && time_after_eq(jiffies,
						transaction->t_expires))
			should_sleep = 0;
		if (journal->j_flags & JBD2_UNMOUNT)
			should_sleep = 0;
		if (should_sleep) {
			write_unlock(&journal->j_state_lock);
			schedule();
			write_lock(&journal->j_state_lock);
		}
		finish_wait(&journal->j_wait_commit, &wait);
	}

	jbd_debug(1, "kjournald2 wakes\n");

	/*
	 * If the running transaction has reached its expiry time, request
	 * its commit; the next loop iteration will then perform it.
	 */
	transaction = journal->j_running_transaction;
	if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
		journal->j_commit_request = transaction->t_tid;
		jbd_debug(1, "woke because of timeout\n");
	}
	goto loop;

end_loop:
	/*
	 * Shutdown: stop the timer, clear j_task and wake
	 * j_wait_done_commit, where journal_kill_thread() waits for j_task
	 * to become NULL.
	 */
	del_timer_sync(&journal->j_commit_timer);
	journal->j_task = NULL;
	wake_up(&journal->j_wait_done_commit);
	jbd_debug(1, "Journal thread exiting.\n");
	write_unlock(&journal->j_state_lock);
	return 0;
}
275
276static int jbd2_journal_start_thread(journal_t *journal)
277{
278 struct task_struct *t;
279
280 t = kthread_run(kjournald2, journal, "jbd2/%s",
281 journal->j_devname);
282 if (IS_ERR(t))
283 return PTR_ERR(t);
284
285 wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
286 return 0;
287}
288
289static void journal_kill_thread(journal_t *journal)
290{
291 write_lock(&journal->j_state_lock);
292 journal->j_flags |= JBD2_UNMOUNT;
293
294 while (journal->j_task) {
295 write_unlock(&journal->j_state_lock);
296 wake_up(&journal->j_wait_commit);
297 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
298 write_lock(&journal->j_state_lock);
299 }
300 write_unlock(&journal->j_state_lock);
301}
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
/*
 * jbd2_journal_write_metadata_buffer: build the buffer_head used to write
 * one metadata buffer into the journal.
 *
 * @transaction: transaction the buffer is being written for
 * @jh_in:       journal head of the buffer to be logged
 * @bh_out:      receives the newly allocated buffer_head describing the
 *               data to write at @blocknr on the journal device
 * @blocknr:     block number assigned to the new buffer_head
 *
 * The new buffer_head points either at the original buffer's page or at a
 * frozen copy (jh_in->b_frozen_data).  If the data starts with
 * JBD2_MAGIC_NUMBER, a private copy is made if none exists yet and its
 * first word is zeroed ("escaped").  On success @jh_in is filed on the
 * BJ_Shadow list and the original buffer is marked shadow.
 *
 * Returns a non-negative bitmask on success:
 *   bit 0 - the first word of the data was escaped (zeroed)
 *   bit 1 - the written data is a frozen copy rather than the live buffer
 * or -ENOMEM if the copy-out buffer could not be allocated (in which case
 * the new buffer_head is released and *bh_out is not set).
 */
int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
				  struct journal_head  *jh_in,
				  struct buffer_head **bh_out,
				  sector_t blocknr)
{
	int need_copy_out = 0;
	int done_copy_out = 0;
	int do_escape = 0;
	char *mapped_data;
	struct buffer_head *new_bh;
	struct page *new_page;
	unsigned int new_offset;
	struct buffer_head *bh_in = jh2bh(jh_in);
	journal_t *journal = transaction->t_journal;

	/* The buffer being logged must be marked jbddirty. */
	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));

	/* __GFP_NOFAIL: this allocation is not expected to return NULL. */
	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);

	/* Start with one reference, owned by whoever receives *bh_out. */
	atomic_set(&new_bh->b_count, 1);

	spin_lock(&jh_in->b_state_lock);
repeat:
	/*
	 * If a frozen copy already exists, write that; otherwise point at
	 * the live buffer's page for now.
	 */
	if (jh_in->b_frozen_data) {
		done_copy_out = 1;
		new_page = virt_to_page(jh_in->b_frozen_data);
		new_offset = offset_in_page(jh_in->b_frozen_data);
	} else {
		new_page = jh2bh(jh_in)->b_page;
		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
	}

	mapped_data = kmap_atomic(new_page);

	/*
	 * Fire the frozen trigger only when looking at live data, and do it
	 * before the magic-number check below so the check sees whatever
	 * the trigger leaves in the buffer.
	 */
	if (!done_copy_out)
		jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
					   jh_in->b_triggers);

	/*
	 * Data that begins with the journal magic number must be escaped,
	 * which in turn requires a private copy to modify.
	 */
	if (*((__be32 *)(mapped_data + new_offset)) ==
				cpu_to_be32(JBD2_MAGIC_NUMBER)) {
		need_copy_out = 1;
		do_escape = 1;
	}
	kunmap_atomic(mapped_data);

	/*
	 * Make the private copy if one is needed and does not exist yet.
	 */
	if (need_copy_out && !done_copy_out) {
		char *tmp;

		/* Drop the state lock around the allocation. */
		spin_unlock(&jh_in->b_state_lock);
		tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
		if (!tmp) {
			brelse(new_bh);
			return -ENOMEM;
		}
		spin_lock(&jh_in->b_state_lock);
		/*
		 * Someone else installed a frozen copy while the lock was
		 * dropped: discard ours and start over using theirs.
		 */
		if (jh_in->b_frozen_data) {
			jbd2_free(tmp, bh_in->b_size);
			goto repeat;
		}

		jh_in->b_frozen_data = tmp;
		mapped_data = kmap_atomic(new_page);
		memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
		kunmap_atomic(mapped_data);

		/* From here on the journal write uses the frozen copy. */
		new_page = virt_to_page(tmp);
		new_offset = offset_in_page(tmp);
		done_copy_out = 1;

		/*
		 * Remember which triggers were in effect when the data was
		 * frozen.
		 */
		jh_in->b_frozen_triggers = jh_in->b_triggers;
	}

	/*
	 * Escape the data by zeroing its first word.  When do_escape is set
	 * a copy has always been made (or found) above, so this writes to
	 * the frozen copy.
	 */
	if (do_escape) {
		mapped_data = kmap_atomic(new_page);
		*((unsigned int *)(mapped_data + new_offset)) = 0;
		kunmap_atomic(mapped_data);
	}

	/* Describe the journal write in the new buffer_head. */
	set_bh_page(new_bh, new_page, new_offset);
	new_bh->b_size = bh_in->b_size;
	new_bh->b_bdev = journal->j_dev;
	new_bh->b_blocknr = blocknr;
	new_bh->b_private = bh_in;	/* back-pointer to the original buffer */
	set_buffer_mapped(new_bh);
	set_buffer_dirty(new_bh);

	*bh_out = new_bh;

	/*
	 * File the original buffer on the shadow list (under j_list_lock)
	 * and flag it as shadow before releasing the state lock.
	 */
	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
	spin_unlock(&journal->j_list_lock);
	set_buffer_shadow(bh_in);
	spin_unlock(&jh_in->b_state_lock);

	return do_escape | (done_copy_out << 1);
}
474
475
476
477
478
479
480
481
482
483
484int __jbd2_log_start_commit(journal_t *journal, tid_t target)
485{
486
487 if (journal->j_commit_request == target)
488 return 0;
489
490
491
492
493
494
495 if (journal->j_running_transaction &&
496 journal->j_running_transaction->t_tid == target) {
497
498
499
500
501
502 journal->j_commit_request = target;
503 jbd_debug(1, "JBD2: requesting commit %u/%u\n",
504 journal->j_commit_request,
505 journal->j_commit_sequence);
506 journal->j_running_transaction->t_requested = jiffies;
507 wake_up(&journal->j_wait_commit);
508 return 1;
509 } else if (!tid_geq(journal->j_commit_request, target))
510
511
512
513 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
514 journal->j_commit_request,
515 journal->j_commit_sequence,
516 target, journal->j_running_transaction ?
517 journal->j_running_transaction->t_tid : 0);
518 return 0;
519}
520
521int jbd2_log_start_commit(journal_t *journal, tid_t tid)
522{
523 int ret;
524
525 write_lock(&journal->j_state_lock);
526 ret = __jbd2_log_start_commit(journal, tid);
527 write_unlock(&journal->j_state_lock);
528 return ret;
529}
530
531
532
533
534
535
536
537
538static int __jbd2_journal_force_commit(journal_t *journal)
539{
540 transaction_t *transaction = NULL;
541 tid_t tid;
542 int need_to_start = 0, ret = 0;
543
544 read_lock(&journal->j_state_lock);
545 if (journal->j_running_transaction && !current->journal_info) {
546 transaction = journal->j_running_transaction;
547 if (!tid_geq(journal->j_commit_request, transaction->t_tid))
548 need_to_start = 1;
549 } else if (journal->j_committing_transaction)
550 transaction = journal->j_committing_transaction;
551
552 if (!transaction) {
553
554 read_unlock(&journal->j_state_lock);
555 return 0;
556 }
557 tid = transaction->t_tid;
558 read_unlock(&journal->j_state_lock);
559 if (need_to_start)
560 jbd2_log_start_commit(journal, tid);
561 ret = jbd2_log_wait_commit(journal, tid);
562 if (!ret)
563 ret = 1;
564
565 return ret;
566}
567
568
569
570
571
572
573
574
575
576
577
578int jbd2_journal_force_commit_nested(journal_t *journal)
579{
580 int ret;
581
582 ret = __jbd2_journal_force_commit(journal);
583 return ret > 0;
584}
585
586
587
588
589
590
591
592
593int jbd2_journal_force_commit(journal_t *journal)
594{
595 int ret;
596
597 J_ASSERT(!current->journal_info);
598 ret = __jbd2_journal_force_commit(journal);
599 if (ret > 0)
600 ret = 0;
601 return ret;
602}
603
604
605
606
607
608
609int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
610{
611 int ret = 0;
612
613 write_lock(&journal->j_state_lock);
614 if (journal->j_running_transaction) {
615 tid_t tid = journal->j_running_transaction->t_tid;
616
617 __jbd2_log_start_commit(journal, tid);
618
619
620 if (ptid)
621 *ptid = tid;
622 ret = 1;
623 } else if (journal->j_committing_transaction) {
624
625
626
627
628 if (ptid)
629 *ptid = journal->j_committing_transaction->t_tid;
630 ret = 1;
631 }
632 write_unlock(&journal->j_state_lock);
633 return ret;
634}
635
636
637
638
639
640
641
642int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
643{
644 int ret = 0;
645 transaction_t *commit_trans;
646
647 if (!(journal->j_flags & JBD2_BARRIER))
648 return 0;
649 read_lock(&journal->j_state_lock);
650
651 if (tid_geq(journal->j_commit_sequence, tid))
652 goto out;
653 commit_trans = journal->j_committing_transaction;
654 if (!commit_trans || commit_trans->t_tid != tid) {
655 ret = 1;
656 goto out;
657 }
658
659
660
661
662 if (journal->j_fs_dev != journal->j_dev) {
663 if (!commit_trans->t_need_data_flush ||
664 commit_trans->t_state >= T_COMMIT_DFLUSH)
665 goto out;
666 } else {
667 if (commit_trans->t_state >= T_COMMIT_JFLUSH)
668 goto out;
669 }
670 ret = 1;
671out:
672 read_unlock(&journal->j_state_lock);
673 return ret;
674}
675EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
676
677
678
679
680
/*
 * jbd2_log_wait_commit: block until transaction @tid has been committed,
 * i.e. until j_commit_sequence has reached @tid.
 *
 * Returns 0 normally, or -EIO if the journal is found aborted after the
 * wait.
 */
int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
{
	int err = 0;

	read_lock(&journal->j_state_lock);
#ifdef CONFIG_PROVE_LOCKING
	/*
	 * Lock-checking builds only: when @tid is neither committed nor the
	 * transaction currently committing, call
	 * jbd2_might_wait_for_commit() with the state lock dropped.
	 */
	if (tid_gt(tid, journal->j_commit_sequence) &&
	    (!journal->j_committing_transaction ||
	     journal->j_committing_transaction->t_tid != tid)) {
		read_unlock(&journal->j_state_lock);
		jbd2_might_wait_for_commit(journal);
		read_lock(&journal->j_state_lock);
	}
#endif
#ifdef CONFIG_JBD2_DEBUG
	/* Debug builds: complain if nobody has requested this commit yet. */
	if (!tid_geq(journal->j_commit_request, tid)) {
		printk(KERN_ERR
		       "%s: error: j_commit_request=%u, tid=%u\n",
		       __func__, journal->j_commit_request, tid);
	}
#endif
	while (tid_gt(tid, journal->j_commit_sequence)) {
		jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
				  tid, journal->j_commit_sequence);
		/*
		 * Drop the lock, poke the commit thread, and sleep until the
		 * commit sequence catches up; then re-check under the lock.
		 */
		read_unlock(&journal->j_state_lock);
		wake_up(&journal->j_wait_commit);
		wait_event(journal->j_wait_done_commit,
				!tid_gt(tid, journal->j_commit_sequence));
		read_lock(&journal->j_state_lock);
	}
	read_unlock(&journal->j_state_lock);

	if (unlikely(is_journal_aborted(journal)))
		err = -EIO;
	return err;
}
722
723
724
725
726
727
728
729
730int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
731{
732 if (unlikely(is_journal_aborted(journal)))
733 return -EIO;
734
735
736
737
738 if (!journal->j_stats.ts_tid)
739 return -EINVAL;
740
741 write_lock(&journal->j_state_lock);
742 if (tid <= journal->j_commit_sequence) {
743 write_unlock(&journal->j_state_lock);
744 return -EALREADY;
745 }
746
747 if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
748 (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
749 DEFINE_WAIT(wait);
750
751 prepare_to_wait(&journal->j_fc_wait, &wait,
752 TASK_UNINTERRUPTIBLE);
753 write_unlock(&journal->j_state_lock);
754 schedule();
755 finish_wait(&journal->j_fc_wait, &wait);
756 return -EALREADY;
757 }
758 journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
759 write_unlock(&journal->j_state_lock);
760
761 return 0;
762}
763EXPORT_SYMBOL(jbd2_fc_begin_commit);
764
765
766
767
768
769static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
770{
771 if (journal->j_fc_cleanup_callback)
772 journal->j_fc_cleanup_callback(journal, 0);
773 write_lock(&journal->j_state_lock);
774 journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
775 if (fallback)
776 journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
777 write_unlock(&journal->j_state_lock);
778 wake_up(&journal->j_fc_wait);
779 if (fallback)
780 return jbd2_complete_transaction(journal, tid);
781 return 0;
782}
783
784int jbd2_fc_end_commit(journal_t *journal)
785{
786 return __jbd2_fc_end_commit(journal, 0, false);
787}
788EXPORT_SYMBOL(jbd2_fc_end_commit);
789
790int jbd2_fc_end_commit_fallback(journal_t *journal)
791{
792 tid_t tid;
793
794 read_lock(&journal->j_state_lock);
795 tid = journal->j_running_transaction ?
796 journal->j_running_transaction->t_tid : 0;
797 read_unlock(&journal->j_state_lock);
798 return __jbd2_fc_end_commit(journal, tid, true);
799}
800EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
801
802
803int jbd2_transaction_committed(journal_t *journal, tid_t tid)
804{
805 int ret = 1;
806
807 read_lock(&journal->j_state_lock);
808 if (journal->j_running_transaction &&
809 journal->j_running_transaction->t_tid == tid)
810 ret = 0;
811 if (journal->j_committing_transaction &&
812 journal->j_committing_transaction->t_tid == tid)
813 ret = 0;
814 read_unlock(&journal->j_state_lock);
815 return ret;
816}
817EXPORT_SYMBOL(jbd2_transaction_committed);
818
819
820
821
822
823
824
825
826int jbd2_complete_transaction(journal_t *journal, tid_t tid)
827{
828 int need_to_wait = 1;
829
830 read_lock(&journal->j_state_lock);
831 if (journal->j_running_transaction &&
832 journal->j_running_transaction->t_tid == tid) {
833 if (journal->j_commit_request != tid) {
834
835 read_unlock(&journal->j_state_lock);
836 jbd2_log_start_commit(journal, tid);
837 goto wait_commit;
838 }
839 } else if (!(journal->j_committing_transaction &&
840 journal->j_committing_transaction->t_tid == tid))
841 need_to_wait = 0;
842 read_unlock(&journal->j_state_lock);
843 if (!need_to_wait)
844 return 0;
845wait_commit:
846 return jbd2_log_wait_commit(journal, tid);
847}
848EXPORT_SYMBOL(jbd2_complete_transaction);
849
850
851
852
853
854int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
855{
856 unsigned long blocknr;
857
858 write_lock(&journal->j_state_lock);
859 J_ASSERT(journal->j_free > 1);
860
861 blocknr = journal->j_head;
862 journal->j_head++;
863 journal->j_free--;
864 if (journal->j_head == journal->j_last)
865 journal->j_head = journal->j_first;
866 write_unlock(&journal->j_state_lock);
867 return jbd2_journal_bmap(journal, blocknr, retp);
868}
869
870
871int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
872{
873 unsigned long long pblock;
874 unsigned long blocknr;
875 int ret = 0;
876 struct buffer_head *bh;
877 int fc_off;
878
879 *bh_out = NULL;
880
881 if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
882 fc_off = journal->j_fc_off;
883 blocknr = journal->j_fc_first + fc_off;
884 journal->j_fc_off++;
885 } else {
886 ret = -EINVAL;
887 }
888
889 if (ret)
890 return ret;
891
892 ret = jbd2_journal_bmap(journal, blocknr, &pblock);
893 if (ret)
894 return ret;
895
896 bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
897 if (!bh)
898 return -ENOMEM;
899
900
901 journal->j_fc_wbuf[fc_off] = bh;
902
903 *bh_out = bh;
904
905 return 0;
906}
907EXPORT_SYMBOL(jbd2_fc_get_buf);
908
909
910
911
912
913int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
914{
915 struct buffer_head *bh;
916 int i, j_fc_off;
917
918 j_fc_off = journal->j_fc_off;
919
920
921
922
923
924 for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
925 bh = journal->j_fc_wbuf[i];
926 wait_on_buffer(bh);
927 put_bh(bh);
928 journal->j_fc_wbuf[i] = NULL;
929 if (unlikely(!buffer_uptodate(bh)))
930 return -EIO;
931 }
932
933 return 0;
934}
935EXPORT_SYMBOL(jbd2_fc_wait_bufs);
936
937
938
939
940
941int jbd2_fc_release_bufs(journal_t *journal)
942{
943 struct buffer_head *bh;
944 int i, j_fc_off;
945
946 j_fc_off = journal->j_fc_off;
947
948
949
950
951
952 for (i = j_fc_off - 1; i >= 0; i--) {
953 bh = journal->j_fc_wbuf[i];
954 if (!bh)
955 break;
956 put_bh(bh);
957 journal->j_fc_wbuf[i] = NULL;
958 }
959
960 return 0;
961}
962EXPORT_SYMBOL(jbd2_fc_release_bufs);
963
964
965
966
967
968
969
970
971int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
972 unsigned long long *retp)
973{
974 int err = 0;
975 unsigned long long ret;
976 sector_t block = 0;
977
978 if (journal->j_inode) {
979 block = blocknr;
980 ret = bmap(journal->j_inode, &block);
981
982 if (ret || !block) {
983 printk(KERN_ALERT "%s: journal block not found "
984 "at offset %lu on %s\n",
985 __func__, blocknr, journal->j_devname);
986 err = -EIO;
987 jbd2_journal_abort(journal, err);
988 } else {
989 *retp = block;
990 }
991
992 } else {
993 *retp = blocknr;
994 }
995 return err;
996}
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008struct buffer_head *
1009jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
1010{
1011 journal_t *journal = transaction->t_journal;
1012 struct buffer_head *bh;
1013 unsigned long long blocknr;
1014 journal_header_t *header;
1015 int err;
1016
1017 err = jbd2_journal_next_log_block(journal, &blocknr);
1018
1019 if (err)
1020 return NULL;
1021
1022 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
1023 if (!bh)
1024 return NULL;
1025 atomic_dec(&transaction->t_outstanding_credits);
1026 lock_buffer(bh);
1027 memset(bh->b_data, 0, journal->j_blocksize);
1028 header = (journal_header_t *)bh->b_data;
1029 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
1030 header->h_blocktype = cpu_to_be32(type);
1031 header->h_sequence = cpu_to_be32(transaction->t_tid);
1032 set_buffer_uptodate(bh);
1033 unlock_buffer(bh);
1034 BUFFER_TRACE(bh, "return this buffer");
1035 return bh;
1036}
1037
1038void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
1039{
1040 struct jbd2_journal_block_tail *tail;
1041 __u32 csum;
1042
1043 if (!jbd2_journal_has_csum_v2or3(j))
1044 return;
1045
1046 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
1047 sizeof(struct jbd2_journal_block_tail));
1048 tail->t_checksum = 0;
1049 csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
1050 tail->t_checksum = cpu_to_be32(csum);
1051}
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
1064 unsigned long *block)
1065{
1066 transaction_t *transaction;
1067 int ret;
1068
1069 read_lock(&journal->j_state_lock);
1070 spin_lock(&journal->j_list_lock);
1071 transaction = journal->j_checkpoint_transactions;
1072 if (transaction) {
1073 *tid = transaction->t_tid;
1074 *block = transaction->t_log_start;
1075 } else if ((transaction = journal->j_committing_transaction) != NULL) {
1076 *tid = transaction->t_tid;
1077 *block = transaction->t_log_start;
1078 } else if ((transaction = journal->j_running_transaction) != NULL) {
1079 *tid = transaction->t_tid;
1080 *block = journal->j_head;
1081 } else {
1082 *tid = journal->j_transaction_sequence;
1083 *block = journal->j_head;
1084 }
1085 ret = tid_gt(*tid, journal->j_tail_sequence);
1086 spin_unlock(&journal->j_list_lock);
1087 read_unlock(&journal->j_state_lock);
1088
1089 return ret;
1090}
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
1103{
1104 unsigned long freed;
1105 int ret;
1106
1107 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1108
1109
1110
1111
1112
1113
1114
1115 ret = jbd2_journal_update_sb_log_tail(journal, tid, block,
1116 REQ_SYNC | REQ_FUA);
1117 if (ret)
1118 goto out;
1119
1120 write_lock(&journal->j_state_lock);
1121 freed = block - journal->j_tail;
1122 if (block < journal->j_tail)
1123 freed += journal->j_last - journal->j_first;
1124
1125 trace_jbd2_update_log_tail(journal, tid, block, freed);
1126 jbd_debug(1,
1127 "Cleaning journal tail from %u to %u (offset %lu), "
1128 "freeing %lu\n",
1129 journal->j_tail_sequence, tid, block, freed);
1130
1131 journal->j_free += freed;
1132 journal->j_tail_sequence = tid;
1133 journal->j_tail = block;
1134 write_unlock(&journal->j_state_lock);
1135
1136out:
1137 return ret;
1138}
1139
1140
1141
1142
1143
1144
1145void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
1146{
1147 mutex_lock_io(&journal->j_checkpoint_mutex);
1148 if (tid_gt(tid, journal->j_tail_sequence))
1149 __jbd2_update_log_tail(journal, tid, block);
1150 mutex_unlock(&journal->j_checkpoint_mutex);
1151}
1152
/*
 * Per-open state of the journal's "info" proc file; allocated in
 * jbd2_seq_info_open() and freed in jbd2_seq_info_release().
 */
struct jbd2_stats_proc_session {
	journal_t *journal;			/* journal being reported on */
	struct transaction_stats_s *stats;	/* private snapshot of journal->j_stats */
	int start;	/* NOTE(review): not referenced in the visible code */
	int max;	/* NOTE(review): not referenced in the visible code */
};
1159
1160static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
1161{
1162 return *pos ? NULL : SEQ_START_TOKEN;
1163}
1164
1165static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
1166{
1167 (*pos)++;
1168 return NULL;
1169}
1170
1171static int jbd2_seq_info_show(struct seq_file *seq, void *v)
1172{
1173 struct jbd2_stats_proc_session *s = seq->private;
1174
1175 if (v != SEQ_START_TOKEN)
1176 return 0;
1177 seq_printf(seq, "%lu transactions (%lu requested), "
1178 "each up to %u blocks\n",
1179 s->stats->ts_tid, s->stats->ts_requested,
1180 s->journal->j_max_transaction_buffers);
1181 if (s->stats->ts_tid == 0)
1182 return 0;
1183 seq_printf(seq, "average: \n %ums waiting for transaction\n",
1184 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid));
1185 seq_printf(seq, " %ums request delay\n",
1186 (s->stats->ts_requested == 0) ? 0 :
1187 jiffies_to_msecs(s->stats->run.rs_request_delay /
1188 s->stats->ts_requested));
1189 seq_printf(seq, " %ums running transaction\n",
1190 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid));
1191 seq_printf(seq, " %ums transaction was being locked\n",
1192 jiffies_to_msecs(s->stats->run.rs_locked / s->stats->ts_tid));
1193 seq_printf(seq, " %ums flushing data (in ordered mode)\n",
1194 jiffies_to_msecs(s->stats->run.rs_flushing / s->stats->ts_tid));
1195 seq_printf(seq, " %ums logging transaction\n",
1196 jiffies_to_msecs(s->stats->run.rs_logging / s->stats->ts_tid));
1197 seq_printf(seq, " %lluus average transaction commit time\n",
1198 div_u64(s->journal->j_average_commit_time, 1000));
1199 seq_printf(seq, " %lu handles per transaction\n",
1200 s->stats->run.rs_handle_count / s->stats->ts_tid);
1201 seq_printf(seq, " %lu blocks per transaction\n",
1202 s->stats->run.rs_blocks / s->stats->ts_tid);
1203 seq_printf(seq, " %lu logged blocks per transaction\n",
1204 s->stats->run.rs_blocks_logged / s->stats->ts_tid);
1205 return 0;
1206}
1207
/* seq_file ->stop: nothing to release; ->start takes no locks or references. */
static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
{
}
1211
/* seq_file iterator for the journal "info" proc file (a single record). */
static const struct seq_operations jbd2_seq_info_ops = {
	.start  = jbd2_seq_info_start,
	.next   = jbd2_seq_info_next,
	.stop   = jbd2_seq_info_stop,
	.show   = jbd2_seq_info_show,
};
1218
1219static int jbd2_seq_info_open(struct inode *inode, struct file *file)
1220{
1221 journal_t *journal = PDE_DATA(inode);
1222 struct jbd2_stats_proc_session *s;
1223 int rc, size;
1224
1225 s = kmalloc(sizeof(*s), GFP_KERNEL);
1226 if (s == NULL)
1227 return -ENOMEM;
1228 size = sizeof(struct transaction_stats_s);
1229 s->stats = kmalloc(size, GFP_KERNEL);
1230 if (s->stats == NULL) {
1231 kfree(s);
1232 return -ENOMEM;
1233 }
1234 spin_lock(&journal->j_history_lock);
1235 memcpy(s->stats, &journal->j_stats, size);
1236 s->journal = journal;
1237 spin_unlock(&journal->j_history_lock);
1238
1239 rc = seq_open(file, &jbd2_seq_info_ops);
1240 if (rc == 0) {
1241 struct seq_file *m = file->private_data;
1242 m->private = s;
1243 } else {
1244 kfree(s->stats);
1245 kfree(s);
1246 }
1247 return rc;
1248
1249}
1250
1251static int jbd2_seq_info_release(struct inode *inode, struct file *file)
1252{
1253 struct seq_file *seq = file->private_data;
1254 struct jbd2_stats_proc_session *s = seq->private;
1255 kfree(s->stats);
1256 kfree(s);
1257 return seq_release(inode, file);
1258}
1259
/*
 * File operations of the journal "info" proc file: custom open/release
 * that manage the statistics snapshot, generic seq_file read/lseek.
 */
static const struct proc_ops jbd2_info_proc_ops = {
	.proc_open	= jbd2_seq_info_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= jbd2_seq_info_release,
};
1266
/*
 * Parent proc directory under which each journal creates its own
 * sub-directory (see jbd2_stats_proc_init()).  It is not assigned anywhere
 * in this part of the file.
 */
static struct proc_dir_entry *proc_jbd2_stats;
1268
1269static void jbd2_stats_proc_init(journal_t *journal)
1270{
1271 journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
1272 if (journal->j_proc_entry) {
1273 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
1274 &jbd2_info_proc_ops, journal);
1275 }
1276}
1277
/*
 * Undo jbd2_stats_proc_init(): remove the "info" file first, then the
 * journal's directory that contained it.
 */
static void jbd2_stats_proc_exit(journal_t *journal)
{
	remove_proc_entry("info", journal->j_proc_entry);
	remove_proc_entry(journal->j_devname, proc_jbd2_stats);
}
1283
1284
1285static int jbd2_min_tag_size(void)
1286{
1287
1288
1289
1290
1291 return sizeof(journal_block_tag_t) - 4;
1292}
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
/*
 * journal_init_common: allocate a journal_t and perform the
 * initialisation shared by jbd2_journal_init_dev() and
 * jbd2_journal_init_inode().
 *
 * @bdev:      block device holding the journal
 * @fs_dev:    block device holding the filesystem
 * @start:     block number of the journal superblock on @bdev
 * @len:       length of the journal, in blocks
 * @blocksize: journal block size, in bytes
 *
 * Returns the new journal, or NULL on any failure (everything allocated
 * so far is freed).
 */
static journal_t *journal_init_common(struct block_device *bdev,
			struct block_device *fs_dev,
			unsigned long long start, int len, int blocksize)
{
	static struct lock_class_key jbd2_trans_commit_key;
	journal_t *journal;
	int err;
	struct buffer_head *bh;
	int n;

	/* Zeroed allocation: every field not set below starts as 0/NULL. */
	journal = kzalloc(sizeof(*journal), GFP_KERNEL);
	if (!journal)
		return NULL;

	init_waitqueue_head(&journal->j_wait_transaction_locked);
	init_waitqueue_head(&journal->j_wait_done_commit);
	init_waitqueue_head(&journal->j_wait_commit);
	init_waitqueue_head(&journal->j_wait_updates);
	init_waitqueue_head(&journal->j_wait_reserved);
	init_waitqueue_head(&journal->j_fc_wait);
	mutex_init(&journal->j_abort_mutex);
	mutex_init(&journal->j_barrier);
	mutex_init(&journal->j_checkpoint_mutex);
	spin_lock_init(&journal->j_revoke_lock);
	spin_lock_init(&journal->j_list_lock);
	rwlock_init(&journal->j_state_lock);

	/* Default commit/batching parameters. */
	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
	journal->j_min_batch_time = 0;
	journal->j_max_batch_time = 15000; /* NOTE(review): units not visible here - confirm */
	atomic_set(&journal->j_reserved_credits, 0);

	/*
	 * The journal starts out flagged as aborted.
	 * NOTE(review): presumably cleared once the journal has been loaded -
	 * confirm against the load path.
	 */
	journal->j_flags = JBD2_ABORT;

	/* Set up the revoke table with the default hash size. */
	err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
	if (err)
		goto err_cleanup;

	spin_lock_init(&journal->j_history_lock);

	lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
			 &jbd2_trans_commit_key, 0);

	/* Record the journal's geometry and backing devices. */
	journal->j_blocksize = blocksize;
	journal->j_dev = bdev;
	journal->j_fs_dev = fs_dev;
	journal->j_blk_offset = start;
	journal->j_total_len = len;

	/*
	 * Size the j_wbuf array by the largest number of minimum-size tags
	 * that fit in one journal block.
	 */
	n = journal->j_blocksize / jbd2_min_tag_size();
	journal->j_wbufsize = n;
	journal->j_fc_wbuf = NULL;
	journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
					GFP_KERNEL);
	if (!journal->j_wbuf)
		goto err_cleanup;

	/* Get (but do not read) the buffer for the journal superblock. */
	bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
	if (!bh) {
		pr_err("%s: Cannot get buffer for journal superblock\n",
			__func__);
		goto err_cleanup;
	}
	journal->j_sb_buffer = bh;
	journal->j_superblock = (journal_superblock_t *)bh->b_data;

	return journal;

err_cleanup:
	/*
	 * Shared failure path.  j_wbuf may still be NULL here (kfree(NULL)
	 * is a no-op), and the revoke teardown is also reached when the
	 * revoke setup itself failed.
	 */
	kfree(journal->j_wbuf);
	jbd2_journal_destroy_revoke(journal);
	kfree(journal);
	return NULL;
}
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404journal_t *jbd2_journal_init_dev(struct block_device *bdev,
1405 struct block_device *fs_dev,
1406 unsigned long long start, int len, int blocksize)
1407{
1408 journal_t *journal;
1409
1410 journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
1411 if (!journal)
1412 return NULL;
1413
1414 bdevname(journal->j_dev, journal->j_devname);
1415 strreplace(journal->j_devname, '/', '!');
1416 jbd2_stats_proc_init(journal);
1417
1418 return journal;
1419}
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429journal_t *jbd2_journal_init_inode(struct inode *inode)
1430{
1431 journal_t *journal;
1432 sector_t blocknr;
1433 char *p;
1434 int err = 0;
1435
1436 blocknr = 0;
1437 err = bmap(inode, &blocknr);
1438
1439 if (err || !blocknr) {
1440 pr_err("%s: Cannot locate journal superblock\n",
1441 __func__);
1442 return NULL;
1443 }
1444
1445 jbd_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
1446 inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
1447 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
1448
1449 journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
1450 blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
1451 inode->i_sb->s_blocksize);
1452 if (!journal)
1453 return NULL;
1454
1455 journal->j_inode = inode;
1456 bdevname(journal->j_dev, journal->j_devname);
1457 p = strreplace(journal->j_devname, '/', '!');
1458 sprintf(p, "-%lu", journal->j_inode->i_ino);
1459 jbd2_stats_proc_init(journal);
1460
1461 return journal;
1462}
1463
1464
1465
1466
1467
1468
1469static void journal_fail_superblock(journal_t *journal)
1470{
1471 struct buffer_head *bh = journal->j_sb_buffer;
1472 brelse(bh);
1473 journal->j_sb_buffer = NULL;
1474}
1475
1476
1477
1478
1479
1480
1481
1482
/*
 * Reset the in-memory log pointers of @journal to describe an empty log
 * spanning the blocks s_first..s_maxlen recorded in the superblock, update
 * the on-disk log tail if needed, and start the journal thread.
 *
 * Returns -EINVAL (after releasing the superblock buffer) when the journal
 * is shorter than JBD2_MIN_JOURNAL_BLOCKS, otherwise the result of
 * jbd2_journal_start_thread().
 */
static int journal_reset(journal_t *journal)
{
	journal_superblock_t *sb = journal->j_superblock;
	unsigned long long first, last;

	first = be32_to_cpu(sb->s_first);
	last = be32_to_cpu(sb->s_maxlen);
	if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
		printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
		       first, last);
		journal_fail_superblock(journal);
		return -EINVAL;
	}

	journal->j_first = first;
	journal->j_last = last;

	/* Empty log: head and tail both sit on the first usable block. */
	journal->j_head = journal->j_first;
	journal->j_tail = journal->j_first;
	journal->j_free = journal->j_last - journal->j_first;

	/* The next transaction id becomes the tail; nothing is pending. */
	journal->j_tail_sequence = journal->j_transaction_sequence;
	journal->j_commit_sequence = journal->j_transaction_sequence - 1;
	journal->j_commit_request = journal->j_commit_sequence;

	journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);

	/*
	 * The fast-commit feature flag is cleared here.
	 * NOTE(review): presumably it is re-enabled later by the filesystem
	 * through jbd2_journal_set_features() -- confirm against callers.
	 */
	jbd2_clear_feature_fast_commit(journal);

	if (sb->s_start == 0) {
		/* On-disk superblock already marks the log as empty. */
		jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
			"(start %ld, seq %u, errno %d)\n",
			journal->j_tail, journal->j_tail_sequence,
			journal->j_errno);
		journal->j_flags |= JBD2_FLUSHED;
	} else {
		/*
		 * jbd2_journal_update_sb_log_tail() asserts that
		 * j_checkpoint_mutex is held, so take it around the call.
		 */
		mutex_lock_io(&journal->j_checkpoint_mutex);
		jbd2_journal_update_sb_log_tail(journal,
						journal->j_tail_sequence,
						journal->j_tail,
						REQ_SYNC | REQ_FUA);
		mutex_unlock(&journal->j_checkpoint_mutex);
	}
	return jbd2_journal_start_thread(journal);
}
1546
1547
1548
1549
1550
/*
 * Write the in-memory journal superblock to disk and wait for completion.
 *
 * Every caller in this file takes lock_buffer() on the superblock buffer
 * before calling; the unmapped-buffer error path below releases that lock
 * itself. NOTE(review): on the submit path the lock is presumably released
 * by end_buffer_write_sync() on I/O completion -- confirm.
 *
 * @write_flags has REQ_FUA and REQ_PREFLUSH stripped unless the journal has
 * JBD2_BARRIER set. Returns 0 on success or a negative errno; on failure the
 * journal is aborted if it was not already.
 */
static int jbd2_write_superblock(journal_t *journal, int write_flags)
{
	struct buffer_head *bh = journal->j_sb_buffer;
	journal_superblock_t *sb = journal->j_superblock;
	int ret;

	/* An unmapped buffer cannot be written: unlock it and fail. */
	if (!buffer_mapped(bh)) {
		unlock_buffer(bh);
		return -EIO;
	}

	trace_jbd2_write_superblock(journal, write_flags);
	if (!(journal->j_flags & JBD2_BARRIER))
		write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
	if (buffer_write_io_error(bh)) {
		/*
		 * An earlier write of this buffer failed. Log it, then clear
		 * the error and mark the buffer uptodate again so this write
		 * is attempted with the current in-memory contents.
		 */
		printk(KERN_ERR "JBD2: previous I/O error detected "
		       "for journal superblock update for %s.\n",
		       journal->j_devname);
		clear_buffer_write_io_error(bh);
		set_buffer_uptodate(bh);
	}
	/* Refresh the checksum so it covers the fields being written. */
	if (jbd2_journal_has_csum_v2or3(journal))
		sb->s_checksum = jbd2_superblock_csum(journal, sb);
	/* Extra reference for the in-flight I/O. */
	get_bh(bh);
	bh->b_end_io = end_buffer_write_sync;
	ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
	wait_on_buffer(bh);
	if (buffer_write_io_error(bh)) {
		clear_buffer_write_io_error(bh);
		set_buffer_uptodate(bh);
		ret = -EIO;
	}
	if (ret) {
		printk(KERN_ERR "JBD2: Error %d detected when updating "
		       "journal superblock for %s.\n", ret,
		       journal->j_devname);
		if (!is_journal_aborted(journal))
			jbd2_journal_abort(journal, ret);
	}

	return ret;
}
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
1614 unsigned long tail_block, int write_op)
1615{
1616 journal_superblock_t *sb = journal->j_superblock;
1617 int ret;
1618
1619 if (is_journal_aborted(journal))
1620 return -EIO;
1621
1622 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1623 jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
1624 tail_block, tail_tid);
1625
1626 lock_buffer(journal->j_sb_buffer);
1627 sb->s_sequence = cpu_to_be32(tail_tid);
1628 sb->s_start = cpu_to_be32(tail_block);
1629
1630 ret = jbd2_write_superblock(journal, write_op);
1631 if (ret)
1632 goto out;
1633
1634
1635 write_lock(&journal->j_state_lock);
1636 WARN_ON(!sb->s_sequence);
1637 journal->j_flags &= ~JBD2_FLUSHED;
1638 write_unlock(&journal->j_state_lock);
1639
1640out:
1641 return ret;
1642}
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
1653{
1654 journal_superblock_t *sb = journal->j_superblock;
1655 bool had_fast_commit = false;
1656
1657 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1658 lock_buffer(journal->j_sb_buffer);
1659 if (sb->s_start == 0) {
1660 unlock_buffer(journal->j_sb_buffer);
1661 return;
1662 }
1663
1664 jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
1665 journal->j_tail_sequence);
1666
1667 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1668 sb->s_start = cpu_to_be32(0);
1669 if (jbd2_has_feature_fast_commit(journal)) {
1670
1671
1672
1673
1674 jbd2_clear_feature_fast_commit(journal);
1675 had_fast_commit = true;
1676 }
1677
1678 jbd2_write_superblock(journal, write_op);
1679
1680 if (had_fast_commit)
1681 jbd2_set_feature_fast_commit(journal);
1682
1683
1684 write_lock(&journal->j_state_lock);
1685 journal->j_flags |= JBD2_FLUSHED;
1686 write_unlock(&journal->j_state_lock);
1687}
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697void jbd2_journal_update_sb_errno(journal_t *journal)
1698{
1699 journal_superblock_t *sb = journal->j_superblock;
1700 int errcode;
1701
1702 lock_buffer(journal->j_sb_buffer);
1703 errcode = journal->j_errno;
1704 if (errcode == -ESHUTDOWN)
1705 errcode = 0;
1706 jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
1707 sb->s_errno = cpu_to_be32(errcode);
1708
1709 jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
1710}
1711EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
1712
1713static int journal_revoke_records_per_block(journal_t *journal)
1714{
1715 int record_size;
1716 int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
1717
1718 if (jbd2_has_feature_64bit(journal))
1719 record_size = 8;
1720 else
1721 record_size = 4;
1722
1723 if (jbd2_journal_has_csum_v2or3(journal))
1724 space -= sizeof(struct jbd2_journal_block_tail);
1725 return space / record_size;
1726}
1727
1728
1729
1730
1731
/*
 * Read (if necessary) and validate the journal superblock.
 *
 * A buffer that already has the "verified" flag set is accepted without
 * re-checking. Otherwise the magic number, block size, format id, length,
 * first block, checksum feature combination, checksum type and (when v2/v3
 * checksums are enabled) the superblock checksum are checked in turn.
 *
 * Returns 0 on success. On any failure the superblock buffer is released
 * through journal_fail_superblock() and a negative errno is returned
 * (-EIO for a read error, -EINVAL for a malformed superblock, -EFSBADCRC for
 * a checksum mismatch, or the crypto_alloc_shash() error).
 */
static int journal_get_superblock(journal_t *journal)
{
	struct buffer_head *bh;
	journal_superblock_t *sb;
	int err = -EIO;

	bh = journal->j_sb_buffer;

	J_ASSERT(bh != NULL);
	if (!buffer_uptodate(bh)) {
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			printk(KERN_ERR
				"JBD2: IO error reading journal superblock\n");
			goto out;
		}
	}

	/* Validation already done on a previous call. */
	if (buffer_verified(bh))
		return 0;

	sb = journal->j_superblock;

	/* From here on, failures mean a bad superblock rather than bad I/O. */
	err = -EINVAL;

	if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
	    sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
		printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
		goto out;
	}

	switch(be32_to_cpu(sb->s_header.h_blocktype)) {
	case JBD2_SUPERBLOCK_V1:
		journal->j_format_version = 1;
		break;
	case JBD2_SUPERBLOCK_V2:
		journal->j_format_version = 2;
		break;
	default:
		printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
		goto out;
	}

	/*
	 * The superblock may describe a journal shorter than the space we
	 * were given (use the shorter length), but never a longer one.
	 */
	if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
		journal->j_total_len = be32_to_cpu(sb->s_maxlen);
	else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
		printk(KERN_WARNING "JBD2: journal file too short\n");
		goto out;
	}

	/* The first log block must be non-zero and inside the journal. */
	if (be32_to_cpu(sb->s_first) == 0 ||
	    be32_to_cpu(sb->s_first) >= journal->j_total_len) {
		printk(KERN_WARNING
			"JBD2: Invalid start block of journal: %u\n",
			be32_to_cpu(sb->s_first));
		goto out;
	}

	if (jbd2_has_feature_csum2(journal) &&
	    jbd2_has_feature_csum3(journal)) {
		/* Checksum v2 and v3 are mutually exclusive. */
		printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
		       "at the same time!\n");
		goto out;
	}

	if (jbd2_journal_has_csum_v2or3_feature(journal) &&
	    jbd2_has_feature_checksum(journal)) {
		/* Checksum v1 cannot be combined with v2/v3 either. */
		printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
		       "at the same time!\n");
		goto out;
	}

	if (!jbd2_verify_csum_type(journal, sb)) {
		printk(KERN_ERR "JBD2: Unknown checksum type\n");
		goto out;
	}

	/* Load the crc32c driver before any checksum is computed. */
	if (jbd2_journal_has_csum_v2or3_feature(journal)) {
		journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
		if (IS_ERR(journal->j_chksum_driver)) {
			printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
			err = PTR_ERR(journal->j_chksum_driver);
			journal->j_chksum_driver = NULL;
			goto out;
		}
	}

	if (jbd2_journal_has_csum_v2or3(journal)) {
		/* Verify the superblock's own checksum. */
		if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
			printk(KERN_ERR "JBD2: journal checksum error\n");
			err = -EFSBADCRC;
			goto out;
		}

		/* Seed for later checksums: checksum of the journal UUID. */
		journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
						   sizeof(sb->s_uuid));
	}

	journal->j_revoke_records_per_block =
				journal_revoke_records_per_block(journal);
	/* Remember that validation passed so later calls can skip it. */
	set_buffer_verified(bh);

	return 0;

out:
	journal_fail_superblock(journal);
	return err;
}
1846
1847
1848
1849
1850
1851
1852static int load_superblock(journal_t *journal)
1853{
1854 int err;
1855 journal_superblock_t *sb;
1856 int num_fc_blocks;
1857
1858 err = journal_get_superblock(journal);
1859 if (err)
1860 return err;
1861
1862 sb = journal->j_superblock;
1863
1864 journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
1865 journal->j_tail = be32_to_cpu(sb->s_start);
1866 journal->j_first = be32_to_cpu(sb->s_first);
1867 journal->j_errno = be32_to_cpu(sb->s_errno);
1868 journal->j_last = be32_to_cpu(sb->s_maxlen);
1869
1870 if (jbd2_has_feature_fast_commit(journal)) {
1871 journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
1872 num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
1873 if (!num_fc_blocks)
1874 num_fc_blocks = JBD2_MIN_FC_BLOCKS;
1875 if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
1876 journal->j_last = journal->j_fc_last - num_fc_blocks;
1877 journal->j_fc_first = journal->j_last + 1;
1878 journal->j_fc_off = 0;
1879 }
1880
1881 return 0;
1882}
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
/**
 * jbd2_journal_load() - read the journal from disk and make it usable
 * @journal: journal to load
 *
 * Loads and validates the superblock, rejects unknown ro-compat/incompat
 * feature bits on format version 2 or later, creates the slab cache for the
 * journal block size, runs recovery, and finally resets the log and starts
 * the journal thread via journal_reset(). On success JBD2_ABORT is cleared
 * and JBD2_LOADED is set.
 *
 * Return: 0 on success; the superblock or slab error; -EINVAL for
 * unrecognised features; -EFSCORRUPTED if recovery found a corrupt
 * transaction; -EIO if recovery or the reset failed.
 */
int jbd2_journal_load(journal_t *journal)
{
	int err;
	journal_superblock_t *sb;

	err = load_superblock(journal);
	if (err)
		return err;

	sb = journal->j_superblock;

	/* Refuse to touch a journal that uses features we do not know. */
	if (journal->j_format_version >= 2) {
		if ((sb->s_feature_ro_compat &
		     ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
		    (sb->s_feature_incompat &
		     ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
			printk(KERN_WARNING
				"JBD2: Unrecognised features on journal\n");
			return -EINVAL;
		}
	}

	/* Make sure a slab cache exists for this journal's block size. */
	err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
	if (err)
		return err;

	/* Replay whatever is in the log. */
	if (jbd2_journal_recover(journal))
		goto recovery_error;

	if (journal->j_failed_commit) {
		printk(KERN_ERR "JBD2: journal transaction %u on %s "
		       "is corrupt.\n", journal->j_failed_commit,
		       journal->j_devname);
		return -EFSCORRUPTED;
	}

	/*
	 * Clear JBD2_ABORT before journal_reset(): the superblock update it
	 * performs returns -EIO while the journal is marked aborted.
	 */
	journal->j_flags &= ~JBD2_ABORT;

	/* Reset the log pointers and start the journal thread. */
	if (journal_reset(journal))
		goto recovery_error;

	journal->j_flags |= JBD2_LOADED;
	return 0;

recovery_error:
	printk(KERN_WARNING "JBD2: recovery failed\n");
	return -EIO;
}
1954
1955
1956
1957
1958
1959
1960
1961
1962
/**
 * jbd2_journal_destroy() - shut down and free a journal
 * @journal: journal to destroy
 *
 * Stops the journal thread, commits a still-running transaction,
 * checkpoints everything that is left, marks the on-disk journal empty
 * (unless the journal is aborted) and releases every resource the journal
 * owns, including @journal itself.
 *
 * Return: 0 on success; the jbd2_log_do_checkpoint() error if checkpointing
 * failed; -EIO if the journal was aborted.
 */
int jbd2_journal_destroy(journal_t *journal)
{
	int err = 0;

	/* Stop the commit thread first. */
	journal_kill_thread(journal);

	/* With the thread gone, commit any running transaction ourselves. */
	if (journal->j_running_transaction)
		jbd2_journal_commit_transaction(journal);

	/*
	 * Checkpoint until the list is empty. j_list_lock is dropped around
	 * each jbd2_log_do_checkpoint() call, which runs under
	 * j_checkpoint_mutex instead.
	 */
	spin_lock(&journal->j_list_lock);
	while (journal->j_checkpoint_transactions != NULL) {
		spin_unlock(&journal->j_list_lock);
		mutex_lock_io(&journal->j_checkpoint_mutex);
		err = jbd2_log_do_checkpoint(journal);
		mutex_unlock(&journal->j_checkpoint_mutex);
		/*
		 * If checkpointing failed, tear down the remaining
		 * checkpoint state and stop trying; the lock is re-taken so
		 * both exits from the loop hold j_list_lock.
		 */
		if (err) {
			jbd2_journal_destroy_checkpoint(journal);
			spin_lock(&journal->j_list_lock);
			break;
		}
		spin_lock(&journal->j_list_lock);
	}

	J_ASSERT(journal->j_running_transaction == NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);
	J_ASSERT(journal->j_checkpoint_transactions == NULL);
	spin_unlock(&journal->j_list_lock);

	if (journal->j_sb_buffer) {
		if (!is_journal_aborted(journal)) {
			mutex_lock_io(&journal->j_checkpoint_mutex);

			/* Advance the sequence and record it as the tail. */
			write_lock(&journal->j_state_lock);
			journal->j_tail_sequence =
				++journal->j_transaction_sequence;
			write_unlock(&journal->j_state_lock);

			jbd2_mark_journal_empty(journal,
					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
			mutex_unlock(&journal->j_checkpoint_mutex);
		} else
			err = -EIO;
		brelse(journal->j_sb_buffer);
	}

	/* Release everything else the journal holds. */
	if (journal->j_proc_entry)
		jbd2_stats_proc_exit(journal);
	iput(journal->j_inode);
	if (journal->j_revoke)
		jbd2_journal_destroy_revoke(journal);
	if (journal->j_chksum_driver)
		crypto_free_shash(journal->j_chksum_driver);
	kfree(journal->j_fc_wbuf);
	kfree(journal->j_wbuf);
	kfree(journal);

	return err;
}
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
2044 unsigned long ro, unsigned long incompat)
2045{
2046 journal_superblock_t *sb;
2047
2048 if (!compat && !ro && !incompat)
2049 return 1;
2050
2051 if (journal->j_format_version == 0 &&
2052 journal_get_superblock(journal) != 0)
2053 return 0;
2054 if (journal->j_format_version == 1)
2055 return 0;
2056
2057 sb = journal->j_superblock;
2058
2059 if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
2060 ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
2061 ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat))
2062 return 1;
2063
2064 return 0;
2065}
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat,
2079 unsigned long ro, unsigned long incompat)
2080{
2081 if (!compat && !ro && !incompat)
2082 return 1;
2083
2084
2085
2086
2087
2088 if (journal->j_format_version != 2)
2089 return 0;
2090
2091 if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat &&
2092 (ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro &&
2093 (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat)
2094 return 1;
2095
2096 return 0;
2097}
2098
2099static int
2100jbd2_journal_initialize_fast_commit(journal_t *journal)
2101{
2102 journal_superblock_t *sb = journal->j_superblock;
2103 unsigned long long num_fc_blks;
2104
2105 num_fc_blks = be32_to_cpu(sb->s_num_fc_blks);
2106 if (num_fc_blks == 0)
2107 num_fc_blks = JBD2_MIN_FC_BLOCKS;
2108 if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
2109 return -ENOSPC;
2110
2111
2112 WARN_ON(journal->j_fc_wbuf != NULL);
2113 journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
2114 sizeof(struct buffer_head *), GFP_KERNEL);
2115 if (!journal->j_fc_wbuf)
2116 return -ENOMEM;
2117
2118 journal->j_fc_wbufsize = num_fc_blks;
2119 journal->j_fc_last = journal->j_last;
2120 journal->j_last = journal->j_fc_last - num_fc_blks;
2121 journal->j_fc_first = journal->j_last + 1;
2122 journal->j_fc_off = 0;
2123 journal->j_free = journal->j_last - journal->j_first;
2124 journal->j_max_transaction_buffers =
2125 jbd2_journal_get_max_txn_bufs(journal);
2126
2127 return 0;
2128}
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
/**
 * jbd2_journal_set_features() - turn on features in the journal superblock
 * @journal: journal to modify
 * @compat: compatible feature bits to set
 * @ro: read-only compatible feature bits to set
 * @incompat: incompatible feature bits to set
 *
 * Only the in-memory superblock is modified here; no write is issued by
 * this function.
 *
 * Return: 1 if the features are (now) set, 0 if they are not available, the
 * fast-commit area could not be set up, or the crc32c driver could not be
 * loaded.
 */
int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
			  unsigned long ro, unsigned long incompat)
{
/* True when feature f is requested and not yet set in the superblock. */
#define INCOMPAT_FEATURE_ON(f) \
		((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f)))
#define COMPAT_FEATURE_ON(f) \
		((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f)))
	journal_superblock_t *sb;

	/* Nothing to do if everything requested is already enabled. */
	if (jbd2_journal_check_used_features(journal, compat, ro, incompat))
		return 1;

	if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
		return 0;

	/* A request for checksum v2 is upgraded to checksum v3. */
	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
		incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
		incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
	}

	/* When both v3 and v1 checksums are requested, v1 is dropped. */
	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
	    compat & JBD2_FEATURE_COMPAT_CHECKSUM)
		compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;

	jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
		  compat, ro, incompat);

	sb = journal->j_superblock;

	/* Fast commit needs its block area reserved before it is enabled. */
	if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
		if (jbd2_journal_initialize_fast_commit(journal)) {
			pr_err("JBD2: Cannot enable fast commits.\n");
			return 0;
		}
	}

	/* Load the checksum driver if v3 is being newly enabled. */
	if ((journal->j_chksum_driver == NULL) &&
	    INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
		journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
		if (IS_ERR(journal->j_chksum_driver)) {
			printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
			journal->j_chksum_driver = NULL;
			return 0;
		}
		/* Seed for later checksums: checksum of the journal UUID. */
		journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
						   sizeof(sb->s_uuid));
	}

	/* Feature fields are changed with the superblock buffer locked. */
	lock_buffer(journal->j_sb_buffer);

	/* Newly enabling v3: record the checksum type and drop v1. */
	if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
		sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
		sb->s_feature_compat &=
			~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
	}

	/* Newly enabling v1: drop v2 and v3. */
	if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
		sb->s_feature_incompat &=
			~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
				     JBD2_FEATURE_INCOMPAT_CSUM_V3);

	sb->s_feature_compat    |= cpu_to_be32(compat);
	sb->s_feature_ro_compat |= cpu_to_be32(ro);
	sb->s_feature_incompat  |= cpu_to_be32(incompat);
	unlock_buffer(journal->j_sb_buffer);
	/* Record size and block tail depend on the feature set. */
	journal->j_revoke_records_per_block =
				journal_revoke_records_per_block(journal);

	return 1;
#undef COMPAT_FEATURE_ON
#undef INCOMPAT_FEATURE_ON
}
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
2233 unsigned long ro, unsigned long incompat)
2234{
2235 journal_superblock_t *sb;
2236
2237 jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
2238 compat, ro, incompat);
2239
2240 sb = journal->j_superblock;
2241
2242 sb->s_feature_compat &= ~cpu_to_be32(compat);
2243 sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
2244 sb->s_feature_incompat &= ~cpu_to_be32(incompat);
2245 journal->j_revoke_records_per_block =
2246 journal_revoke_records_per_block(journal);
2247}
2248EXPORT_SYMBOL(jbd2_journal_clear_features);
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
/**
 * jbd2_journal_flush() - flush all journalled data to its final location
 * @journal: journal to flush
 *
 * Forces out and waits for the running (or committing) transaction,
 * checkpoints every completed transaction, and marks the on-disk journal
 * empty.
 *
 * Return: 0 on success, -EIO if the journal is aborted, or the error from
 * checkpointing / jbd2_cleanup_journal_tail().
 */
int jbd2_journal_flush(journal_t *journal)
{
	int err = 0;
	transaction_t *transaction = NULL;

	write_lock(&journal->j_state_lock);

	/* Ask for a commit of the running transaction, if there is one. */
	if (journal->j_running_transaction) {
		transaction = journal->j_running_transaction;
		__jbd2_log_start_commit(journal, transaction->t_tid);
	} else if (journal->j_committing_transaction)
		transaction = journal->j_committing_transaction;

	/* Wait for that commit; the tid is read before the lock is dropped. */
	if (transaction) {
		tid_t tid = transaction->t_tid;

		write_unlock(&journal->j_state_lock);
		jbd2_log_wait_commit(journal, tid);
	} else {
		write_unlock(&journal->j_state_lock);
	}

	/*
	 * Checkpoint until nothing is left or an error occurs. j_list_lock
	 * is dropped around each checkpoint call, which runs under
	 * j_checkpoint_mutex.
	 */
	spin_lock(&journal->j_list_lock);
	while (!err && journal->j_checkpoint_transactions != NULL) {
		spin_unlock(&journal->j_list_lock);
		mutex_lock_io(&journal->j_checkpoint_mutex);
		err = jbd2_log_do_checkpoint(journal);
		mutex_unlock(&journal->j_checkpoint_mutex);
		spin_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);

	if (is_journal_aborted(journal))
		return -EIO;

	mutex_lock_io(&journal->j_checkpoint_mutex);
	if (!err) {
		err = jbd2_cleanup_journal_tail(journal);
		if (err < 0) {
			mutex_unlock(&journal->j_checkpoint_mutex);
			goto out;
		}
		/* Non-negative results are not errors for our caller. */
		err = 0;
	}

	/*
	 * Mark the journal empty on disk. Note this is reached even when
	 * checkpointing above returned an error; the asserts that follow
	 * still require the journal to be completely idle.
	 */
	jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
	mutex_unlock(&journal->j_checkpoint_mutex);
	write_lock(&journal->j_state_lock);
	J_ASSERT(!journal->j_running_transaction);
	J_ASSERT(!journal->j_committing_transaction);
	J_ASSERT(!journal->j_checkpoint_transactions);
	J_ASSERT(journal->j_head == journal->j_tail);
	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
	write_unlock(&journal->j_state_lock);
out:
	return err;
}
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338int jbd2_journal_wipe(journal_t *journal, int write)
2339{
2340 int err = 0;
2341
2342 J_ASSERT (!(journal->j_flags & JBD2_LOADED));
2343
2344 err = load_superblock(journal);
2345 if (err)
2346 return err;
2347
2348 if (!journal->j_tail)
2349 goto no_recovery;
2350
2351 printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
2352 write ? "Clearing" : "Ignoring");
2353
2354 err = jbd2_journal_skip_recovery(journal);
2355 if (write) {
2356
2357 mutex_lock_io(&journal->j_checkpoint_mutex);
2358 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
2359 mutex_unlock(&journal->j_checkpoint_mutex);
2360 }
2361
2362 no_recovery:
2363 return err;
2364}
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
/**
 * jbd2_journal_abort() - shut the journal down immediately
 * @journal: journal to abort
 * @errno: error code to record in the journal
 *
 * Sets JBD2_ABORT, records @errno in j_errno, kicks off a commit of the
 * running transaction (if any) and writes the errno to the on-disk
 * superblock. If the journal is already aborted, the stored errno is only
 * replaced when @errno is -ESHUTDOWN and the stored value is not.
 */
void jbd2_journal_abort(journal_t *journal, int errno)
{
	transaction_t *transaction;

	/*
	 * j_abort_mutex is held for the whole function, so the flag update
	 * and the superblock errno write of one abort are not interleaved
	 * with those of another.
	 */
	mutex_lock(&journal->j_abort_mutex);

	/*
	 * Already aborted: only an -ESHUTDOWN request may override the
	 * errno recorded by the first abort.
	 */
	write_lock(&journal->j_state_lock);
	if (journal->j_flags & JBD2_ABORT) {
		int old_errno = journal->j_errno;

		write_unlock(&journal->j_state_lock);
		if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) {
			journal->j_errno = errno;
			jbd2_journal_update_sb_errno(journal);
		}
		mutex_unlock(&journal->j_abort_mutex);
		return;
	}

	/*
	 * First abort: mark the journal aborted and record the error while
	 * still holding j_state_lock.
	 */
	pr_err("Aborting journal on device %s.\n", journal->j_devname);

	journal->j_flags |= JBD2_ABORT;
	journal->j_errno = errno;
	transaction = journal->j_running_transaction;
	if (transaction)
		__jbd2_log_start_commit(journal, transaction->t_tid);
	write_unlock(&journal->j_state_lock);

	/*
	 * Persist the errno. This is done after dropping j_state_lock
	 * because jbd2_journal_update_sb_errno() performs a synchronous
	 * superblock write.
	 */
	jbd2_journal_update_sb_errno(journal);
	mutex_unlock(&journal->j_abort_mutex);
}
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468int jbd2_journal_errno(journal_t *journal)
2469{
2470 int err;
2471
2472 read_lock(&journal->j_state_lock);
2473 if (journal->j_flags & JBD2_ABORT)
2474 err = -EROFS;
2475 else
2476 err = journal->j_errno;
2477 read_unlock(&journal->j_state_lock);
2478 return err;
2479}
2480
2481
2482
2483
2484
2485
2486
2487
2488int jbd2_journal_clear_err(journal_t *journal)
2489{
2490 int err = 0;
2491
2492 write_lock(&journal->j_state_lock);
2493 if (journal->j_flags & JBD2_ABORT)
2494 err = -EROFS;
2495 else
2496 journal->j_errno = 0;
2497 write_unlock(&journal->j_state_lock);
2498 return err;
2499}
2500
2501
2502
2503
2504
2505
2506
2507
2508void jbd2_journal_ack_err(journal_t *journal)
2509{
2510 write_lock(&journal->j_state_lock);
2511 if (journal->j_errno)
2512 journal->j_flags |= JBD2_ACK_ERR;
2513 write_unlock(&journal->j_state_lock);
2514}
2515
2516int jbd2_journal_blocks_per_page(struct inode *inode)
2517{
2518 return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
2519}
2520
2521
2522
2523
2524size_t journal_tag_bytes(journal_t *journal)
2525{
2526 size_t sz;
2527
2528 if (jbd2_has_feature_csum3(journal))
2529 return sizeof(journal_block_tag3_t);
2530
2531 sz = sizeof(journal_block_tag_t);
2532
2533 if (jbd2_has_feature_csum2(journal))
2534 sz += sizeof(__u16);
2535
2536 if (jbd2_has_feature_64bit(journal))
2537 return sz;
2538 else
2539 return sz - sizeof(__u32);
2540}
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
/*
 * Per-block-size slab caches used by jbd2_alloc()/jbd2_free() for sizes
 * below PAGE_SIZE. Slot i holds the cache for objects of 1 << (i + 10)
 * bytes, so the eight slots cover 1k through 128k. Slots are filled on
 * demand by jbd2_journal_create_slab().
 */
#define JBD2_MAX_SLABS 8
static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];

/* Cache names, indexed the same way as jbd2_slab[]. */
static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
	"jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
	"jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
};
2564
2565
2566static void jbd2_journal_destroy_slabs(void)
2567{
2568 int i;
2569
2570 for (i = 0; i < JBD2_MAX_SLABS; i++) {
2571 kmem_cache_destroy(jbd2_slab[i]);
2572 jbd2_slab[i] = NULL;
2573 }
2574}
2575
2576static int jbd2_journal_create_slab(size_t size)
2577{
2578 static DEFINE_MUTEX(jbd2_slab_create_mutex);
2579 int i = order_base_2(size) - 10;
2580 size_t slab_size;
2581
2582 if (size == PAGE_SIZE)
2583 return 0;
2584
2585 if (i >= JBD2_MAX_SLABS)
2586 return -EINVAL;
2587
2588 if (unlikely(i < 0))
2589 i = 0;
2590 mutex_lock(&jbd2_slab_create_mutex);
2591 if (jbd2_slab[i]) {
2592 mutex_unlock(&jbd2_slab_create_mutex);
2593 return 0;
2594 }
2595
2596 slab_size = 1 << (i+10);
2597 jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
2598 slab_size, 0, NULL);
2599 mutex_unlock(&jbd2_slab_create_mutex);
2600 if (!jbd2_slab[i]) {
2601 printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
2602 return -ENOMEM;
2603 }
2604 return 0;
2605}
2606
2607static struct kmem_cache *get_slab(size_t size)
2608{
2609 int i = order_base_2(size) - 10;
2610
2611 BUG_ON(i >= JBD2_MAX_SLABS);
2612 if (unlikely(i < 0))
2613 i = 0;
2614 BUG_ON(jbd2_slab[i] == NULL);
2615 return jbd2_slab[i];
2616}
2617
2618void *jbd2_alloc(size_t size, gfp_t flags)
2619{
2620 void *ptr;
2621
2622 BUG_ON(size & (size-1));
2623
2624 if (size < PAGE_SIZE)
2625 ptr = kmem_cache_alloc(get_slab(size), flags);
2626 else
2627 ptr = (void *)__get_free_pages(flags, get_order(size));
2628
2629
2630
2631 BUG_ON(((unsigned long) ptr) & (size-1));
2632
2633 return ptr;
2634}
2635
2636void jbd2_free(void *ptr, size_t size)
2637{
2638 if (size < PAGE_SIZE)
2639 kmem_cache_free(get_slab(size), ptr);
2640 else
2641 free_pages((unsigned long)ptr, get_order(size));
2642};
2643
2644
2645
2646
/* Slab cache from which struct journal_head objects are allocated. */
static struct kmem_cache *jbd2_journal_head_cache;
#ifdef CONFIG_JBD2_DEBUG
/* Debug-only counter, bumped on journal_head alloc and dropped on free. */
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
#endif
2651
2652static int __init jbd2_journal_init_journal_head_cache(void)
2653{
2654 J_ASSERT(!jbd2_journal_head_cache);
2655 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
2656 sizeof(struct journal_head),
2657 0,
2658 SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU,
2659 NULL);
2660 if (!jbd2_journal_head_cache) {
2661 printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
2662 return -ENOMEM;
2663 }
2664 return 0;
2665}
2666
/*
 * Destroy the journal_head slab cache and clear the pointer so a later
 * jbd2_journal_init_journal_head_cache() passes its "not yet created"
 * assertion.
 */
static void jbd2_journal_destroy_journal_head_cache(void)
{
	kmem_cache_destroy(jbd2_journal_head_cache);
	jbd2_journal_head_cache = NULL;
}
2672
2673
2674
2675
2676static struct journal_head *journal_alloc_journal_head(void)
2677{
2678 struct journal_head *ret;
2679
2680#ifdef CONFIG_JBD2_DEBUG
2681 atomic_inc(&nr_journal_heads);
2682#endif
2683 ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
2684 if (!ret) {
2685 jbd_debug(1, "out of memory for journal_head\n");
2686 pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
2687 ret = kmem_cache_zalloc(jbd2_journal_head_cache,
2688 GFP_NOFS | __GFP_NOFAIL);
2689 }
2690 if (ret)
2691 spin_lock_init(&ret->b_state_lock);
2692 return ret;
2693}
2694
/*
 * Return a journal_head to its slab cache. In debug builds the live-object
 * counter is decremented and the object is overwritten with
 * JBD2_POISON_FREE before it is freed.
 */
static void journal_free_journal_head(struct journal_head *jh)
{
#ifdef CONFIG_JBD2_DEBUG
	atomic_dec(&nr_journal_heads);
	memset(jh, JBD2_POISON_FREE, sizeof(*jh));
#endif
	kmem_cache_free(jbd2_journal_head_cache, jh);
}
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
/**
 * jbd2_journal_add_journal_head() - attach (or find) a buffer's journal_head
 * @bh: buffer to attach a journal_head to
 *
 * If @bh already has a journal_head, that one is used; otherwise a new one
 * is allocated, linked to @bh through b_private / b_bh, the jbd flag is set
 * on the buffer and an extra reference is taken on @bh. In both cases the
 * journal_head's b_jcount is incremented.
 *
 * Return: the journal_head now attached to @bh.
 */
struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
{
	struct journal_head *jh;
	struct journal_head *new_jh = NULL;

repeat:
	/*
	 * Allocate before taking the journal_head lock. The jbd flag is
	 * re-checked under the lock below, so this early test is only a
	 * hint to avoid a pointless allocation.
	 */
	if (!buffer_jbd(bh))
		new_jh = journal_alloc_journal_head();

	jbd_lock_bh_journal_head(bh);
	if (buffer_jbd(bh)) {
		jh = bh2jh(bh);
	} else {
		J_ASSERT_BH(bh,
			(atomic_read(&bh->b_count) > 0) ||
			(bh->b_page && bh->b_page->mapping));

		/*
		 * The buffer had a journal_head at the unlocked check but
		 * lost it before we took the lock: go back and allocate.
		 */
		if (!new_jh) {
			jbd_unlock_bh_journal_head(bh);
			goto repeat;
		}

		jh = new_jh;
		new_jh = NULL;		/* consumed; must not be freed below */
		set_buffer_jbd(bh);
		bh->b_private = jh;
		jh->b_bh = bh;
		get_bh(bh);
		BUFFER_TRACE(bh, "added journal_head");
	}
	jh->b_jcount++;
	jbd_unlock_bh_journal_head(bh);
	/* Someone else attached a journal_head first: drop our spare one. */
	if (new_jh)
		journal_free_journal_head(new_jh);
	return bh->b_private;
}
2781
2782
2783
2784
2785
2786struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh)
2787{
2788 struct journal_head *jh = NULL;
2789
2790 jbd_lock_bh_journal_head(bh);
2791 if (buffer_jbd(bh)) {
2792 jh = bh2jh(bh);
2793 jh->b_jcount++;
2794 }
2795 jbd_unlock_bh_journal_head(bh);
2796 return jh;
2797}
2798
/*
 * Unlink the journal_head from @bh: clear both cross pointers and the jbd
 * flag on the buffer. The journal_head must no longer belong to any
 * transaction or list (asserted). Neither the journal_head nor the buffer
 * reference is released here; the caller in this file,
 * jbd2_journal_put_journal_head(), does that afterwards.
 */
static void __journal_remove_journal_head(struct buffer_head *bh)
{
	struct journal_head *jh = bh2jh(bh);

	J_ASSERT_JH(jh, jh->b_transaction == NULL);
	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
	J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
	J_ASSERT_BH(bh, buffer_jbd(bh));
	J_ASSERT_BH(bh, jh2bh(jh) == bh);
	BUFFER_TRACE(bh, "remove journal_head");

	/* Break the bh <-> jh link. */
	bh->b_private = NULL;
	jh->b_bh = NULL;	/* debug, really */
	clear_buffer_jbd(bh);
}
2816
/*
 * Free a detached journal_head. Any b_frozen_data or b_committed_data copy
 * still hanging off it is freed too, with a warning logged for each;
 * @b_size is the size those copies are freed with.
 */
static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
{
	if (jh->b_frozen_data) {
		printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
		jbd2_free(jh->b_frozen_data, b_size);
	}
	if (jh->b_committed_data) {
		printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
		jbd2_free(jh->b_committed_data, b_size);
	}
	journal_free_journal_head(jh);
}
2829
2830
2831
2832
2833
2834void jbd2_journal_put_journal_head(struct journal_head *jh)
2835{
2836 struct buffer_head *bh = jh2bh(jh);
2837
2838 jbd_lock_bh_journal_head(bh);
2839 J_ASSERT_JH(jh, jh->b_jcount > 0);
2840 --jh->b_jcount;
2841 if (!jh->b_jcount) {
2842 __journal_remove_journal_head(bh);
2843 jbd_unlock_bh_journal_head(bh);
2844 journal_release_journal_head(jh, bh->b_size);
2845 __brelse(bh);
2846 } else {
2847 jbd_unlock_bh_journal_head(bh);
2848 }
2849}
2850
2851
2852
2853
2854void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
2855{
2856 jinode->i_transaction = NULL;
2857 jinode->i_next_transaction = NULL;
2858 jinode->i_vfs_inode = inode;
2859 jinode->i_flags = 0;
2860 jinode->i_dirty_start = 0;
2861 jinode->i_dirty_end = 0;
2862 INIT_LIST_HEAD(&jinode->i_list);
2863}
2864
2865
2866
2867
2868
2869
2870void jbd2_journal_release_jbd_inode(journal_t *journal,
2871 struct jbd2_inode *jinode)
2872{
2873 if (!journal)
2874 return;
2875restart:
2876 spin_lock(&journal->j_list_lock);
2877
2878 if (jinode->i_flags & JI_COMMIT_RUNNING) {
2879 wait_queue_head_t *wq;
2880 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
2881 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
2882 prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
2883 spin_unlock(&journal->j_list_lock);
2884 schedule();
2885 finish_wait(wq, &wait.wq_entry);
2886 goto restart;
2887 }
2888
2889 if (jinode->i_transaction) {
2890 list_del(&jinode->i_list);
2891 jinode->i_transaction = NULL;
2892 }
2893 spin_unlock(&journal->j_list_lock);
2894}
2895
2896
#ifdef CONFIG_PROC_FS

#define JBD2_STATS_PROC_NAME "fs/jbd2"

/*
 * Create the procfs directory named by JBD2_STATS_PROC_NAME and remember the
 * result in proc_jbd2_stats.  The return value of proc_mkdir() is stored
 * as-is and not checked here.
 */
static void __init jbd2_create_jbd_stats_proc_entry(void)
{
	proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
}

/*
 * Remove the procfs directory again, but only if proc_jbd2_stats is non-NULL.
 */
static void __exit jbd2_remove_jbd_stats_proc_entry(void)
{
	if (!proc_jbd2_stats)
		return;

	remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
}

#else /* !CONFIG_PROC_FS */

/* Without procfs both helpers compile away to empty statements. */
#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)

#endif /* CONFIG_PROC_FS */
2918
/* Slab caches; created by the init helpers below and reset to NULL on destroy. */
struct kmem_cache *jbd2_handle_cache;
struct kmem_cache *jbd2_inode_cache;
2920
2921static int __init jbd2_journal_init_inode_cache(void)
2922{
2923 J_ASSERT(!jbd2_inode_cache);
2924 jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
2925 if (!jbd2_inode_cache) {
2926 pr_emerg("JBD2: failed to create inode cache\n");
2927 return -ENOMEM;
2928 }
2929 return 0;
2930}
2931
2932static int __init jbd2_journal_init_handle_cache(void)
2933{
2934 J_ASSERT(!jbd2_handle_cache);
2935 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
2936 if (!jbd2_handle_cache) {
2937 printk(KERN_EMERG "JBD2: failed to create handle cache\n");
2938 return -ENOMEM;
2939 }
2940 return 0;
2941}
2942
2943static void jbd2_journal_destroy_inode_cache(void)
2944{
2945 kmem_cache_destroy(jbd2_inode_cache);
2946 jbd2_inode_cache = NULL;
2947}
2948
2949static void jbd2_journal_destroy_handle_cache(void)
2950{
2951 kmem_cache_destroy(jbd2_handle_cache);
2952 jbd2_handle_cache = NULL;
2953}
2954
2955
2956
2957
2958
2959static int __init journal_init_caches(void)
2960{
2961 int ret;
2962
2963 ret = jbd2_journal_init_revoke_record_cache();
2964 if (ret == 0)
2965 ret = jbd2_journal_init_revoke_table_cache();
2966 if (ret == 0)
2967 ret = jbd2_journal_init_journal_head_cache();
2968 if (ret == 0)
2969 ret = jbd2_journal_init_handle_cache();
2970 if (ret == 0)
2971 ret = jbd2_journal_init_inode_cache();
2972 if (ret == 0)
2973 ret = jbd2_journal_init_transaction_cache();
2974 return ret;
2975}
2976
/*
 * Tear down the jbd2 slab caches.
 *
 * Calls the destroy helper for each cache that journal_init_caches() sets up,
 * in the same order, and then jbd2_journal_destroy_slabs().  In this file it
 * is called from journal_exit() and from journal_init() when cache setup
 * failed, so it can run when only some of the caches were created.
 */
static void jbd2_journal_destroy_caches(void)
{
	jbd2_journal_destroy_revoke_record_cache();
	jbd2_journal_destroy_revoke_table_cache();
	jbd2_journal_destroy_journal_head_cache();
	jbd2_journal_destroy_handle_cache();
	jbd2_journal_destroy_inode_cache();
	jbd2_journal_destroy_transaction_cache();
	jbd2_journal_destroy_slabs();
}
2987
2988static int __init journal_init(void)
2989{
2990 int ret;
2991
2992 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
2993
2994 ret = journal_init_caches();
2995 if (ret == 0) {
2996 jbd2_create_jbd_stats_proc_entry();
2997 } else {
2998 jbd2_journal_destroy_caches();
2999 }
3000 return ret;
3001}
3002
3003static void __exit journal_exit(void)
3004{
3005#ifdef CONFIG_JBD2_DEBUG
3006 int n = atomic_read(&nr_journal_heads);
3007 if (n)
3008 printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
3009#endif
3010 jbd2_remove_jbd_stats_proc_entry();
3011 jbd2_journal_destroy_caches();
3012}
3013
/*
 * Module metadata and entry points: journal_init() is registered as the
 * init routine and journal_exit() as the exit routine.
 */
MODULE_LICENSE("GPL");
module_init(journal_init);
module_exit(journal_exit);
3017
3018