1
2
3
4
5
6
7
8
9
10
11
12
13#ifndef __KERNEL__
14#include "jfs_user.h"
15#else
16#include <linux/time.h>
17#include <linux/fs.h>
18#include <linux/jbd2.h>
19#include <linux/errno.h>
20#include <linux/crc32.h>
21#include <linux/blkdev.h>
22#endif
23
24
25
26
27
28struct recovery_info
29{
30 tid_t start_transaction;
31 tid_t end_transaction;
32
33 int nr_replays;
34 int nr_revokes;
35 int nr_revoke_hits;
36};
37
38static int do_one_pass(journal_t *journal,
39 struct recovery_info *info, enum passtype pass);
40static int scan_revoke_records(journal_t *, struct buffer_head *,
41 tid_t, struct recovery_info *);
42
43#ifdef __KERNEL__
44
45
/*
 * Call brelse() on the first @n entries of @b, walking from the last
 * entry down to the first.  Does nothing when @n is zero or negative.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	for (idx = n - 1; idx >= 0; idx--)
		brelse(b[idx]);
}
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65#define MAXBUF 8
66static int do_readahead(journal_t *journal, unsigned int start)
67{
68 int err;
69 unsigned int max, nbufs, next;
70 unsigned long long blocknr;
71 struct buffer_head *bh;
72
73 struct buffer_head * bufs[MAXBUF];
74
75
76 max = start + (128 * 1024 / journal->j_blocksize);
77 if (max > journal->j_total_len)
78 max = journal->j_total_len;
79
80
81
82
83 nbufs = 0;
84
85 for (next = start; next < max; next++) {
86 err = jbd2_journal_bmap(journal, next, &blocknr);
87
88 if (err) {
89 printk(KERN_ERR "JBD2: bad block at offset %u\n",
90 next);
91 goto failed;
92 }
93
94 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
95 if (!bh) {
96 err = -ENOMEM;
97 goto failed;
98 }
99
100 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
101 bufs[nbufs++] = bh;
102 if (nbufs == MAXBUF) {
103 ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
104 journal_brelse_array(bufs, nbufs);
105 nbufs = 0;
106 }
107 } else
108 brelse(bh);
109 }
110
111 if (nbufs)
112 ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
113 err = 0;
114
115failed:
116 if (nbufs)
117 journal_brelse_array(bufs, nbufs);
118 return err;
119}
120
121#endif
122
123
124
125
126
127
128static int jread(struct buffer_head **bhp, journal_t *journal,
129 unsigned int offset)
130{
131 int err;
132 unsigned long long blocknr;
133 struct buffer_head *bh;
134
135 *bhp = NULL;
136
137 if (offset >= journal->j_total_len) {
138 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
139 return -EFSCORRUPTED;
140 }
141
142 err = jbd2_journal_bmap(journal, offset, &blocknr);
143
144 if (err) {
145 printk(KERN_ERR "JBD2: bad block at offset %u\n",
146 offset);
147 return err;
148 }
149
150 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
151 if (!bh)
152 return -ENOMEM;
153
154 if (!buffer_uptodate(bh)) {
155
156
157 if (!buffer_req(bh))
158 do_readahead(journal, offset);
159 wait_on_buffer(bh);
160 }
161
162 if (!buffer_uptodate(bh)) {
163 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
164 offset);
165 brelse(bh);
166 return -EIO;
167 }
168
169 *bhp = bh;
170 return 0;
171}
172
173static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
174{
175 struct jbd2_journal_block_tail *tail;
176 __be32 provided;
177 __u32 calculated;
178
179 if (!jbd2_journal_has_csum_v2or3(j))
180 return 1;
181
182 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
183 sizeof(struct jbd2_journal_block_tail));
184 provided = tail->t_checksum;
185 tail->t_checksum = 0;
186 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
187 tail->t_checksum = provided;
188
189 return provided == cpu_to_be32(calculated);
190}
191
192
193
194
195
196static int count_tags(journal_t *journal, struct buffer_head *bh)
197{
198 char * tagp;
199 journal_block_tag_t * tag;
200 int nr = 0, size = journal->j_blocksize;
201 int tag_bytes = journal_tag_bytes(journal);
202
203 if (jbd2_journal_has_csum_v2or3(journal))
204 size -= sizeof(struct jbd2_journal_block_tail);
205
206 tagp = &bh->b_data[sizeof(journal_header_t)];
207
208 while ((tagp - bh->b_data + tag_bytes) <= size) {
209 tag = (journal_block_tag_t *) tagp;
210
211 nr++;
212 tagp += tag_bytes;
213 if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
214 tagp += 16;
215
216 if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
217 break;
218 }
219
220 return nr;
221}
222
223
224
/*
 * Fold a log block number back into the log once it reaches the end:
 * when @var is at or beyond the end marker, subtract the distance
 * between that marker and j_first.  The marker is j_fc_last when the
 * fast-commit feature is set and j_last otherwise.
 * NOTE(review): confirm that wrapping at j_fc_last is the intended bound
 * for the regular log when fast commit is enabled.
 */
#define wrap(journal, var)						\
do {									\
	unsigned long _wrap_end = (journal)->j_last;			\
									\
	if (jbd2_has_feature_fast_commit(journal))			\
		_wrap_end = (journal)->j_fc_last;			\
	if ((var) >= _wrap_end)						\
		(var) -= _wrap_end - (journal)->j_first;		\
} while (0)
234
235static int fc_do_one_pass(journal_t *journal,
236 struct recovery_info *info, enum passtype pass)
237{
238 unsigned int expected_commit_id = info->end_transaction;
239 unsigned long next_fc_block;
240 struct buffer_head *bh;
241 int err = 0;
242
243 next_fc_block = journal->j_fc_first;
244 if (!journal->j_fc_replay_callback)
245 return 0;
246
247 while (next_fc_block <= journal->j_fc_last) {
248 jbd_debug(3, "Fast commit replay: next block %ld",
249 next_fc_block);
250 err = jread(&bh, journal, next_fc_block);
251 if (err) {
252 jbd_debug(3, "Fast commit replay: read error");
253 break;
254 }
255
256 jbd_debug(3, "Processing fast commit blk with seq %d");
257 err = journal->j_fc_replay_callback(journal, bh, pass,
258 next_fc_block - journal->j_fc_first,
259 expected_commit_id);
260 next_fc_block++;
261 if (err < 0 || err == JBD2_FC_REPLAY_STOP)
262 break;
263 err = 0;
264 }
265
266 if (err)
267 jbd_debug(3, "Fast commit replay failed, err = %d\n", err);
268
269 return err;
270}
271
272
273
274
275
276
277
278
279
280
281
282
283
284int jbd2_journal_recover(journal_t *journal)
285{
286 int err, err2;
287 journal_superblock_t * sb;
288
289 struct recovery_info info;
290
291 memset(&info, 0, sizeof(info));
292 sb = journal->j_superblock;
293
294
295
296
297
298
299
300 if (!sb->s_start) {
301 jbd_debug(1, "No recovery required, last transaction %d\n",
302 be32_to_cpu(sb->s_sequence));
303 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
304 return 0;
305 }
306
307 err = do_one_pass(journal, &info, PASS_SCAN);
308 if (!err)
309 err = do_one_pass(journal, &info, PASS_REVOKE);
310 if (!err)
311 err = do_one_pass(journal, &info, PASS_REPLAY);
312
313 jbd_debug(1, "JBD2: recovery, exit status %d, "
314 "recovered transactions %u to %u\n",
315 err, info.start_transaction, info.end_transaction);
316 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
317 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
318
319
320
321 journal->j_transaction_sequence = ++info.end_transaction;
322
323 jbd2_journal_clear_revoke(journal);
324 err2 = sync_blockdev(journal->j_fs_dev);
325 if (!err)
326 err = err2;
327
328 if (journal->j_flags & JBD2_BARRIER) {
329 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL);
330 if (!err)
331 err = err2;
332 }
333 return err;
334}
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349int jbd2_journal_skip_recovery(journal_t *journal)
350{
351 int err;
352
353 struct recovery_info info;
354
355 memset (&info, 0, sizeof(info));
356
357 err = do_one_pass(journal, &info, PASS_SCAN);
358
359 if (err) {
360 printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
361 ++journal->j_transaction_sequence;
362 } else {
363#ifdef CONFIG_JBD2_DEBUG
364 int dropped = info.end_transaction -
365 be32_to_cpu(journal->j_superblock->s_sequence);
366 jbd_debug(1,
367 "JBD2: ignoring %d transaction%s from the journal.\n",
368 dropped, (dropped == 1) ? "" : "s");
369#endif
370 journal->j_transaction_sequence = ++info.end_transaction;
371 }
372
373 journal->j_tail = 0;
374 return err;
375}
376
377static inline unsigned long long read_tag_block(journal_t *journal,
378 journal_block_tag_t *tag)
379{
380 unsigned long long block = be32_to_cpu(tag->t_blocknr);
381 if (jbd2_has_feature_64bit(journal))
382 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
383 return block;
384}
385
386
387
388
389
390static int calc_chksums(journal_t *journal, struct buffer_head *bh,
391 unsigned long *next_log_block, __u32 *crc32_sum)
392{
393 int i, num_blks, err;
394 unsigned long io_block;
395 struct buffer_head *obh;
396
397 num_blks = count_tags(journal, bh);
398
399 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
400
401 for (i = 0; i < num_blks; i++) {
402 io_block = (*next_log_block)++;
403 wrap(journal, *next_log_block);
404 err = jread(&obh, journal, io_block);
405 if (err) {
406 printk(KERN_ERR "JBD2: IO error %d recovering block "
407 "%lu in log\n", err, io_block);
408 return 1;
409 } else {
410 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
411 obh->b_size);
412 }
413 put_bh(obh);
414 }
415 return 0;
416}
417
418static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
419{
420 struct commit_header *h;
421 __be32 provided;
422 __u32 calculated;
423
424 if (!jbd2_journal_has_csum_v2or3(j))
425 return 1;
426
427 h = buf;
428 provided = h->h_chksum[0];
429 h->h_chksum[0] = 0;
430 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
431 h->h_chksum[0] = provided;
432
433 return provided == cpu_to_be32(calculated);
434}
435
436static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
437 void *buf, __u32 sequence)
438{
439 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
440 __u32 csum32;
441 __be32 seq;
442
443 if (!jbd2_journal_has_csum_v2or3(j))
444 return 1;
445
446 seq = cpu_to_be32(sequence);
447 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
448 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
449
450 if (jbd2_has_feature_csum3(j))
451 return tag3->t_checksum == cpu_to_be32(csum32);
452 else
453 return tag->t_checksum == cpu_to_be16(csum32);
454}
455
456static int do_one_pass(journal_t *journal,
457 struct recovery_info *info, enum passtype pass)
458{
459 unsigned int first_commit_ID, next_commit_ID;
460 unsigned long next_log_block;
461 int err, success = 0;
462 journal_superblock_t * sb;
463 journal_header_t * tmp;
464 struct buffer_head * bh;
465 unsigned int sequence;
466 int blocktype;
467 int tag_bytes = journal_tag_bytes(journal);
468 __u32 crc32_sum = ~0;
469 int descr_csum_size = 0;
470 int block_error = 0;
471 bool need_check_commit_time = false;
472 __u64 last_trans_commit_time = 0, commit_time;
473
474
475
476
477
478
479
480 sb = journal->j_superblock;
481 next_commit_ID = be32_to_cpu(sb->s_sequence);
482 next_log_block = be32_to_cpu(sb->s_start);
483
484 first_commit_ID = next_commit_ID;
485 if (pass == PASS_SCAN)
486 info->start_transaction = first_commit_ID;
487
488 jbd_debug(1, "Starting recovery pass %d\n", pass);
489
490
491
492
493
494
495
496
497 while (1) {
498 int flags;
499 char * tagp;
500 journal_block_tag_t * tag;
501 struct buffer_head * obh;
502 struct buffer_head * nbh;
503
504 cond_resched();
505
506
507
508
509
510 if (pass != PASS_SCAN)
511 if (tid_geq(next_commit_ID, info->end_transaction))
512 break;
513
514 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
515 next_commit_ID, next_log_block,
516 jbd2_has_feature_fast_commit(journal) ?
517 journal->j_fc_last : journal->j_last);
518
519
520
521
522
523 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
524 err = jread(&bh, journal, next_log_block);
525 if (err)
526 goto failed;
527
528 next_log_block++;
529 wrap(journal, next_log_block);
530
531
532
533
534
535
536
537 tmp = (journal_header_t *)bh->b_data;
538
539 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
540 brelse(bh);
541 break;
542 }
543
544 blocktype = be32_to_cpu(tmp->h_blocktype);
545 sequence = be32_to_cpu(tmp->h_sequence);
546 jbd_debug(3, "Found magic %d, sequence %d\n",
547 blocktype, sequence);
548
549 if (sequence != next_commit_ID) {
550 brelse(bh);
551 break;
552 }
553
554
555
556
557
558 switch(blocktype) {
559 case JBD2_DESCRIPTOR_BLOCK:
560
561 if (jbd2_journal_has_csum_v2or3(journal))
562 descr_csum_size =
563 sizeof(struct jbd2_journal_block_tail);
564 if (descr_csum_size > 0 &&
565 !jbd2_descriptor_block_csum_verify(journal,
566 bh->b_data)) {
567
568
569
570
571 if (pass != PASS_SCAN) {
572 pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
573 next_log_block);
574 err = -EFSBADCRC;
575 brelse(bh);
576 goto failed;
577 }
578 need_check_commit_time = true;
579 jbd_debug(1,
580 "invalid descriptor block found in %lu\n",
581 next_log_block);
582 }
583
584
585
586
587
588 if (pass != PASS_REPLAY) {
589 if (pass == PASS_SCAN &&
590 jbd2_has_feature_checksum(journal) &&
591 !need_check_commit_time &&
592 !info->end_transaction) {
593 if (calc_chksums(journal, bh,
594 &next_log_block,
595 &crc32_sum)) {
596 put_bh(bh);
597 break;
598 }
599 put_bh(bh);
600 continue;
601 }
602 next_log_block += count_tags(journal, bh);
603 wrap(journal, next_log_block);
604 put_bh(bh);
605 continue;
606 }
607
608
609
610
611
612 tagp = &bh->b_data[sizeof(journal_header_t)];
613 while ((tagp - bh->b_data + tag_bytes)
614 <= journal->j_blocksize - descr_csum_size) {
615 unsigned long io_block;
616
617 tag = (journal_block_tag_t *) tagp;
618 flags = be16_to_cpu(tag->t_flags);
619
620 io_block = next_log_block++;
621 wrap(journal, next_log_block);
622 err = jread(&obh, journal, io_block);
623 if (err) {
624
625
626 success = err;
627 printk(KERN_ERR
628 "JBD2: IO error %d recovering "
629 "block %ld in log\n",
630 err, io_block);
631 } else {
632 unsigned long long blocknr;
633
634 J_ASSERT(obh != NULL);
635 blocknr = read_tag_block(journal,
636 tag);
637
638
639
640
641 if (jbd2_journal_test_revoke
642 (journal, blocknr,
643 next_commit_ID)) {
644 brelse(obh);
645 ++info->nr_revoke_hits;
646 goto skip_write;
647 }
648
649
650 if (!jbd2_block_tag_csum_verify(
651 journal, tag, obh->b_data,
652 be32_to_cpu(tmp->h_sequence))) {
653 brelse(obh);
654 success = -EFSBADCRC;
655 printk(KERN_ERR "JBD2: Invalid "
656 "checksum recovering "
657 "data block %llu in "
658 "log\n", blocknr);
659 block_error = 1;
660 goto skip_write;
661 }
662
663
664
665 nbh = __getblk(journal->j_fs_dev,
666 blocknr,
667 journal->j_blocksize);
668 if (nbh == NULL) {
669 printk(KERN_ERR
670 "JBD2: Out of memory "
671 "during recovery.\n");
672 err = -ENOMEM;
673 brelse(bh);
674 brelse(obh);
675 goto failed;
676 }
677
678 lock_buffer(nbh);
679 memcpy(nbh->b_data, obh->b_data,
680 journal->j_blocksize);
681 if (flags & JBD2_FLAG_ESCAPE) {
682 *((__be32 *)nbh->b_data) =
683 cpu_to_be32(JBD2_MAGIC_NUMBER);
684 }
685
686 BUFFER_TRACE(nbh, "marking dirty");
687 set_buffer_uptodate(nbh);
688 mark_buffer_dirty(nbh);
689 BUFFER_TRACE(nbh, "marking uptodate");
690 ++info->nr_replays;
691
692 unlock_buffer(nbh);
693 brelse(obh);
694 brelse(nbh);
695 }
696
697 skip_write:
698 tagp += tag_bytes;
699 if (!(flags & JBD2_FLAG_SAME_UUID))
700 tagp += 16;
701
702 if (flags & JBD2_FLAG_LAST_TAG)
703 break;
704 }
705
706 brelse(bh);
707 continue;
708
709 case JBD2_COMMIT_BLOCK:
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740 commit_time = be64_to_cpu(
741 ((struct commit_header *)bh->b_data)->h_commit_sec);
742
743
744
745
746
747
748
749 if (need_check_commit_time) {
750 if (commit_time >= last_trans_commit_time) {
751 pr_err("JBD2: Invalid checksum found in transaction %u\n",
752 next_commit_ID);
753 err = -EFSBADCRC;
754 brelse(bh);
755 goto failed;
756 }
757 ignore_crc_mismatch:
758
759
760
761
762 jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
763 next_commit_ID);
764 err = 0;
765 brelse(bh);
766 goto done;
767 }
768
769
770
771
772
773
774
775 if (pass == PASS_SCAN &&
776 jbd2_has_feature_checksum(journal)) {
777 struct commit_header *cbh =
778 (struct commit_header *)bh->b_data;
779 unsigned found_chksum =
780 be32_to_cpu(cbh->h_chksum[0]);
781
782 if (info->end_transaction) {
783 journal->j_failed_commit =
784 info->end_transaction;
785 brelse(bh);
786 break;
787 }
788
789
790 if (!((crc32_sum == found_chksum &&
791 cbh->h_chksum_type ==
792 JBD2_CRC32_CHKSUM &&
793 cbh->h_chksum_size ==
794 JBD2_CRC32_CHKSUM_SIZE) ||
795 (cbh->h_chksum_type == 0 &&
796 cbh->h_chksum_size == 0 &&
797 found_chksum == 0)))
798 goto chksum_error;
799
800 crc32_sum = ~0;
801 }
802 if (pass == PASS_SCAN &&
803 !jbd2_commit_block_csum_verify(journal,
804 bh->b_data)) {
805 chksum_error:
806 if (commit_time < last_trans_commit_time)
807 goto ignore_crc_mismatch;
808 info->end_transaction = next_commit_ID;
809
810 if (!jbd2_has_feature_async_commit(journal)) {
811 journal->j_failed_commit =
812 next_commit_ID;
813 brelse(bh);
814 break;
815 }
816 }
817 if (pass == PASS_SCAN)
818 last_trans_commit_time = commit_time;
819 brelse(bh);
820 next_commit_ID++;
821 continue;
822
823 case JBD2_REVOKE_BLOCK:
824
825
826
827
828 if (pass == PASS_SCAN &&
829 !jbd2_descriptor_block_csum_verify(journal,
830 bh->b_data)) {
831 jbd_debug(1, "JBD2: invalid revoke block found in %lu\n",
832 next_log_block);
833 need_check_commit_time = true;
834 }
835
836
837 if (pass != PASS_REVOKE) {
838 brelse(bh);
839 continue;
840 }
841
842 err = scan_revoke_records(journal, bh,
843 next_commit_ID, info);
844 brelse(bh);
845 if (err)
846 goto failed;
847 continue;
848
849 default:
850 jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
851 blocktype);
852 brelse(bh);
853 goto done;
854 }
855 }
856
857 done:
858
859
860
861
862
863
864
865 if (pass == PASS_SCAN) {
866 if (!info->end_transaction)
867 info->end_transaction = next_commit_ID;
868 } else {
869
870
871 if (info->end_transaction != next_commit_ID) {
872 printk(KERN_ERR "JBD2: recovery pass %d ended at "
873 "transaction %u, expected %u\n",
874 pass, next_commit_ID, info->end_transaction);
875 if (!success)
876 success = -EIO;
877 }
878 }
879
880 if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) {
881 err = fc_do_one_pass(journal, info, pass);
882 if (err)
883 success = err;
884 }
885
886 if (block_error && success == 0)
887 success = -EIO;
888 return success;
889
890 failed:
891 return err;
892}
893
894
895
896static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
897 tid_t sequence, struct recovery_info *info)
898{
899 jbd2_journal_revoke_header_t *header;
900 int offset, max;
901 int csum_size = 0;
902 __u32 rcount;
903 int record_len = 4;
904
905 header = (jbd2_journal_revoke_header_t *) bh->b_data;
906 offset = sizeof(jbd2_journal_revoke_header_t);
907 rcount = be32_to_cpu(header->r_count);
908
909 if (jbd2_journal_has_csum_v2or3(journal))
910 csum_size = sizeof(struct jbd2_journal_block_tail);
911 if (rcount > journal->j_blocksize - csum_size)
912 return -EINVAL;
913 max = rcount;
914
915 if (jbd2_has_feature_64bit(journal))
916 record_len = 8;
917
918 while (offset + record_len <= max) {
919 unsigned long long blocknr;
920 int err;
921
922 if (record_len == 4)
923 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
924 else
925 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
926 offset += record_len;
927 err = jbd2_journal_set_revoke(journal, blocknr, sequence);
928 if (err)
929 return err;
930 ++info->nr_revokes;
931 }
932 return 0;
933}
934