1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#ifndef __KERNEL__
17#include "jfs_user.h"
18#else
19#include <linux/time.h>
20#include <linux/fs.h>
21#include <linux/jbd2.h>
22#include <linux/errno.h>
23#include <linux/slab.h>
24#include <linux/crc32.h>
25#endif
26
27
28
29
30
31struct recovery_info
32{
33 tid_t start_transaction;
34 tid_t end_transaction;
35
36 int nr_replays;
37 int nr_revokes;
38 int nr_revoke_hits;
39};
40
41enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
42static int do_one_pass(journal_t *journal,
43 struct recovery_info *info, enum passtype pass);
44static int scan_revoke_records(journal_t *, struct buffer_head *,
45 tid_t, struct recovery_info *);
46
47#ifdef __KERNEL__
48
49
/* Call brelse() on each of the first @n entries of @b, last entry first. */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	for (idx = n - 1; idx >= 0; idx--)
		brelse(b[idx]);
}
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69#define MAXBUF 8
70static int do_readahead(journal_t *journal, unsigned int start)
71{
72 int err;
73 unsigned int max, nbufs, next;
74 unsigned long long blocknr;
75 struct buffer_head *bh;
76
77 struct buffer_head * bufs[MAXBUF];
78
79
80 max = start + (128 * 1024 / journal->j_blocksize);
81 if (max > journal->j_maxlen)
82 max = journal->j_maxlen;
83
84
85
86
87 nbufs = 0;
88
89 for (next = start; next < max; next++) {
90 err = jbd2_journal_bmap(journal, next, &blocknr);
91
92 if (err) {
93 printk (KERN_ERR "JBD: bad block at offset %u\n",
94 next);
95 goto failed;
96 }
97
98 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
99 if (!bh) {
100 err = -ENOMEM;
101 goto failed;
102 }
103
104 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
105 bufs[nbufs++] = bh;
106 if (nbufs == MAXBUF) {
107 ll_rw_block(READ, nbufs, bufs);
108 journal_brelse_array(bufs, nbufs);
109 nbufs = 0;
110 }
111 } else
112 brelse(bh);
113 }
114
115 if (nbufs)
116 ll_rw_block(READ, nbufs, bufs);
117 err = 0;
118
119failed:
120 if (nbufs)
121 journal_brelse_array(bufs, nbufs);
122 return err;
123}
124
125#endif
126
127
128
129
130
131
132static int jread(struct buffer_head **bhp, journal_t *journal,
133 unsigned int offset)
134{
135 int err;
136 unsigned long long blocknr;
137 struct buffer_head *bh;
138
139 *bhp = NULL;
140
141 if (offset >= journal->j_maxlen) {
142 printk(KERN_ERR "JBD: corrupted journal superblock\n");
143 return -EIO;
144 }
145
146 err = jbd2_journal_bmap(journal, offset, &blocknr);
147
148 if (err) {
149 printk (KERN_ERR "JBD: bad block at offset %u\n",
150 offset);
151 return err;
152 }
153
154 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
155 if (!bh)
156 return -ENOMEM;
157
158 if (!buffer_uptodate(bh)) {
159
160
161 if (!buffer_req(bh))
162 do_readahead(journal, offset);
163 wait_on_buffer(bh);
164 }
165
166 if (!buffer_uptodate(bh)) {
167 printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
168 offset);
169 brelse(bh);
170 return -EIO;
171 }
172
173 *bhp = bh;
174 return 0;
175}
176
177
178
179
180
181
182static int count_tags(journal_t *journal, struct buffer_head *bh)
183{
184 char * tagp;
185 journal_block_tag_t * tag;
186 int nr = 0, size = journal->j_blocksize;
187 int tag_bytes = journal_tag_bytes(journal);
188
189 tagp = &bh->b_data[sizeof(journal_header_t)];
190
191 while ((tagp - bh->b_data + tag_bytes) <= size) {
192 tag = (journal_block_tag_t *) tagp;
193
194 nr++;
195 tagp += tag_bytes;
196 if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
197 tagp += 16;
198
199 if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG))
200 break;
201 }
202
203 return nr;
204}
205
206
207
/*
 * wrap - fold a log block number back into the circular log area.
 * @journal: pointer to an object with j_first and j_last members
 * @var:     modifiable lvalue holding the block number
 *
 * If @var has reached or passed j_last, the length of the log area
 * (j_last - j_first) is subtracted from it once.  @var is evaluated
 * more than once, so it must not have side effects.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
213
214
215
216
217
218
219
220
221
222
223
224
225
226int jbd2_journal_recover(journal_t *journal)
227{
228 int err, err2;
229 journal_superblock_t * sb;
230
231 struct recovery_info info;
232
233 memset(&info, 0, sizeof(info));
234 sb = journal->j_superblock;
235
236
237
238
239
240
241
242 if (!sb->s_start) {
243 jbd_debug(1, "No recovery required, last transaction %d\n",
244 be32_to_cpu(sb->s_sequence));
245 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
246 return 0;
247 }
248
249 err = do_one_pass(journal, &info, PASS_SCAN);
250 if (!err)
251 err = do_one_pass(journal, &info, PASS_REVOKE);
252 if (!err)
253 err = do_one_pass(journal, &info, PASS_REPLAY);
254
255 jbd_debug(1, "JBD: recovery, exit status %d, "
256 "recovered transactions %u to %u\n",
257 err, info.start_transaction, info.end_transaction);
258 jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
259 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
260
261
262
263 journal->j_transaction_sequence = ++info.end_transaction;
264
265 jbd2_journal_clear_revoke(journal);
266 err2 = sync_blockdev(journal->j_fs_dev);
267 if (!err)
268 err = err2;
269
270 return err;
271}
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286int jbd2_journal_skip_recovery(journal_t *journal)
287{
288 int err;
289 journal_superblock_t * sb;
290
291 struct recovery_info info;
292
293 memset (&info, 0, sizeof(info));
294 sb = journal->j_superblock;
295
296 err = do_one_pass(journal, &info, PASS_SCAN);
297
298 if (err) {
299 printk(KERN_ERR "JBD: error %d scanning journal\n", err);
300 ++journal->j_transaction_sequence;
301 } else {
302#ifdef CONFIG_JBD2_DEBUG
303 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
304#endif
305 jbd_debug(1,
306 "JBD: ignoring %d transaction%s from the journal.\n",
307 dropped, (dropped == 1) ? "" : "s");
308 journal->j_transaction_sequence = ++info.end_transaction;
309 }
310
311 journal->j_tail = 0;
312 return err;
313}
314
315static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
316{
317 unsigned long long block = be32_to_cpu(tag->t_blocknr);
318 if (tag_bytes > JBD2_TAG_SIZE32)
319 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
320 return block;
321}
322
323
324
325
326
327static int calc_chksums(journal_t *journal, struct buffer_head *bh,
328 unsigned long *next_log_block, __u32 *crc32_sum)
329{
330 int i, num_blks, err;
331 unsigned long io_block;
332 struct buffer_head *obh;
333
334 num_blks = count_tags(journal, bh);
335
336 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
337
338 for (i = 0; i < num_blks; i++) {
339 io_block = (*next_log_block)++;
340 wrap(journal, *next_log_block);
341 err = jread(&obh, journal, io_block);
342 if (err) {
343 printk(KERN_ERR "JBD: IO error %d recovering block "
344 "%lu in log\n", err, io_block);
345 return 1;
346 } else {
347 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
348 obh->b_size);
349 }
350 put_bh(obh);
351 }
352 return 0;
353}
354
355static int do_one_pass(journal_t *journal,
356 struct recovery_info *info, enum passtype pass)
357{
358 unsigned int first_commit_ID, next_commit_ID;
359 unsigned long next_log_block;
360 int err, success = 0;
361 journal_superblock_t * sb;
362 journal_header_t * tmp;
363 struct buffer_head * bh;
364 unsigned int sequence;
365 int blocktype;
366 int tag_bytes = journal_tag_bytes(journal);
367 __u32 crc32_sum = ~0;
368
369
370 int MAX_BLOCKS_PER_DESC;
371 MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
372 / tag_bytes);
373
374
375
376
377
378
379
380 sb = journal->j_superblock;
381 next_commit_ID = be32_to_cpu(sb->s_sequence);
382 next_log_block = be32_to_cpu(sb->s_start);
383
384 first_commit_ID = next_commit_ID;
385 if (pass == PASS_SCAN)
386 info->start_transaction = first_commit_ID;
387
388 jbd_debug(1, "Starting recovery pass %d\n", pass);
389
390
391
392
393
394
395
396
397 while (1) {
398 int flags;
399 char * tagp;
400 journal_block_tag_t * tag;
401 struct buffer_head * obh;
402 struct buffer_head * nbh;
403
404 cond_resched();
405
406
407
408
409
410 if (pass != PASS_SCAN)
411 if (tid_geq(next_commit_ID, info->end_transaction))
412 break;
413
414 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
415 next_commit_ID, next_log_block, journal->j_last);
416
417
418
419
420
421 jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
422 err = jread(&bh, journal, next_log_block);
423 if (err)
424 goto failed;
425
426 next_log_block++;
427 wrap(journal, next_log_block);
428
429
430
431
432
433
434
435 tmp = (journal_header_t *)bh->b_data;
436
437 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
438 brelse(bh);
439 break;
440 }
441
442 blocktype = be32_to_cpu(tmp->h_blocktype);
443 sequence = be32_to_cpu(tmp->h_sequence);
444 jbd_debug(3, "Found magic %d, sequence %d\n",
445 blocktype, sequence);
446
447 if (sequence != next_commit_ID) {
448 brelse(bh);
449 break;
450 }
451
452
453
454
455
456 switch(blocktype) {
457 case JBD2_DESCRIPTOR_BLOCK:
458
459
460
461
462 if (pass != PASS_REPLAY) {
463 if (pass == PASS_SCAN &&
464 JBD2_HAS_COMPAT_FEATURE(journal,
465 JBD2_FEATURE_COMPAT_CHECKSUM) &&
466 !info->end_transaction) {
467 if (calc_chksums(journal, bh,
468 &next_log_block,
469 &crc32_sum)) {
470 put_bh(bh);
471 break;
472 }
473 put_bh(bh);
474 continue;
475 }
476 next_log_block += count_tags(journal, bh);
477 wrap(journal, next_log_block);
478 put_bh(bh);
479 continue;
480 }
481
482
483
484
485
486 tagp = &bh->b_data[sizeof(journal_header_t)];
487 while ((tagp - bh->b_data + tag_bytes)
488 <= journal->j_blocksize) {
489 unsigned long io_block;
490
491 tag = (journal_block_tag_t *) tagp;
492 flags = be32_to_cpu(tag->t_flags);
493
494 io_block = next_log_block++;
495 wrap(journal, next_log_block);
496 err = jread(&obh, journal, io_block);
497 if (err) {
498
499
500 success = err;
501 printk (KERN_ERR
502 "JBD: IO error %d recovering "
503 "block %ld in log\n",
504 err, io_block);
505 } else {
506 unsigned long long blocknr;
507
508 J_ASSERT(obh != NULL);
509 blocknr = read_tag_block(tag_bytes,
510 tag);
511
512
513
514
515 if (jbd2_journal_test_revoke
516 (journal, blocknr,
517 next_commit_ID)) {
518 brelse(obh);
519 ++info->nr_revoke_hits;
520 goto skip_write;
521 }
522
523
524
525 nbh = __getblk(journal->j_fs_dev,
526 blocknr,
527 journal->j_blocksize);
528 if (nbh == NULL) {
529 printk(KERN_ERR
530 "JBD: Out of memory "
531 "during recovery.\n");
532 err = -ENOMEM;
533 brelse(bh);
534 brelse(obh);
535 goto failed;
536 }
537
538 lock_buffer(nbh);
539 memcpy(nbh->b_data, obh->b_data,
540 journal->j_blocksize);
541 if (flags & JBD2_FLAG_ESCAPE) {
542 *((__be32 *)nbh->b_data) =
543 cpu_to_be32(JBD2_MAGIC_NUMBER);
544 }
545
546 BUFFER_TRACE(nbh, "marking dirty");
547 set_buffer_uptodate(nbh);
548 mark_buffer_dirty(nbh);
549 BUFFER_TRACE(nbh, "marking uptodate");
550 ++info->nr_replays;
551
552 unlock_buffer(nbh);
553 brelse(obh);
554 brelse(nbh);
555 }
556
557 skip_write:
558 tagp += tag_bytes;
559 if (!(flags & JBD2_FLAG_SAME_UUID))
560 tagp += 16;
561
562 if (flags & JBD2_FLAG_LAST_TAG)
563 break;
564 }
565
566 brelse(bh);
567 continue;
568
569 case JBD2_COMMIT_BLOCK:
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605 if (pass == PASS_SCAN &&
606 JBD2_HAS_COMPAT_FEATURE(journal,
607 JBD2_FEATURE_COMPAT_CHECKSUM)) {
608 int chksum_err, chksum_seen;
609 struct commit_header *cbh =
610 (struct commit_header *)bh->b_data;
611 unsigned found_chksum =
612 be32_to_cpu(cbh->h_chksum[0]);
613
614 chksum_err = chksum_seen = 0;
615
616 if (info->end_transaction) {
617 journal->j_failed_commit =
618 info->end_transaction;
619 brelse(bh);
620 break;
621 }
622
623 if (crc32_sum == found_chksum &&
624 cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
625 cbh->h_chksum_size ==
626 JBD2_CRC32_CHKSUM_SIZE)
627 chksum_seen = 1;
628 else if (!(cbh->h_chksum_type == 0 &&
629 cbh->h_chksum_size == 0 &&
630 found_chksum == 0 &&
631 !chksum_seen))
632
633
634
635
636
637
638
639
640
641
642 chksum_err = 1;
643
644 if (chksum_err) {
645 info->end_transaction = next_commit_ID;
646
647 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
648 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
649 journal->j_failed_commit =
650 next_commit_ID;
651 brelse(bh);
652 break;
653 }
654 }
655 crc32_sum = ~0;
656 }
657 brelse(bh);
658 next_commit_ID++;
659 continue;
660
661 case JBD2_REVOKE_BLOCK:
662
663
664 if (pass != PASS_REVOKE) {
665 brelse(bh);
666 continue;
667 }
668
669 err = scan_revoke_records(journal, bh,
670 next_commit_ID, info);
671 brelse(bh);
672 if (err)
673 goto failed;
674 continue;
675
676 default:
677 jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
678 blocktype);
679 brelse(bh);
680 goto done;
681 }
682 }
683
684 done:
685
686
687
688
689
690
691
692 if (pass == PASS_SCAN) {
693 if (!info->end_transaction)
694 info->end_transaction = next_commit_ID;
695 } else {
696
697
698 if (info->end_transaction != next_commit_ID) {
699 printk (KERN_ERR "JBD: recovery pass %d ended at "
700 "transaction %u, expected %u\n",
701 pass, next_commit_ID, info->end_transaction);
702 if (!success)
703 success = -EIO;
704 }
705 }
706
707 return success;
708
709 failed:
710 return err;
711}
712
713
714
715
716static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
717 tid_t sequence, struct recovery_info *info)
718{
719 jbd2_journal_revoke_header_t *header;
720 int offset, max;
721 int record_len = 4;
722
723 header = (jbd2_journal_revoke_header_t *) bh->b_data;
724 offset = sizeof(jbd2_journal_revoke_header_t);
725 max = be32_to_cpu(header->r_count);
726
727 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
728 record_len = 8;
729
730 while (offset + record_len <= max) {
731 unsigned long long blocknr;
732 int err;
733
734 if (record_len == 4)
735 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
736 else
737 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
738 offset += record_len;
739 err = jbd2_journal_set_revoke(journal, blocknr, sequence);
740 if (err)
741 return err;
742 ++info->nr_revokes;
743 }
744 return 0;
745}
746