1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <linux/time.h>
17#include <linux/fs.h>
18#include <linux/jbd.h>
19#include <linux/errno.h>
20#include <linux/slab.h>
21#include <linux/mm.h>
22#include <linux/pagemap.h>
23
24
25
26
/*
 * I/O completion callback for buffers submitted by the commit code below.
 *
 * Records the outcome of the I/O in the buffer's uptodate bit (set when
 * @uptodate is non-zero, cleared otherwise) and then unlocks the buffer so
 * that code sleeping in wait_on_buffer() can go on to inspect the result.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	BUFFER_TRACE(bh, "");

	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
}
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51static void release_buffer_page(struct buffer_head *bh)
52{
53 struct page *page;
54
55 if (buffer_dirty(bh))
56 goto nope;
57 if (atomic_read(&bh->b_count) != 1)
58 goto nope;
59 page = bh->b_page;
60 if (!page)
61 goto nope;
62 if (page->mapping)
63 goto nope;
64
65
66 if (TestSetPageLocked(page))
67 goto nope;
68
69 page_cache_get(page);
70 __brelse(bh);
71 try_to_free_buffers(page);
72 unlock_page(page);
73 page_cache_release(page);
74 return;
75
76nope:
77 __brelse(bh);
78}
79
80
81
82
83
84
85static int inverted_lock(journal_t *journal, struct buffer_head *bh)
86{
87 if (!jbd_trylock_bh_state(bh)) {
88 spin_unlock(&journal->j_list_lock);
89 schedule();
90 return 0;
91 }
92 return 1;
93}
94
95
96
97
98
99
100
101
102static int journal_write_commit_record(journal_t *journal,
103 transaction_t *commit_transaction)
104{
105 struct journal_head *descriptor;
106 struct buffer_head *bh;
107 int i, ret;
108 int barrier_done = 0;
109
110 if (is_journal_aborted(journal))
111 return 0;
112
113 descriptor = journal_get_descriptor_buffer(journal);
114 if (!descriptor)
115 return 1;
116
117 bh = jh2bh(descriptor);
118
119
120 for (i = 0; i < bh->b_size; i += 512) {
121 journal_header_t *tmp = (journal_header_t*)bh->b_data;
122 tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
123 tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
124 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
125 }
126
127 JBUFFER_TRACE(descriptor, "write commit block");
128 set_buffer_dirty(bh);
129 if (journal->j_flags & JFS_BARRIER) {
130 set_buffer_ordered(bh);
131 barrier_done = 1;
132 }
133 ret = sync_dirty_buffer(bh);
134
135
136
137
138
139 if (ret == -EOPNOTSUPP && barrier_done) {
140 char b[BDEVNAME_SIZE];
141
142 printk(KERN_WARNING
143 "JBD: barrier-based sync failed on %s - "
144 "disabling barriers\n",
145 bdevname(journal->j_dev, b));
146 spin_lock(&journal->j_state_lock);
147 journal->j_flags &= ~JFS_BARRIER;
148 spin_unlock(&journal->j_state_lock);
149
150
151 clear_buffer_ordered(bh);
152 set_buffer_uptodate(bh);
153 set_buffer_dirty(bh);
154 ret = sync_dirty_buffer(bh);
155 }
156 put_bh(bh);
157 journal_put_journal_head(descriptor);
158
159 return (ret == -EIO);
160}
161
162static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
163{
164 int i;
165
166 for (i = 0; i < bufs; i++) {
167 wbuf[i]->b_end_io = end_buffer_write_sync;
168
169 submit_bh(WRITE, wbuf[i]);
170 }
171}
172
173
174
175
176static void journal_submit_data_buffers(journal_t *journal,
177 transaction_t *commit_transaction)
178{
179 struct journal_head *jh;
180 struct buffer_head *bh;
181 int locked;
182 int bufs = 0;
183 struct buffer_head **wbuf = journal->j_wbuf;
184
185
186
187
188
189
190
191
192
193write_out_data:
194 cond_resched();
195 spin_lock(&journal->j_list_lock);
196
197 while (commit_transaction->t_sync_datalist) {
198 jh = commit_transaction->t_sync_datalist;
199 bh = jh2bh(jh);
200 locked = 0;
201
202
203
204 get_bh(bh);
205
206
207
208
209
210 if (buffer_dirty(bh)) {
211 if (test_set_buffer_locked(bh)) {
212 BUFFER_TRACE(bh, "needs blocking lock");
213 spin_unlock(&journal->j_list_lock);
214
215 journal_do_submit_data(wbuf, bufs);
216 bufs = 0;
217 lock_buffer(bh);
218 spin_lock(&journal->j_list_lock);
219 }
220 locked = 1;
221 }
222
223 if (!inverted_lock(journal, bh)) {
224 jbd_lock_bh_state(bh);
225 spin_lock(&journal->j_list_lock);
226 }
227
228 if (!buffer_jbd(bh)
229 || jh->b_transaction != commit_transaction
230 || jh->b_jlist != BJ_SyncData) {
231 jbd_unlock_bh_state(bh);
232 if (locked)
233 unlock_buffer(bh);
234 BUFFER_TRACE(bh, "already cleaned up");
235 put_bh(bh);
236 continue;
237 }
238 if (locked && test_clear_buffer_dirty(bh)) {
239 BUFFER_TRACE(bh, "needs writeout, adding to array");
240 wbuf[bufs++] = bh;
241 __journal_file_buffer(jh, commit_transaction,
242 BJ_Locked);
243 jbd_unlock_bh_state(bh);
244 if (bufs == journal->j_wbufsize) {
245 spin_unlock(&journal->j_list_lock);
246 journal_do_submit_data(wbuf, bufs);
247 bufs = 0;
248 goto write_out_data;
249 }
250 } else if (!locked && buffer_locked(bh)) {
251 __journal_file_buffer(jh, commit_transaction,
252 BJ_Locked);
253 jbd_unlock_bh_state(bh);
254 put_bh(bh);
255 } else {
256 BUFFER_TRACE(bh, "writeout complete: unfile");
257 __journal_unfile_buffer(jh);
258 jbd_unlock_bh_state(bh);
259 if (locked)
260 unlock_buffer(bh);
261 journal_remove_journal_head(bh);
262
263
264 put_bh(bh);
265 put_bh(bh);
266 }
267
268 if (lock_need_resched(&journal->j_list_lock)) {
269 spin_unlock(&journal->j_list_lock);
270 goto write_out_data;
271 }
272 }
273 spin_unlock(&journal->j_list_lock);
274 journal_do_submit_data(wbuf, bufs);
275}
276
277
278
279
280
281
282
283void journal_commit_transaction(journal_t *journal)
284{
285 transaction_t *commit_transaction;
286 struct journal_head *jh, *new_jh, *descriptor;
287 struct buffer_head **wbuf = journal->j_wbuf;
288 int bufs;
289 int flags;
290 int err;
291 unsigned long blocknr;
292 char *tagp = NULL;
293 journal_header_t *header;
294 journal_block_tag_t *tag = NULL;
295 int space_left = 0;
296 int first_tag = 0;
297 int tag_flag;
298 int i;
299
300
301
302
303
304
305#ifdef COMMIT_STATS
306 spin_lock(&journal->j_list_lock);
307 summarise_journal_usage(journal);
308 spin_unlock(&journal->j_list_lock);
309#endif
310
311
312 if (journal->j_flags & JFS_FLUSHED) {
313 jbd_debug(3, "super block updated\n");
314 journal_update_superblock(journal, 1);
315 } else {
316 jbd_debug(3, "superblock not updated\n");
317 }
318
319 J_ASSERT(journal->j_running_transaction != NULL);
320 J_ASSERT(journal->j_committing_transaction == NULL);
321
322 commit_transaction = journal->j_running_transaction;
323 J_ASSERT(commit_transaction->t_state == T_RUNNING);
324
325 jbd_debug(1, "JBD: starting commit of transaction %d\n",
326 commit_transaction->t_tid);
327
328 spin_lock(&journal->j_state_lock);
329 commit_transaction->t_state = T_LOCKED;
330
331 spin_lock(&commit_transaction->t_handle_lock);
332 while (commit_transaction->t_updates) {
333 DEFINE_WAIT(wait);
334
335 prepare_to_wait(&journal->j_wait_updates, &wait,
336 TASK_UNINTERRUPTIBLE);
337 if (commit_transaction->t_updates) {
338 spin_unlock(&commit_transaction->t_handle_lock);
339 spin_unlock(&journal->j_state_lock);
340 schedule();
341 spin_lock(&journal->j_state_lock);
342 spin_lock(&commit_transaction->t_handle_lock);
343 }
344 finish_wait(&journal->j_wait_updates, &wait);
345 }
346 spin_unlock(&commit_transaction->t_handle_lock);
347
348 J_ASSERT (commit_transaction->t_outstanding_credits <=
349 journal->j_max_transaction_buffers);
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367 while (commit_transaction->t_reserved_list) {
368 jh = commit_transaction->t_reserved_list;
369 JBUFFER_TRACE(jh, "reserved, unused: refile");
370
371
372
373
374 if (jh->b_committed_data) {
375 struct buffer_head *bh = jh2bh(jh);
376
377 jbd_lock_bh_state(bh);
378 jbd_free(jh->b_committed_data, bh->b_size);
379 jh->b_committed_data = NULL;
380 jbd_unlock_bh_state(bh);
381 }
382 journal_refile_buffer(journal, jh);
383 }
384
385
386
387
388
389
390 spin_lock(&journal->j_list_lock);
391 __journal_clean_checkpoint_list(journal);
392 spin_unlock(&journal->j_list_lock);
393
394 jbd_debug (3, "JBD: commit phase 1\n");
395
396
397
398
399 journal_switch_revoke_table(journal);
400
401 commit_transaction->t_state = T_FLUSH;
402 journal->j_committing_transaction = commit_transaction;
403 journal->j_running_transaction = NULL;
404 commit_transaction->t_log_start = journal->j_head;
405 wake_up(&journal->j_wait_transaction_locked);
406 spin_unlock(&journal->j_state_lock);
407
408 jbd_debug (3, "JBD: commit phase 2\n");
409
410
411
412
413
414 spin_lock(&journal->j_list_lock);
415 if (commit_transaction->t_buffers) {
416 new_jh = jh = commit_transaction->t_buffers->b_tnext;
417 do {
418 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
419 new_jh->b_modified == 0);
420 new_jh->b_modified = 0;
421 new_jh = new_jh->b_tnext;
422 } while (new_jh != jh);
423 }
424 spin_unlock(&journal->j_list_lock);
425
426
427
428
429
430 err = 0;
431 journal_submit_data_buffers(journal, commit_transaction);
432
433
434
435
436 spin_lock(&journal->j_list_lock);
437 while (commit_transaction->t_locked_list) {
438 struct buffer_head *bh;
439
440 jh = commit_transaction->t_locked_list->b_tprev;
441 bh = jh2bh(jh);
442 get_bh(bh);
443 if (buffer_locked(bh)) {
444 spin_unlock(&journal->j_list_lock);
445 wait_on_buffer(bh);
446 if (unlikely(!buffer_uptodate(bh)))
447 err = -EIO;
448 spin_lock(&journal->j_list_lock);
449 }
450 if (!inverted_lock(journal, bh)) {
451 put_bh(bh);
452 spin_lock(&journal->j_list_lock);
453 continue;
454 }
455 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
456 __journal_unfile_buffer(jh);
457 jbd_unlock_bh_state(bh);
458 journal_remove_journal_head(bh);
459 put_bh(bh);
460 } else {
461 jbd_unlock_bh_state(bh);
462 }
463 put_bh(bh);
464 cond_resched_lock(&journal->j_list_lock);
465 }
466 spin_unlock(&journal->j_list_lock);
467
468 if (err)
469 journal_abort(journal, err);
470
471 journal_write_revoke_records(journal, commit_transaction);
472
473 jbd_debug(3, "JBD: commit phase 2\n");
474
475
476
477
478
479
480
481 J_ASSERT (commit_transaction->t_sync_datalist == NULL);
482
483 jbd_debug (3, "JBD: commit phase 3\n");
484
485
486
487
488
489
490 commit_transaction->t_state = T_COMMIT;
491
492 descriptor = NULL;
493 bufs = 0;
494 while (commit_transaction->t_buffers) {
495
496
497
498 jh = commit_transaction->t_buffers;
499
500
501
502
503 if (is_journal_aborted(journal)) {
504 JBUFFER_TRACE(jh, "journal is aborting: refile");
505 journal_refile_buffer(journal, jh);
506
507
508
509
510 if (!commit_transaction->t_buffers)
511 goto start_journal_io;
512 continue;
513 }
514
515
516
517
518 if (!descriptor) {
519 struct buffer_head *bh;
520
521 J_ASSERT (bufs == 0);
522
523 jbd_debug(4, "JBD: get descriptor\n");
524
525 descriptor = journal_get_descriptor_buffer(journal);
526 if (!descriptor) {
527 journal_abort(journal, -EIO);
528 continue;
529 }
530
531 bh = jh2bh(descriptor);
532 jbd_debug(4, "JBD: got buffer %llu (%p)\n",
533 (unsigned long long)bh->b_blocknr, bh->b_data);
534 header = (journal_header_t *)&bh->b_data[0];
535 header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
536 header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
537 header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
538
539 tagp = &bh->b_data[sizeof(journal_header_t)];
540 space_left = bh->b_size - sizeof(journal_header_t);
541 first_tag = 1;
542 set_buffer_jwrite(bh);
543 set_buffer_dirty(bh);
544 wbuf[bufs++] = bh;
545
546
547
548 BUFFER_TRACE(bh, "ph3: file as descriptor");
549 journal_file_buffer(descriptor, commit_transaction,
550 BJ_LogCtl);
551 }
552
553
554
555 err = journal_next_log_block(journal, &blocknr);
556
557
558
559 if (err) {
560 journal_abort(journal, err);
561 continue;
562 }
563
564
565
566
567
568
569 commit_transaction->t_outstanding_credits--;
570
571
572
573
574 atomic_inc(&jh2bh(jh)->b_count);
575
576
577
578
579
580 set_bit(BH_JWrite, &jh2bh(jh)->b_state);
581
582
583
584
585
586
587 JBUFFER_TRACE(jh, "ph3: write metadata");
588 flags = journal_write_metadata_buffer(commit_transaction,
589 jh, &new_jh, blocknr);
590 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
591 wbuf[bufs++] = jh2bh(new_jh);
592
593
594
595
596 tag_flag = 0;
597 if (flags & 1)
598 tag_flag |= JFS_FLAG_ESCAPE;
599 if (!first_tag)
600 tag_flag |= JFS_FLAG_SAME_UUID;
601
602 tag = (journal_block_tag_t *) tagp;
603 tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
604 tag->t_flags = cpu_to_be32(tag_flag);
605 tagp += sizeof(journal_block_tag_t);
606 space_left -= sizeof(journal_block_tag_t);
607
608 if (first_tag) {
609 memcpy (tagp, journal->j_uuid, 16);
610 tagp += 16;
611 space_left -= 16;
612 first_tag = 0;
613 }
614
615
616
617
618 if (bufs == journal->j_wbufsize ||
619 commit_transaction->t_buffers == NULL ||
620 space_left < sizeof(journal_block_tag_t) + 16) {
621
622 jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
623
624
625
626
627
628 tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
629
630start_journal_io:
631 for (i = 0; i < bufs; i++) {
632 struct buffer_head *bh = wbuf[i];
633 lock_buffer(bh);
634 clear_buffer_dirty(bh);
635 set_buffer_uptodate(bh);
636 bh->b_end_io = journal_end_buffer_io_sync;
637 submit_bh(WRITE, bh);
638 }
639 cond_resched();
640
641
642
643 descriptor = NULL;
644 bufs = 0;
645 }
646 }
647
648
649
650
651
652
653
654
655
656
657
658
659 jbd_debug(3, "JBD: commit phase 4\n");
660
661
662
663
664
665wait_for_iobuf:
666 while (commit_transaction->t_iobuf_list != NULL) {
667 struct buffer_head *bh;
668
669 jh = commit_transaction->t_iobuf_list->b_tprev;
670 bh = jh2bh(jh);
671 if (buffer_locked(bh)) {
672 wait_on_buffer(bh);
673 goto wait_for_iobuf;
674 }
675 if (cond_resched())
676 goto wait_for_iobuf;
677
678 if (unlikely(!buffer_uptodate(bh)))
679 err = -EIO;
680
681 clear_buffer_jwrite(bh);
682
683 JBUFFER_TRACE(jh, "ph4: unfile after journal write");
684 journal_unfile_buffer(journal, jh);
685
686
687
688
689
690 BUFFER_TRACE(bh, "dumping temporary bh");
691 journal_put_journal_head(jh);
692 __brelse(bh);
693 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
694 free_buffer_head(bh);
695
696
697
698 jh = commit_transaction->t_shadow_list->b_tprev;
699 bh = jh2bh(jh);
700 clear_bit(BH_JWrite, &bh->b_state);
701 J_ASSERT_BH(bh, buffer_jbddirty(bh));
702
703
704
705
706
707 JBUFFER_TRACE(jh, "file as BJ_Forget");
708 journal_file_buffer(jh, commit_transaction, BJ_Forget);
709
710
711 wake_up_bit(&bh->b_state, BH_Unshadow);
712 JBUFFER_TRACE(jh, "brelse shadowed buffer");
713 __brelse(bh);
714 }
715
716 J_ASSERT (commit_transaction->t_shadow_list == NULL);
717
718 jbd_debug(3, "JBD: commit phase 5\n");
719
720
721 wait_for_ctlbuf:
722 while (commit_transaction->t_log_list != NULL) {
723 struct buffer_head *bh;
724
725 jh = commit_transaction->t_log_list->b_tprev;
726 bh = jh2bh(jh);
727 if (buffer_locked(bh)) {
728 wait_on_buffer(bh);
729 goto wait_for_ctlbuf;
730 }
731 if (cond_resched())
732 goto wait_for_ctlbuf;
733
734 if (unlikely(!buffer_uptodate(bh)))
735 err = -EIO;
736
737 BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
738 clear_buffer_jwrite(bh);
739 journal_unfile_buffer(journal, jh);
740 journal_put_journal_head(jh);
741 __brelse(bh);
742
743 }
744
745 jbd_debug(3, "JBD: commit phase 6\n");
746
747 if (journal_write_commit_record(journal, commit_transaction))
748 err = -EIO;
749
750 if (err)
751 journal_abort(journal, err);
752
753
754
755
756
757
758 jbd_debug(3, "JBD: commit phase 7\n");
759
760 J_ASSERT(commit_transaction->t_sync_datalist == NULL);
761 J_ASSERT(commit_transaction->t_buffers == NULL);
762 J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
763 J_ASSERT(commit_transaction->t_iobuf_list == NULL);
764 J_ASSERT(commit_transaction->t_shadow_list == NULL);
765 J_ASSERT(commit_transaction->t_log_list == NULL);
766
767restart_loop:
768
769
770
771
772 spin_lock(&journal->j_list_lock);
773 while (commit_transaction->t_forget) {
774 transaction_t *cp_transaction;
775 struct buffer_head *bh;
776
777 jh = commit_transaction->t_forget;
778 spin_unlock(&journal->j_list_lock);
779 bh = jh2bh(jh);
780 jbd_lock_bh_state(bh);
781 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
782 jh->b_transaction == journal->j_running_transaction);
783
784
785
786
787
788
789
790
791
792
793
794 if (jh->b_committed_data) {
795 jbd_free(jh->b_committed_data, bh->b_size);
796 jh->b_committed_data = NULL;
797 if (jh->b_frozen_data) {
798 jh->b_committed_data = jh->b_frozen_data;
799 jh->b_frozen_data = NULL;
800 }
801 } else if (jh->b_frozen_data) {
802 jbd_free(jh->b_frozen_data, bh->b_size);
803 jh->b_frozen_data = NULL;
804 }
805
806 spin_lock(&journal->j_list_lock);
807 cp_transaction = jh->b_cp_transaction;
808 if (cp_transaction) {
809 JBUFFER_TRACE(jh, "remove from old cp transaction");
810 __journal_remove_checkpoint(jh);
811 }
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827 if (buffer_freed(bh)) {
828 clear_buffer_freed(bh);
829 clear_buffer_jbddirty(bh);
830 }
831
832 if (buffer_jbddirty(bh)) {
833 JBUFFER_TRACE(jh, "add to new checkpointing trans");
834 __journal_insert_checkpoint(jh, commit_transaction);
835 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
836 __journal_refile_buffer(jh);
837 jbd_unlock_bh_state(bh);
838 } else {
839 J_ASSERT_BH(bh, !buffer_dirty(bh));
840
841
842
843
844
845
846
847 JBUFFER_TRACE(jh, "refile or unfile freed buffer");
848 __journal_refile_buffer(jh);
849 if (!jh->b_transaction) {
850 jbd_unlock_bh_state(bh);
851
852 journal_remove_journal_head(bh);
853 release_buffer_page(bh);
854 } else
855 jbd_unlock_bh_state(bh);
856 }
857 cond_resched_lock(&journal->j_list_lock);
858 }
859 spin_unlock(&journal->j_list_lock);
860
861
862
863
864
865
866 spin_lock(&journal->j_state_lock);
867 spin_lock(&journal->j_list_lock);
868
869
870
871
872 if (commit_transaction->t_forget) {
873 spin_unlock(&journal->j_list_lock);
874 spin_unlock(&journal->j_state_lock);
875 goto restart_loop;
876 }
877
878
879
880 jbd_debug(3, "JBD: commit phase 8\n");
881
882 J_ASSERT(commit_transaction->t_state == T_COMMIT);
883
884 commit_transaction->t_state = T_FINISHED;
885 J_ASSERT(commit_transaction == journal->j_committing_transaction);
886 journal->j_commit_sequence = commit_transaction->t_tid;
887 journal->j_committing_transaction = NULL;
888 spin_unlock(&journal->j_state_lock);
889
890 if (commit_transaction->t_checkpoint_list == NULL &&
891 commit_transaction->t_checkpoint_io_list == NULL) {
892 __journal_drop_transaction(journal, commit_transaction);
893 } else {
894 if (journal->j_checkpoint_transactions == NULL) {
895 journal->j_checkpoint_transactions = commit_transaction;
896 commit_transaction->t_cpnext = commit_transaction;
897 commit_transaction->t_cpprev = commit_transaction;
898 } else {
899 commit_transaction->t_cpnext =
900 journal->j_checkpoint_transactions;
901 commit_transaction->t_cpprev =
902 commit_transaction->t_cpnext->t_cpprev;
903 commit_transaction->t_cpnext->t_cpprev =
904 commit_transaction;
905 commit_transaction->t_cpprev->t_cpnext =
906 commit_transaction;
907 }
908 }
909 spin_unlock(&journal->j_list_lock);
910
911 jbd_debug(1, "JBD: commit %d complete, head %d\n",
912 journal->j_commit_sequence, journal->j_tail_sequence);
913
914 wake_up(&journal->j_wait_done_commit);
915}
916