1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include <linux/time.h>
21#include <linux/fs.h>
22#include <linux/jbd.h>
23#include <linux/errno.h>
24#include <linux/slab.h>
25#include <linux/blkdev.h>
26#include <trace/events/jbd.h>
27
28
29
30
31
32
33static inline void __buffer_unlink_first(struct journal_head *jh)
34{
35 transaction_t *transaction = jh->b_cp_transaction;
36
37 jh->b_cpnext->b_cpprev = jh->b_cpprev;
38 jh->b_cpprev->b_cpnext = jh->b_cpnext;
39 if (transaction->t_checkpoint_list == jh) {
40 transaction->t_checkpoint_list = jh->b_cpnext;
41 if (transaction->t_checkpoint_list == jh)
42 transaction->t_checkpoint_list = NULL;
43 }
44}
45
46
47
48
49
50
51static inline void __buffer_unlink(struct journal_head *jh)
52{
53 transaction_t *transaction = jh->b_cp_transaction;
54
55 __buffer_unlink_first(jh);
56 if (transaction->t_checkpoint_io_list == jh) {
57 transaction->t_checkpoint_io_list = jh->b_cpnext;
58 if (transaction->t_checkpoint_io_list == jh)
59 transaction->t_checkpoint_io_list = NULL;
60 }
61}
62
63
64
65
66
67
68static inline void __buffer_relink_io(struct journal_head *jh)
69{
70 transaction_t *transaction = jh->b_cp_transaction;
71
72 __buffer_unlink_first(jh);
73
74 if (!transaction->t_checkpoint_io_list) {
75 jh->b_cpnext = jh->b_cpprev = jh;
76 } else {
77 jh->b_cpnext = transaction->t_checkpoint_io_list;
78 jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
79 jh->b_cpprev->b_cpnext = jh;
80 jh->b_cpnext->b_cpprev = jh;
81 }
82 transaction->t_checkpoint_io_list = jh;
83}
84
85
86
87
88
89
90
91
92
93static int __try_to_free_cp_buf(struct journal_head *jh)
94{
95 int ret = 0;
96 struct buffer_head *bh = jh2bh(jh);
97
98 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
99 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
100
101
102
103
104 get_bh(bh);
105 JBUFFER_TRACE(jh, "remove from checkpoint list");
106 ret = __journal_remove_checkpoint(jh) + 1;
107 jbd_unlock_bh_state(bh);
108 BUFFER_TRACE(bh, "release");
109 __brelse(bh);
110 } else {
111 jbd_unlock_bh_state(bh);
112 }
113 return ret;
114}
115
116
117
118
119
120
121
122void __log_wait_for_space(journal_t *journal)
123{
124 int nblocks, space_left;
125 assert_spin_locked(&journal->j_state_lock);
126
127 nblocks = jbd_space_needed(journal);
128 while (__log_space_left(journal) < nblocks) {
129 if (journal->j_flags & JFS_ABORT)
130 return;
131 spin_unlock(&journal->j_state_lock);
132 mutex_lock(&journal->j_checkpoint_mutex);
133
134
135
136
137
138
139
140
141
142
143
144
145 spin_lock(&journal->j_state_lock);
146 spin_lock(&journal->j_list_lock);
147 nblocks = jbd_space_needed(journal);
148 space_left = __log_space_left(journal);
149 if (space_left < nblocks) {
150 int chkpt = journal->j_checkpoint_transactions != NULL;
151 tid_t tid = 0;
152
153 if (journal->j_committing_transaction)
154 tid = journal->j_committing_transaction->t_tid;
155 spin_unlock(&journal->j_list_lock);
156 spin_unlock(&journal->j_state_lock);
157 if (chkpt) {
158 log_do_checkpoint(journal);
159 } else if (cleanup_journal_tail(journal) == 0) {
160
161 ;
162 } else if (tid) {
163 log_wait_commit(journal, tid);
164 } else {
165 printk(KERN_ERR "%s: needed %d blocks and "
166 "only had %d space available\n",
167 __func__, nblocks, space_left);
168 printk(KERN_ERR "%s: no way to get more "
169 "journal space\n", __func__);
170 WARN_ON(1);
171 journal_abort(journal, 0);
172 }
173 spin_lock(&journal->j_state_lock);
174 } else {
175 spin_unlock(&journal->j_list_lock);
176 }
177 mutex_unlock(&journal->j_checkpoint_mutex);
178 }
179}
180
181
182
183
184
185
186static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
187 __releases(journal->j_list_lock)
188{
189 get_bh(bh);
190 spin_unlock(&journal->j_list_lock);
191 jbd_lock_bh_state(bh);
192 jbd_unlock_bh_state(bh);
193 put_bh(bh);
194}
195
196
197
198
199
200
201
202
203
204
205
206
207static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
208{
209 struct journal_head *jh;
210 struct buffer_head *bh;
211 tid_t this_tid;
212 int released = 0;
213 int ret = 0;
214
215 this_tid = transaction->t_tid;
216restart:
217
218 if (journal->j_checkpoint_transactions != transaction ||
219 transaction->t_tid != this_tid)
220 return ret;
221 while (!released && transaction->t_checkpoint_io_list) {
222 jh = transaction->t_checkpoint_io_list;
223 bh = jh2bh(jh);
224 if (!jbd_trylock_bh_state(bh)) {
225 jbd_sync_bh(journal, bh);
226 spin_lock(&journal->j_list_lock);
227 goto restart;
228 }
229 get_bh(bh);
230 if (buffer_locked(bh)) {
231 spin_unlock(&journal->j_list_lock);
232 jbd_unlock_bh_state(bh);
233 wait_on_buffer(bh);
234
235 BUFFER_TRACE(bh, "brelse");
236 __brelse(bh);
237 spin_lock(&journal->j_list_lock);
238 goto restart;
239 }
240 if (unlikely(buffer_write_io_error(bh)))
241 ret = -EIO;
242
243
244
245
246
247 released = __journal_remove_checkpoint(jh);
248 jbd_unlock_bh_state(bh);
249 __brelse(bh);
250 }
251
252 return ret;
253}
254
255#define NR_BATCH 64
256
257static void
258__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
259{
260 int i;
261 struct blk_plug plug;
262
263 blk_start_plug(&plug);
264 for (i = 0; i < *batch_count; i++)
265 write_dirty_buffer(bhs[i], WRITE_SYNC);
266 blk_finish_plug(&plug);
267
268 for (i = 0; i < *batch_count; i++) {
269 struct buffer_head *bh = bhs[i];
270 clear_buffer_jwrite(bh);
271 BUFFER_TRACE(bh, "brelse");
272 __brelse(bh);
273 }
274 *batch_count = 0;
275}
276
277
278
279
280
281
282
283
284
285
286
287static int __process_buffer(journal_t *journal, struct journal_head *jh,
288 struct buffer_head **bhs, int *batch_count)
289{
290 struct buffer_head *bh = jh2bh(jh);
291 int ret = 0;
292
293 if (buffer_locked(bh)) {
294 get_bh(bh);
295 spin_unlock(&journal->j_list_lock);
296 jbd_unlock_bh_state(bh);
297 wait_on_buffer(bh);
298
299 BUFFER_TRACE(bh, "brelse");
300 __brelse(bh);
301 ret = 1;
302 } else if (jh->b_transaction != NULL) {
303 transaction_t *t = jh->b_transaction;
304 tid_t tid = t->t_tid;
305
306 spin_unlock(&journal->j_list_lock);
307 jbd_unlock_bh_state(bh);
308 log_start_commit(journal, tid);
309 log_wait_commit(journal, tid);
310 ret = 1;
311 } else if (!buffer_dirty(bh)) {
312 ret = 1;
313 if (unlikely(buffer_write_io_error(bh)))
314 ret = -EIO;
315 get_bh(bh);
316 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
317 BUFFER_TRACE(bh, "remove from checkpoint");
318 __journal_remove_checkpoint(jh);
319 spin_unlock(&journal->j_list_lock);
320 jbd_unlock_bh_state(bh);
321 __brelse(bh);
322 } else {
323
324
325
326
327
328
329
330 BUFFER_TRACE(bh, "queue");
331 get_bh(bh);
332 J_ASSERT_BH(bh, !buffer_jwrite(bh));
333 set_buffer_jwrite(bh);
334 bhs[*batch_count] = bh;
335 __buffer_relink_io(jh);
336 jbd_unlock_bh_state(bh);
337 (*batch_count)++;
338 if (*batch_count == NR_BATCH) {
339 spin_unlock(&journal->j_list_lock);
340 __flush_batch(journal, bhs, batch_count);
341 ret = 1;
342 }
343 }
344 return ret;
345}
346
347
348
349
350
351
352
353
354
355int log_do_checkpoint(journal_t *journal)
356{
357 transaction_t *transaction;
358 tid_t this_tid;
359 int result;
360
361 jbd_debug(1, "Start checkpoint\n");
362
363
364
365
366
367
368 result = cleanup_journal_tail(journal);
369 trace_jbd_checkpoint(journal, result);
370 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
371 if (result <= 0)
372 return result;
373
374
375
376
377
378 result = 0;
379 spin_lock(&journal->j_list_lock);
380 if (!journal->j_checkpoint_transactions)
381 goto out;
382 transaction = journal->j_checkpoint_transactions;
383 this_tid = transaction->t_tid;
384restart:
385
386
387
388
389
390 if (journal->j_checkpoint_transactions == transaction &&
391 transaction->t_tid == this_tid) {
392 int batch_count = 0;
393 struct buffer_head *bhs[NR_BATCH];
394 struct journal_head *jh;
395 int retry = 0, err;
396
397 while (!retry && transaction->t_checkpoint_list) {
398 struct buffer_head *bh;
399
400 jh = transaction->t_checkpoint_list;
401 bh = jh2bh(jh);
402 if (!jbd_trylock_bh_state(bh)) {
403 jbd_sync_bh(journal, bh);
404 retry = 1;
405 break;
406 }
407 retry = __process_buffer(journal, jh, bhs,&batch_count);
408 if (retry < 0 && !result)
409 result = retry;
410 if (!retry && (need_resched() ||
411 spin_needbreak(&journal->j_list_lock))) {
412 spin_unlock(&journal->j_list_lock);
413 retry = 1;
414 break;
415 }
416 }
417
418 if (batch_count) {
419 if (!retry) {
420 spin_unlock(&journal->j_list_lock);
421 retry = 1;
422 }
423 __flush_batch(journal, bhs, &batch_count);
424 }
425
426 if (retry) {
427 spin_lock(&journal->j_list_lock);
428 goto restart;
429 }
430
431
432
433
434 err = __wait_cp_io(journal, transaction);
435 if (!result)
436 result = err;
437 }
438out:
439 spin_unlock(&journal->j_list_lock);
440 if (result < 0)
441 journal_abort(journal, result);
442 else
443 result = cleanup_journal_tail(journal);
444
445 return (result < 0) ? result : 0;
446}
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464int cleanup_journal_tail(journal_t *journal)
465{
466 transaction_t * transaction;
467 tid_t first_tid;
468 unsigned int blocknr, freed;
469
470 if (is_journal_aborted(journal))
471 return 1;
472
473
474
475
476
477
478
479
480
481 spin_lock(&journal->j_state_lock);
482 spin_lock(&journal->j_list_lock);
483 transaction = journal->j_checkpoint_transactions;
484 if (transaction) {
485 first_tid = transaction->t_tid;
486 blocknr = transaction->t_log_start;
487 } else if ((transaction = journal->j_committing_transaction) != NULL) {
488 first_tid = transaction->t_tid;
489 blocknr = transaction->t_log_start;
490 } else if ((transaction = journal->j_running_transaction) != NULL) {
491 first_tid = transaction->t_tid;
492 blocknr = journal->j_head;
493 } else {
494 first_tid = journal->j_transaction_sequence;
495 blocknr = journal->j_head;
496 }
497 spin_unlock(&journal->j_list_lock);
498 J_ASSERT(blocknr != 0);
499
500
501
502 if (journal->j_tail_sequence == first_tid) {
503 spin_unlock(&journal->j_state_lock);
504 return 1;
505 }
506 spin_unlock(&journal->j_state_lock);
507
508
509
510
511
512
513
514
515
516
517
518
519
520 journal_update_sb_log_tail(journal, first_tid, blocknr,
521 WRITE_FLUSH_FUA);
522
523 spin_lock(&journal->j_state_lock);
524
525
526
527 freed = blocknr - journal->j_tail;
528 if (blocknr < journal->j_tail)
529 freed = freed + journal->j_last - journal->j_first;
530
531 trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
532 jbd_debug(1,
533 "Cleaning journal tail from %d to %d (offset %u), "
534 "freeing %u\n",
535 journal->j_tail_sequence, first_tid, blocknr, freed);
536
537 journal->j_free += freed;
538 journal->j_tail_sequence = first_tid;
539 journal->j_tail = blocknr;
540 spin_unlock(&journal->j_state_lock);
541 return 0;
542}
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
558{
559 struct journal_head *last_jh;
560 struct journal_head *next_jh = jh;
561 int ret, freed = 0;
562
563 *released = 0;
564 if (!jh)
565 return 0;
566
567 last_jh = jh->b_cpprev;
568 do {
569 jh = next_jh;
570 next_jh = jh->b_cpnext;
571
572 if (jbd_trylock_bh_state(jh2bh(jh))) {
573 ret = __try_to_free_cp_buf(jh);
574 if (ret) {
575 freed++;
576 if (ret == 2) {
577 *released = 1;
578 return freed;
579 }
580 }
581 }
582
583
584
585
586
587
588 if (need_resched())
589 return freed;
590 } while (jh != last_jh);
591
592 return freed;
593}
594
595
596
597
598
599
600
601
602
603
604
605int __journal_clean_checkpoint_list(journal_t *journal)
606{
607 transaction_t *transaction, *last_transaction, *next_transaction;
608 int ret = 0;
609 int released;
610
611 transaction = journal->j_checkpoint_transactions;
612 if (!transaction)
613 goto out;
614
615 last_transaction = transaction->t_cpprev;
616 next_transaction = transaction;
617 do {
618 transaction = next_transaction;
619 next_transaction = transaction->t_cpnext;
620 ret += journal_clean_one_cp_list(transaction->
621 t_checkpoint_list, &released);
622
623
624
625
626
627 if (need_resched())
628 goto out;
629 if (released)
630 continue;
631
632
633
634
635
636 ret += journal_clean_one_cp_list(transaction->
637 t_checkpoint_io_list, &released);
638 if (need_resched())
639 goto out;
640 } while (transaction != last_transaction);
641out:
642 return ret;
643}
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665int __journal_remove_checkpoint(struct journal_head *jh)
666{
667 transaction_t *transaction;
668 journal_t *journal;
669 int ret = 0;
670
671 JBUFFER_TRACE(jh, "entry");
672
673 if ((transaction = jh->b_cp_transaction) == NULL) {
674 JBUFFER_TRACE(jh, "not on transaction");
675 goto out;
676 }
677 journal = transaction->t_journal;
678
679 JBUFFER_TRACE(jh, "removing from transaction");
680 __buffer_unlink(jh);
681 jh->b_cp_transaction = NULL;
682 journal_put_journal_head(jh);
683
684 if (transaction->t_checkpoint_list != NULL ||
685 transaction->t_checkpoint_io_list != NULL)
686 goto out;
687
688
689
690
691
692
693
694
695
696
697 if (transaction->t_state != T_FINISHED)
698 goto out;
699
700
701
702
703 __journal_drop_transaction(journal, transaction);
704
705
706
707 wake_up(&journal->j_wait_logspace);
708 ret = 1;
709out:
710 return ret;
711}
712
713
714
715
716
717
718
719
720
721void __journal_insert_checkpoint(struct journal_head *jh,
722 transaction_t *transaction)
723{
724 JBUFFER_TRACE(jh, "entry");
725 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
726 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
727
728
729 journal_grab_journal_head(jh2bh(jh));
730 jh->b_cp_transaction = transaction;
731
732 if (!transaction->t_checkpoint_list) {
733 jh->b_cpnext = jh->b_cpprev = jh;
734 } else {
735 jh->b_cpnext = transaction->t_checkpoint_list;
736 jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
737 jh->b_cpprev->b_cpnext = jh;
738 jh->b_cpnext->b_cpprev = jh;
739 }
740 transaction->t_checkpoint_list = jh;
741}
742
743
744
745
746
747
748
749
750
751
752
753void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
754{
755 assert_spin_locked(&journal->j_list_lock);
756 if (transaction->t_cpnext) {
757 transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
758 transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
759 if (journal->j_checkpoint_transactions == transaction)
760 journal->j_checkpoint_transactions =
761 transaction->t_cpnext;
762 if (journal->j_checkpoint_transactions == transaction)
763 journal->j_checkpoint_transactions = NULL;
764 }
765
766 J_ASSERT(transaction->t_state == T_FINISHED);
767 J_ASSERT(transaction->t_buffers == NULL);
768 J_ASSERT(transaction->t_sync_datalist == NULL);
769 J_ASSERT(transaction->t_forget == NULL);
770 J_ASSERT(transaction->t_iobuf_list == NULL);
771 J_ASSERT(transaction->t_shadow_list == NULL);
772 J_ASSERT(transaction->t_log_list == NULL);
773 J_ASSERT(transaction->t_checkpoint_list == NULL);
774 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
775 J_ASSERT(transaction->t_updates == 0);
776 J_ASSERT(journal->j_committing_transaction != transaction);
777 J_ASSERT(journal->j_running_transaction != transaction);
778
779 trace_jbd_drop_transaction(journal, transaction);
780 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
781 kfree(transaction);
782}
783