1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include <linux/fs.h>
27#include <linux/types.h>
28#include <linux/slab.h>
29#include <linux/highmem.h>
30#include <linux/kthread.h>
31#include <linux/time.h>
32#include <linux/random.h>
33#include <linux/delay.h>
34
35#include <cluster/masklog.h>
36
37#include "ocfs2.h"
38
39#include "alloc.h"
40#include "blockcheck.h"
41#include "dir.h"
42#include "dlmglue.h"
43#include "extent_map.h"
44#include "heartbeat.h"
45#include "inode.h"
46#include "journal.h"
47#include "localalloc.h"
48#include "slot_map.h"
49#include "super.h"
50#include "sysfile.h"
51#include "uptodate.h"
52#include "quota.h"
53#include "file.h"
54#include "namei.h"
55
56#include "buffer_head_io.h"
57#include "ocfs2_trace.h"
58
/* Global spinlock; defined without 'static', so it has external linkage. */
DEFINE_SPINLOCK(trans_inc_lock);

/*
 * Timeout constant for scheduling the orphan scan.
 * NOTE(review): presumably milliseconds (300 s) -- confirm at the use site.
 */
#define ORPHAN_SCAN_SCHEDULE_TIMEOUT	(300 * 1000)
62
/* Forward declarations for file-local helpers defined further down. */

/* Journal I/O and state helpers. */
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
				      int dirty, int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb, int slot_num);

/* Kernel thread entry points. */
static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_thread(void *arg);

/* Recovery helpers. */
static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
static int ocfs2_recover_node(struct ocfs2_super *osb,
			      int node_num, int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb, int slot,
				 enum ocfs2_orphan_reco_type orphan_reco_type);
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
				int slot_num,
				struct ocfs2_dinode *la_dinode,
				struct ocfs2_dinode *tl_dinode,
				struct ocfs2_quota_recovery *qrec,
				enum ocfs2_orphan_reco_type orphan_reco_type);
83
/*
 * Convenience wrapper: calls __ocfs2_wait_on_mount() with the quota
 * argument set to 0 and returns its result.
 */
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
	const int quota = 0;

	return __ocfs2_wait_on_mount(osb, quota);
}
88
/*
 * Convenience wrapper: calls __ocfs2_wait_on_mount() with the quota
 * argument set to 1 and returns its result.
 */
static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
{
	const int quota = 1;

	return __ocfs2_wait_on_mount(osb, quota);
}
93
94
95
96
97
98
/*
 * Life cycle of the slot replay map.  The map starts out as
 * REPLAY_UNNEEDED (see ocfs2_compute_replay_slots()), may be switched to
 * REPLAY_NEEDED, and becomes REPLAY_DONE once ocfs2_queue_replay_slots()
 * has queued the marked slots.  ocfs2_replay_map_set_state() refuses to
 * leave REPLAY_DONE.
 */
enum ocfs2_replay_state {
	REPLAY_UNNEEDED = 0,
	REPLAY_NEEDED,
	REPLAY_DONE
};

/*
 * Per-superblock replay map.
 *
 * @rm_slots:        number of entries in @rm_replay_slots
 * @rm_state:        current replay state, see enum ocfs2_replay_state
 * @rm_replay_slots: one byte per slot, nonzero when the slot has to be
 *                   queued for recovery completion
 *
 * The slot array is a C99 flexible array member (instead of the
 * deprecated zero-length array); the structure is allocated with room
 * for rm_slots trailing bytes.
 */
struct ocfs2_replay_map {
	unsigned int rm_slots;
	enum ocfs2_replay_state rm_state;
	unsigned char rm_replay_slots[];
};
110
111static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
112{
113 if (!osb->replay_map)
114 return;
115
116
117 if (osb->replay_map->rm_state == REPLAY_DONE)
118 return;
119
120 osb->replay_map->rm_state = state;
121}
122
123int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
124{
125 struct ocfs2_replay_map *replay_map;
126 int i, node_num;
127
128
129 if (osb->replay_map)
130 return 0;
131
132 replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
133 (osb->max_slots * sizeof(char)), GFP_KERNEL);
134
135 if (!replay_map) {
136 mlog_errno(-ENOMEM);
137 return -ENOMEM;
138 }
139
140 spin_lock(&osb->osb_lock);
141
142 replay_map->rm_slots = osb->max_slots;
143 replay_map->rm_state = REPLAY_UNNEEDED;
144
145
146 for (i = 0; i < replay_map->rm_slots; i++) {
147 if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
148 replay_map->rm_replay_slots[i] = 1;
149 }
150
151 osb->replay_map = replay_map;
152 spin_unlock(&osb->osb_lock);
153 return 0;
154}
155
156static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
157 enum ocfs2_orphan_reco_type orphan_reco_type)
158{
159 struct ocfs2_replay_map *replay_map = osb->replay_map;
160 int i;
161
162 if (!replay_map)
163 return;
164
165 if (replay_map->rm_state != REPLAY_NEEDED)
166 return;
167
168 for (i = 0; i < replay_map->rm_slots; i++)
169 if (replay_map->rm_replay_slots[i])
170 ocfs2_queue_recovery_completion(osb->journal, i, NULL,
171 NULL, NULL,
172 orphan_reco_type);
173 replay_map->rm_state = REPLAY_DONE;
174}
175
176static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
177{
178 struct ocfs2_replay_map *replay_map = osb->replay_map;
179
180 if (!osb->replay_map)
181 return;
182
183 kfree(replay_map);
184 osb->replay_map = NULL;
185}
186
187int ocfs2_recovery_init(struct ocfs2_super *osb)
188{
189 struct ocfs2_recovery_map *rm;
190
191 mutex_init(&osb->recovery_lock);
192 osb->disable_recovery = 0;
193 osb->recovery_thread_task = NULL;
194 init_waitqueue_head(&osb->recovery_event);
195
196 rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
197 osb->max_slots * sizeof(unsigned int),
198 GFP_KERNEL);
199 if (!rm) {
200 mlog_errno(-ENOMEM);
201 return -ENOMEM;
202 }
203
204 rm->rm_entries = (unsigned int *)((char *)rm +
205 sizeof(struct ocfs2_recovery_map));
206 osb->recovery_map = rm;
207
208 return 0;
209}
210
211
212
213
214static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
215{
216 mb();
217 return osb->recovery_thread_task != NULL;
218}
219
220void ocfs2_recovery_exit(struct ocfs2_super *osb)
221{
222 struct ocfs2_recovery_map *rm;
223
224
225
226 mutex_lock(&osb->recovery_lock);
227 osb->disable_recovery = 1;
228 mutex_unlock(&osb->recovery_lock);
229 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
230
231
232
233
234 flush_workqueue(ocfs2_wq);
235
236
237
238
239
240 rm = osb->recovery_map;
241
242
243 kfree(rm);
244}
245
246static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
247 unsigned int node_num)
248{
249 int i;
250 struct ocfs2_recovery_map *rm = osb->recovery_map;
251
252 assert_spin_locked(&osb->osb_lock);
253
254 for (i = 0; i < rm->rm_used; i++) {
255 if (rm->rm_entries[i] == node_num)
256 return 1;
257 }
258
259 return 0;
260}
261
262
263static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
264 unsigned int node_num)
265{
266 struct ocfs2_recovery_map *rm = osb->recovery_map;
267
268 spin_lock(&osb->osb_lock);
269 if (__ocfs2_recovery_map_test(osb, node_num)) {
270 spin_unlock(&osb->osb_lock);
271 return 1;
272 }
273
274
275 BUG_ON(rm->rm_used >= osb->max_slots);
276
277 rm->rm_entries[rm->rm_used] = node_num;
278 rm->rm_used++;
279 spin_unlock(&osb->osb_lock);
280
281 return 0;
282}
283
284static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
285 unsigned int node_num)
286{
287 int i;
288 struct ocfs2_recovery_map *rm = osb->recovery_map;
289
290 spin_lock(&osb->osb_lock);
291
292 for (i = 0; i < rm->rm_used; i++) {
293 if (rm->rm_entries[i] == node_num)
294 break;
295 }
296
297 if (i < rm->rm_used) {
298
299 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
300 (rm->rm_used - i - 1) * sizeof(unsigned int));
301 rm->rm_used--;
302 }
303
304 spin_unlock(&osb->osb_lock);
305}
306
307static int ocfs2_commit_cache(struct ocfs2_super *osb)
308{
309 int status = 0;
310 unsigned int flushed;
311 struct ocfs2_journal *journal = NULL;
312
313 journal = osb->journal;
314
315
316 down_write(&journal->j_trans_barrier);
317
318 flushed = atomic_read(&journal->j_num_trans);
319 trace_ocfs2_commit_cache_begin(flushed);
320 if (flushed == 0) {
321 up_write(&journal->j_trans_barrier);
322 goto finally;
323 }
324
325 jbd2_journal_lock_updates(journal->j_journal);
326 status = jbd2_journal_flush(journal->j_journal);
327 jbd2_journal_unlock_updates(journal->j_journal);
328 if (status < 0) {
329 up_write(&journal->j_trans_barrier);
330 mlog_errno(status);
331 goto finally;
332 }
333
334 ocfs2_inc_trans_id(journal);
335
336 flushed = atomic_read(&journal->j_num_trans);
337 atomic_set(&journal->j_num_trans, 0);
338 up_write(&journal->j_trans_barrier);
339
340 trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
341
342 ocfs2_wake_downconvert_thread(osb);
343 wake_up(&journal->j_checkpointed);
344finally:
345 return status;
346}
347
348handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
349{
350 journal_t *journal = osb->journal->j_journal;
351 handle_t *handle;
352
353 BUG_ON(!osb || !osb->journal->j_journal);
354
355 if (ocfs2_is_hard_readonly(osb))
356 return ERR_PTR(-EROFS);
357
358 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
359 BUG_ON(max_buffs <= 0);
360
361
362 if (journal_current_handle())
363 return jbd2_journal_start(journal, max_buffs);
364
365 sb_start_intwrite(osb->sb);
366
367 down_read(&osb->journal->j_trans_barrier);
368
369 handle = jbd2_journal_start(journal, max_buffs);
370 if (IS_ERR(handle)) {
371 up_read(&osb->journal->j_trans_barrier);
372 sb_end_intwrite(osb->sb);
373
374 mlog_errno(PTR_ERR(handle));
375
376 if (is_journal_aborted(journal)) {
377 ocfs2_abort(osb->sb, "Detected aborted journal\n");
378 handle = ERR_PTR(-EROFS);
379 }
380 } else {
381 if (!ocfs2_mount_local(osb))
382 atomic_inc(&(osb->journal->j_num_trans));
383 }
384
385 return handle;
386}
387
388int ocfs2_commit_trans(struct ocfs2_super *osb,
389 handle_t *handle)
390{
391 int ret, nested;
392 struct ocfs2_journal *journal = osb->journal;
393
394 BUG_ON(!handle);
395
396 nested = handle->h_ref > 1;
397 ret = jbd2_journal_stop(handle);
398 if (ret < 0)
399 mlog_errno(ret);
400
401 if (!nested) {
402 up_read(&journal->j_trans_barrier);
403 sb_end_intwrite(osb->sb);
404 }
405
406 return ret;
407}
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426int ocfs2_extend_trans(handle_t *handle, int nblocks)
427{
428 int status, old_nblocks;
429
430 BUG_ON(!handle);
431 BUG_ON(nblocks < 0);
432
433 if (!nblocks)
434 return 0;
435
436 old_nblocks = handle->h_buffer_credits;
437
438 trace_ocfs2_extend_trans(old_nblocks, nblocks);
439
440#ifdef CONFIG_OCFS2_DEBUG_FS
441 status = 1;
442#else
443 status = jbd2_journal_extend(handle, nblocks);
444 if (status < 0) {
445 mlog_errno(status);
446 goto bail;
447 }
448#endif
449
450 if (status > 0) {
451 trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
452 status = jbd2_journal_restart(handle,
453 old_nblocks + nblocks);
454 if (status < 0) {
455 mlog_errno(status);
456 goto bail;
457 }
458 }
459
460 status = 0;
461bail:
462 return status;
463}
464
465
466
467
468
469
470
471int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
472{
473 int status, old_nblks;
474
475 BUG_ON(!handle);
476
477 old_nblks = handle->h_buffer_credits;
478 trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
479
480 if (old_nblks < thresh)
481 return 0;
482
483 status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA);
484 if (status < 0) {
485 mlog_errno(status);
486 goto bail;
487 }
488
489 if (status > 0) {
490 status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);
491 if (status < 0)
492 mlog_errno(status);
493 }
494
495bail:
496 return status;
497}
498
499
500struct ocfs2_triggers {
501 struct jbd2_buffer_trigger_type ot_triggers;
502 int ot_offset;
503};
504
505static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
506{
507 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
508}
509
510static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
511 struct buffer_head *bh,
512 void *data, size_t size)
513{
514 struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
515
516
517
518
519
520
521
522 ocfs2_block_check_compute(data, size, data + ot->ot_offset);
523}
524
525
526
527
528
529static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
530 struct buffer_head *bh,
531 void *data, size_t size)
532{
533 struct ocfs2_disk_dqtrailer *dqt =
534 ocfs2_block_dqtrailer(size, data);
535
536
537
538
539
540
541
542 ocfs2_block_check_compute(data, size, &dqt->dq_check);
543}
544
545
546
547
548
549static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
550 struct buffer_head *bh,
551 void *data, size_t size)
552{
553 struct ocfs2_dir_block_trailer *trailer =
554 ocfs2_dir_trailer_from_size(size, data);
555
556
557
558
559
560
561
562 ocfs2_block_check_compute(data, size, &trailer->db_check);
563}
564
565static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
566 struct buffer_head *bh)
567{
568 mlog(ML_ERROR,
569 "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
570 "bh->b_blocknr = %llu\n",
571 (unsigned long)bh,
572 (unsigned long long)bh->b_blocknr);
573
574 ocfs2_error(bh->b_bdev->bd_super,
575 "JBD2 has aborted our journal, ocfs2 cannot continue\n");
576}
577
578static struct ocfs2_triggers di_triggers = {
579 .ot_triggers = {
580 .t_frozen = ocfs2_frozen_trigger,
581 .t_abort = ocfs2_abort_trigger,
582 },
583 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
584};
585
586static struct ocfs2_triggers eb_triggers = {
587 .ot_triggers = {
588 .t_frozen = ocfs2_frozen_trigger,
589 .t_abort = ocfs2_abort_trigger,
590 },
591 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
592};
593
594static struct ocfs2_triggers rb_triggers = {
595 .ot_triggers = {
596 .t_frozen = ocfs2_frozen_trigger,
597 .t_abort = ocfs2_abort_trigger,
598 },
599 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
600};
601
602static struct ocfs2_triggers gd_triggers = {
603 .ot_triggers = {
604 .t_frozen = ocfs2_frozen_trigger,
605 .t_abort = ocfs2_abort_trigger,
606 },
607 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
608};
609
610static struct ocfs2_triggers db_triggers = {
611 .ot_triggers = {
612 .t_frozen = ocfs2_db_frozen_trigger,
613 .t_abort = ocfs2_abort_trigger,
614 },
615};
616
617static struct ocfs2_triggers xb_triggers = {
618 .ot_triggers = {
619 .t_frozen = ocfs2_frozen_trigger,
620 .t_abort = ocfs2_abort_trigger,
621 },
622 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
623};
624
625static struct ocfs2_triggers dq_triggers = {
626 .ot_triggers = {
627 .t_frozen = ocfs2_dq_frozen_trigger,
628 .t_abort = ocfs2_abort_trigger,
629 },
630};
631
632static struct ocfs2_triggers dr_triggers = {
633 .ot_triggers = {
634 .t_frozen = ocfs2_frozen_trigger,
635 .t_abort = ocfs2_abort_trigger,
636 },
637 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
638};
639
640static struct ocfs2_triggers dl_triggers = {
641 .ot_triggers = {
642 .t_frozen = ocfs2_frozen_trigger,
643 .t_abort = ocfs2_abort_trigger,
644 },
645 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
646};
647
648static int __ocfs2_journal_access(handle_t *handle,
649 struct ocfs2_caching_info *ci,
650 struct buffer_head *bh,
651 struct ocfs2_triggers *triggers,
652 int type)
653{
654 int status;
655 struct ocfs2_super *osb =
656 OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
657
658 BUG_ON(!ci || !ci->ci_ops);
659 BUG_ON(!handle);
660 BUG_ON(!bh);
661
662 trace_ocfs2_journal_access(
663 (unsigned long long)ocfs2_metadata_cache_owner(ci),
664 (unsigned long long)bh->b_blocknr, type, bh->b_size);
665
666
667 if (!buffer_uptodate(bh)) {
668 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
669 mlog(ML_ERROR, "b_blocknr=%llu\n",
670 (unsigned long long)bh->b_blocknr);
671
672 lock_buffer(bh);
673
674
675
676
677
678 if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
679 clear_buffer_write_io_error(bh);
680 set_buffer_uptodate(bh);
681 }
682
683 if (!buffer_uptodate(bh)) {
684 unlock_buffer(bh);
685 return -EIO;
686 }
687 unlock_buffer(bh);
688 }
689
690
691
692
693
694
695
696 ocfs2_set_ci_lock_trans(osb->journal, ci);
697
698 ocfs2_metadata_cache_io_lock(ci);
699 switch (type) {
700 case OCFS2_JOURNAL_ACCESS_CREATE:
701 case OCFS2_JOURNAL_ACCESS_WRITE:
702 status = jbd2_journal_get_write_access(handle, bh);
703 break;
704
705 case OCFS2_JOURNAL_ACCESS_UNDO:
706 status = jbd2_journal_get_undo_access(handle, bh);
707 break;
708
709 default:
710 status = -EINVAL;
711 mlog(ML_ERROR, "Unknown access type!\n");
712 }
713 if (!status && ocfs2_meta_ecc(osb) && triggers)
714 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
715 ocfs2_metadata_cache_io_unlock(ci);
716
717 if (status < 0)
718 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
719 status, type);
720
721 return status;
722}
723
724int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
725 struct buffer_head *bh, int type)
726{
727 return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
728}
729
730int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
731 struct buffer_head *bh, int type)
732{
733 return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
734}
735
736int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
737 struct buffer_head *bh, int type)
738{
739 return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
740 type);
741}
742
743int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
744 struct buffer_head *bh, int type)
745{
746 return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
747}
748
749int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
750 struct buffer_head *bh, int type)
751{
752 return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
753}
754
755int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
756 struct buffer_head *bh, int type)
757{
758 return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
759}
760
761int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
762 struct buffer_head *bh, int type)
763{
764 return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
765}
766
767int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
768 struct buffer_head *bh, int type)
769{
770 return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
771}
772
773int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
774 struct buffer_head *bh, int type)
775{
776 return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
777}
778
779int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
780 struct buffer_head *bh, int type)
781{
782 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
783}
784
785void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
786{
787 int status;
788
789 trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);
790
791 status = jbd2_journal_dirty_metadata(handle, bh);
792 if (status) {
793 mlog_errno(status);
794 if (!is_handle_aborted(handle)) {
795 journal_t *journal = handle->h_transaction->t_journal;
796 struct super_block *sb = bh->b_bdev->bd_super;
797
798 mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
799 "Aborting transaction and journal.\n");
800 handle->h_err = status;
801 jbd2_journal_abort_handle(handle);
802 jbd2_journal_abort(journal, status);
803 ocfs2_abort(sb, "Journal already aborted.\n");
804 }
805 }
806}
807
808#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
809
810void ocfs2_set_journal_params(struct ocfs2_super *osb)
811{
812 journal_t *journal = osb->journal->j_journal;
813 unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
814
815 if (osb->osb_commit_interval)
816 commit_interval = osb->osb_commit_interval;
817
818 write_lock(&journal->j_state_lock);
819 journal->j_commit_interval = commit_interval;
820 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
821 journal->j_flags |= JBD2_BARRIER;
822 else
823 journal->j_flags &= ~JBD2_BARRIER;
824 write_unlock(&journal->j_state_lock);
825}
826
827int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
828{
829 int status = -1;
830 struct inode *inode = NULL;
831 journal_t *j_journal = NULL;
832 struct ocfs2_dinode *di = NULL;
833 struct buffer_head *bh = NULL;
834 struct ocfs2_super *osb;
835 int inode_lock = 0;
836
837 BUG_ON(!journal);
838
839 osb = journal->j_osb;
840
841
842 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
843 osb->slot_num);
844 if (inode == NULL) {
845 status = -EACCES;
846 mlog_errno(status);
847 goto done;
848 }
849 if (is_bad_inode(inode)) {
850 mlog(ML_ERROR, "access error (bad inode)\n");
851 iput(inode);
852 inode = NULL;
853 status = -EACCES;
854 goto done;
855 }
856
857 SET_INODE_JOURNAL(inode);
858 OCFS2_I(inode)->ip_open_count++;
859
860
861
862
863 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
864 if (status < 0) {
865 if (status != -ERESTARTSYS)
866 mlog(ML_ERROR, "Could not get lock on journal!\n");
867 goto done;
868 }
869
870 inode_lock = 1;
871 di = (struct ocfs2_dinode *)bh->b_data;
872
873 if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) {
874 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
875 i_size_read(inode));
876 status = -EINVAL;
877 goto done;
878 }
879
880 trace_ocfs2_journal_init(i_size_read(inode),
881 (unsigned long long)inode->i_blocks,
882 OCFS2_I(inode)->ip_clusters);
883
884
885 j_journal = jbd2_journal_init_inode(inode);
886 if (j_journal == NULL) {
887 mlog(ML_ERROR, "Linux journal layer error\n");
888 status = -EINVAL;
889 goto done;
890 }
891
892 trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
893
894 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
895 OCFS2_JOURNAL_DIRTY_FL);
896
897 journal->j_journal = j_journal;
898 journal->j_inode = inode;
899 journal->j_bh = bh;
900
901 ocfs2_set_journal_params(osb);
902
903 journal->j_state = OCFS2_JOURNAL_LOADED;
904
905 status = 0;
906done:
907 if (status < 0) {
908 if (inode_lock)
909 ocfs2_inode_unlock(inode, 1);
910 brelse(bh);
911 if (inode) {
912 OCFS2_I(inode)->ip_open_count--;
913 iput(inode);
914 }
915 }
916
917 return status;
918}
919
920static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
921{
922 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
923}
924
925static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
926{
927 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
928}
929
930static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
931 int dirty, int replayed)
932{
933 int status;
934 unsigned int flags;
935 struct ocfs2_journal *journal = osb->journal;
936 struct buffer_head *bh = journal->j_bh;
937 struct ocfs2_dinode *fe;
938
939 fe = (struct ocfs2_dinode *)bh->b_data;
940
941
942
943
944 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
945
946 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
947 if (dirty)
948 flags |= OCFS2_JOURNAL_DIRTY_FL;
949 else
950 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
951 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
952
953 if (replayed)
954 ocfs2_bump_recovery_generation(fe);
955
956 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
957 status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
958 if (status < 0)
959 mlog_errno(status);
960
961 return status;
962}
963
964
965
966
967
968void ocfs2_journal_shutdown(struct ocfs2_super *osb)
969{
970 struct ocfs2_journal *journal = NULL;
971 int status = 0;
972 struct inode *inode = NULL;
973 int num_running_trans = 0;
974
975 BUG_ON(!osb);
976
977 journal = osb->journal;
978 if (!journal)
979 goto done;
980
981 inode = journal->j_inode;
982
983 if (journal->j_state != OCFS2_JOURNAL_LOADED)
984 goto done;
985
986
987 if (!igrab(inode))
988 BUG();
989
990 num_running_trans = atomic_read(&(osb->journal->j_num_trans));
991 trace_ocfs2_journal_shutdown(num_running_trans);
992
993
994
995
996
997 journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;
998
999
1000
1001
1002 if (osb->commit_task) {
1003
1004 trace_ocfs2_journal_shutdown_wait(osb->commit_task);
1005 kthread_stop(osb->commit_task);
1006 osb->commit_task = NULL;
1007 }
1008
1009 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
1010
1011 if (ocfs2_mount_local(osb)) {
1012 jbd2_journal_lock_updates(journal->j_journal);
1013 status = jbd2_journal_flush(journal->j_journal);
1014 jbd2_journal_unlock_updates(journal->j_journal);
1015 if (status < 0)
1016 mlog_errno(status);
1017 }
1018
1019 if (status == 0) {
1020
1021
1022
1023
1024 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
1025 if (status < 0)
1026 mlog_errno(status);
1027 }
1028
1029
1030 jbd2_journal_destroy(journal->j_journal);
1031 journal->j_journal = NULL;
1032
1033 OCFS2_I(inode)->ip_open_count--;
1034
1035
1036 ocfs2_inode_unlock(inode, 1);
1037
1038 brelse(journal->j_bh);
1039 journal->j_bh = NULL;
1040
1041 journal->j_state = OCFS2_JOURNAL_FREE;
1042
1043
1044done:
1045 iput(inode);
1046}
1047
1048static void ocfs2_clear_journal_error(struct super_block *sb,
1049 journal_t *journal,
1050 int slot)
1051{
1052 int olderr;
1053
1054 olderr = jbd2_journal_errno(journal);
1055 if (olderr) {
1056 mlog(ML_ERROR, "File system error %d recorded in "
1057 "journal %u.\n", olderr, slot);
1058 mlog(ML_ERROR, "File system on device %s needs checking.\n",
1059 sb->s_id);
1060
1061 jbd2_journal_ack_err(journal);
1062 jbd2_journal_clear_err(journal);
1063 }
1064}
1065
1066int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
1067{
1068 int status = 0;
1069 struct ocfs2_super *osb;
1070
1071 BUG_ON(!journal);
1072
1073 osb = journal->j_osb;
1074
1075 status = jbd2_journal_load(journal->j_journal);
1076 if (status < 0) {
1077 mlog(ML_ERROR, "Failed to load journal!\n");
1078 goto done;
1079 }
1080
1081 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
1082
1083 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
1084 if (status < 0) {
1085 mlog_errno(status);
1086 goto done;
1087 }
1088
1089
1090 if (!local) {
1091 osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
1092 "ocfs2cmt-%s", osb->uuid_str);
1093 if (IS_ERR(osb->commit_task)) {
1094 status = PTR_ERR(osb->commit_task);
1095 osb->commit_task = NULL;
1096 mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
1097 "error=%d", status);
1098 goto done;
1099 }
1100 } else
1101 osb->commit_task = NULL;
1102
1103done:
1104 return status;
1105}
1106
1107
1108
1109
1110int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1111{
1112 int status;
1113
1114 BUG_ON(!journal);
1115
1116 status = jbd2_journal_wipe(journal->j_journal, full);
1117 if (status < 0) {
1118 mlog_errno(status);
1119 goto bail;
1120 }
1121
1122 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
1123 if (status < 0)
1124 mlog_errno(status);
1125
1126bail:
1127 return status;
1128}
1129
1130static int ocfs2_recovery_completed(struct ocfs2_super *osb)
1131{
1132 int empty;
1133 struct ocfs2_recovery_map *rm = osb->recovery_map;
1134
1135 spin_lock(&osb->osb_lock);
1136 empty = (rm->rm_used == 0);
1137 spin_unlock(&osb->osb_lock);
1138
1139 return empty;
1140}
1141
1142void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
1143{
1144 wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
1145}
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157static int ocfs2_force_read_journal(struct inode *inode)
1158{
1159 int status = 0;
1160 int i;
1161 u64 v_blkno, p_blkno, p_blocks, num_blocks;
1162#define CONCURRENT_JOURNAL_FILL 32ULL
1163 struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
1164
1165 memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
1166
1167 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
1168 v_blkno = 0;
1169 while (v_blkno < num_blocks) {
1170 status = ocfs2_extent_map_get_blocks(inode, v_blkno,
1171 &p_blkno, &p_blocks, NULL);
1172 if (status < 0) {
1173 mlog_errno(status);
1174 goto bail;
1175 }
1176
1177 if (p_blocks > CONCURRENT_JOURNAL_FILL)
1178 p_blocks = CONCURRENT_JOURNAL_FILL;
1179
1180
1181
1182 status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
1183 p_blkno, p_blocks, bhs);
1184 if (status < 0) {
1185 mlog_errno(status);
1186 goto bail;
1187 }
1188
1189 for(i = 0; i < p_blocks; i++) {
1190 brelse(bhs[i]);
1191 bhs[i] = NULL;
1192 }
1193
1194 v_blkno += p_blocks;
1195 }
1196
1197bail:
1198 for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
1199 brelse(bhs[i]);
1200 return status;
1201}
1202
1203struct ocfs2_la_recovery_item {
1204 struct list_head lri_list;
1205 int lri_slot;
1206 struct ocfs2_dinode *lri_la_dinode;
1207 struct ocfs2_dinode *lri_tl_dinode;
1208 struct ocfs2_quota_recovery *lri_qrec;
1209 enum ocfs2_orphan_reco_type lri_orphan_reco_type;
1210};
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222void ocfs2_complete_recovery(struct work_struct *work)
1223{
1224 int ret = 0;
1225 struct ocfs2_journal *journal =
1226 container_of(work, struct ocfs2_journal, j_recovery_work);
1227 struct ocfs2_super *osb = journal->j_osb;
1228 struct ocfs2_dinode *la_dinode, *tl_dinode;
1229 struct ocfs2_la_recovery_item *item, *n;
1230 struct ocfs2_quota_recovery *qrec;
1231 enum ocfs2_orphan_reco_type orphan_reco_type;
1232 LIST_HEAD(tmp_la_list);
1233
1234 trace_ocfs2_complete_recovery(
1235 (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
1236
1237 spin_lock(&journal->j_lock);
1238 list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
1239 spin_unlock(&journal->j_lock);
1240
1241 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
1242 list_del_init(&item->lri_list);
1243
1244 ocfs2_wait_on_quotas(osb);
1245
1246 la_dinode = item->lri_la_dinode;
1247 tl_dinode = item->lri_tl_dinode;
1248 qrec = item->lri_qrec;
1249 orphan_reco_type = item->lri_orphan_reco_type;
1250
1251 trace_ocfs2_complete_recovery_slot(item->lri_slot,
1252 la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
1253 tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
1254 qrec);
1255
1256 if (la_dinode) {
1257 ret = ocfs2_complete_local_alloc_recovery(osb,
1258 la_dinode);
1259 if (ret < 0)
1260 mlog_errno(ret);
1261
1262 kfree(la_dinode);
1263 }
1264
1265 if (tl_dinode) {
1266 ret = ocfs2_complete_truncate_log_recovery(osb,
1267 tl_dinode);
1268 if (ret < 0)
1269 mlog_errno(ret);
1270
1271 kfree(tl_dinode);
1272 }
1273
1274 ret = ocfs2_recover_orphans(osb, item->lri_slot,
1275 orphan_reco_type);
1276 if (ret < 0)
1277 mlog_errno(ret);
1278
1279 if (qrec) {
1280 ret = ocfs2_finish_quota_recovery(osb, qrec,
1281 item->lri_slot);
1282 if (ret < 0)
1283 mlog_errno(ret);
1284
1285 }
1286
1287 kfree(item);
1288 }
1289
1290 trace_ocfs2_complete_recovery_end(ret);
1291}
1292
1293
1294
1295
1296static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1297 int slot_num,
1298 struct ocfs2_dinode *la_dinode,
1299 struct ocfs2_dinode *tl_dinode,
1300 struct ocfs2_quota_recovery *qrec,
1301 enum ocfs2_orphan_reco_type orphan_reco_type)
1302{
1303 struct ocfs2_la_recovery_item *item;
1304
1305 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
1306 if (!item) {
1307
1308
1309
1310 kfree(la_dinode);
1311 kfree(tl_dinode);
1312
1313 if (qrec)
1314 ocfs2_free_quota_recovery(qrec);
1315
1316 mlog_errno(-ENOMEM);
1317 return;
1318 }
1319
1320 INIT_LIST_HEAD(&item->lri_list);
1321 item->lri_la_dinode = la_dinode;
1322 item->lri_slot = slot_num;
1323 item->lri_tl_dinode = tl_dinode;
1324 item->lri_qrec = qrec;
1325 item->lri_orphan_reco_type = orphan_reco_type;
1326
1327 spin_lock(&journal->j_lock);
1328 list_add_tail(&item->lri_list, &journal->j_la_cleanups);
1329 queue_work(ocfs2_wq, &journal->j_recovery_work);
1330 spin_unlock(&journal->j_lock);
1331}
1332
1333
1334
1335void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
1336{
1337 struct ocfs2_journal *journal = osb->journal;
1338
1339 if (ocfs2_is_hard_readonly(osb))
1340 return;
1341
1342
1343
1344 ocfs2_queue_recovery_completion(journal, osb->slot_num,
1345 osb->local_alloc_copy, NULL, NULL,
1346 ORPHAN_NEED_TRUNCATE);
1347 ocfs2_schedule_truncate_log_flush(osb, 0);
1348
1349 osb->local_alloc_copy = NULL;
1350 osb->dirty = 0;
1351
1352
1353 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1354 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1355 ocfs2_free_replay_slots(osb);
1356}
1357
1358void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
1359{
1360 if (osb->quota_rec) {
1361 ocfs2_queue_recovery_completion(osb->journal,
1362 osb->slot_num,
1363 NULL,
1364 NULL,
1365 osb->quota_rec,
1366 ORPHAN_NEED_TRUNCATE);
1367 osb->quota_rec = NULL;
1368 }
1369}
1370
1371static int __ocfs2_recovery_thread(void *arg)
1372{
1373 int status, node_num, slot_num;
1374 struct ocfs2_super *osb = arg;
1375 struct ocfs2_recovery_map *rm = osb->recovery_map;
1376 int *rm_quota = NULL;
1377 int rm_quota_used = 0, i;
1378 struct ocfs2_quota_recovery *qrec;
1379
1380 status = ocfs2_wait_on_mount(osb);
1381 if (status < 0) {
1382 goto bail;
1383 }
1384
1385 rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
1386 if (!rm_quota) {
1387 status = -ENOMEM;
1388 goto bail;
1389 }
1390restart:
1391 status = ocfs2_super_lock(osb, 1);
1392 if (status < 0) {
1393 mlog_errno(status);
1394 goto bail;
1395 }
1396
1397 status = ocfs2_compute_replay_slots(osb);
1398 if (status < 0)
1399 mlog_errno(status);
1400
1401
1402 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1403 NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);
1404
1405 spin_lock(&osb->osb_lock);
1406 while (rm->rm_used) {
1407
1408
1409 node_num = rm->rm_entries[0];
1410 spin_unlock(&osb->osb_lock);
1411 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1412 trace_ocfs2_recovery_thread_node(node_num, slot_num);
1413 if (slot_num == -ENOENT) {
1414 status = 0;
1415 goto skip_recovery;
1416 }
1417
1418
1419
1420
1421
1422
1423
1424 for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
1425 if (i == rm_quota_used)
1426 rm_quota[rm_quota_used++] = slot_num;
1427
1428 status = ocfs2_recover_node(osb, node_num, slot_num);
1429skip_recovery:
1430 if (!status) {
1431 ocfs2_recovery_map_clear(osb, node_num);
1432 } else {
1433 mlog(ML_ERROR,
1434 "Error %d recovering node %d on device (%u,%u)!\n",
1435 status, node_num,
1436 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1437 mlog(ML_ERROR, "Volume requires unmount.\n");
1438 }
1439
1440 spin_lock(&osb->osb_lock);
1441 }
1442 spin_unlock(&osb->osb_lock);
1443 trace_ocfs2_recovery_thread_end(status);
1444
1445
1446 status = ocfs2_check_journals_nolocks(osb);
1447 status = (status == -EROFS) ? 0 : status;
1448 if (status < 0)
1449 mlog_errno(status);
1450
1451
1452
1453
1454 for (i = 0; i < rm_quota_used; i++) {
1455 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
1456 if (IS_ERR(qrec)) {
1457 status = PTR_ERR(qrec);
1458 mlog_errno(status);
1459 continue;
1460 }
1461 ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
1462 NULL, NULL, qrec,
1463 ORPHAN_NEED_TRUNCATE);
1464 }
1465
1466 ocfs2_super_unlock(osb, 1);
1467
1468
1469 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1470
1471bail:
1472 mutex_lock(&osb->recovery_lock);
1473 if (!status && !ocfs2_recovery_completed(osb)) {
1474 mutex_unlock(&osb->recovery_lock);
1475 goto restart;
1476 }
1477
1478 ocfs2_free_replay_slots(osb);
1479 osb->recovery_thread_task = NULL;
1480 mb();
1481 wake_up(&osb->recovery_event);
1482
1483 mutex_unlock(&osb->recovery_lock);
1484
1485 kfree(rm_quota);
1486
1487
1488
1489
1490 complete_and_exit(NULL, status);
1491}
1492
1493void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1494{
1495 mutex_lock(&osb->recovery_lock);
1496
1497 trace_ocfs2_recovery_thread(node_num, osb->node_num,
1498 osb->disable_recovery, osb->recovery_thread_task,
1499 osb->disable_recovery ?
1500 -1 : ocfs2_recovery_map_set(osb, node_num));
1501
1502 if (osb->disable_recovery)
1503 goto out;
1504
1505 if (osb->recovery_thread_task)
1506 goto out;
1507
1508 osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
1509 "ocfs2rec-%s", osb->uuid_str);
1510 if (IS_ERR(osb->recovery_thread_task)) {
1511 mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
1512 osb->recovery_thread_task = NULL;
1513 }
1514
1515out:
1516 mutex_unlock(&osb->recovery_lock);
1517 wake_up(&osb->recovery_event);
1518}
1519
1520static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1521 int slot_num,
1522 struct buffer_head **bh,
1523 struct inode **ret_inode)
1524{
1525 int status = -EACCES;
1526 struct inode *inode = NULL;
1527
1528 BUG_ON(slot_num >= osb->max_slots);
1529
1530 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1531 slot_num);
1532 if (!inode || is_bad_inode(inode)) {
1533 mlog_errno(status);
1534 goto bail;
1535 }
1536 SET_INODE_JOURNAL(inode);
1537
1538 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
1539 if (status < 0) {
1540 mlog_errno(status);
1541 goto bail;
1542 }
1543
1544 status = 0;
1545
1546bail:
1547 if (inode) {
1548 if (status || !ret_inode)
1549 iput(inode);
1550 else
1551 *ret_inode = inode;
1552 }
1553 return status;
1554}
1555
1556
1557
/*
 * Replay the journal that belongs to @slot_num (last used by @node_num).
 *
 * Returns 0 when the journal was replayed or was not dirty, -EBUSY when
 * the on-disk recovery generation no longer matches the cached one, or
 * another negative error.
 */
static int ocfs2_replay_journal(struct ocfs2_super *osb,
				int node_num,
				int slot_num)
{
	int status;
	int got_lock = 0;
	unsigned int flags;
	struct inode *inode = NULL;
	struct ocfs2_dinode *fe;
	journal_t *journal = NULL;
	struct buffer_head *bh = NULL;
	u32 slot_reco_gen;

	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
	if (status) {
		mlog_errno(status);
		goto done;
	}

	/* Sample the on-disk recovery generation before taking the lock. */
	fe = (struct ocfs2_dinode *)bh->b_data;
	slot_reco_gen = ocfs2_get_recovery_generation(fe);
	brelse(bh);
	bh = NULL;

	/*
	 * The on-disk generation differs from the cached one: record the
	 * new value and return -EBUSY, which ocfs2_recover_node() treats
	 * as "skip this slot".
	 * NOTE(review): presumably another node already recovered the slot
	 * - confirm.
	 */
	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
		trace_ocfs2_replay_journal_recovered(slot_num,
		     osb->slot_recovery_generations[slot_num], slot_reco_gen);
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		status = -EBUSY;
		goto done;
	}

	/* Take the journal inode's cluster lock with the recovery flag. */
	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
	if (status < 0) {
		trace_ocfs2_replay_journal_lock_err(status);
		if (status != -ERESTARTSYS)
			mlog(ML_ERROR, "Could not lock journal!\n");
		goto done;
	}
	got_lock = 1;

	/* Re-read flags and generation from the block returned by the lock. */
	fe = (struct ocfs2_dinode *) bh->b_data;

	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	slot_reco_gen = ocfs2_get_recovery_generation(fe);

	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
		trace_ocfs2_replay_journal_skip(node_num);
		/* Journal is clean: only refresh the cached generation. */
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		goto done;
	}

	/* A dirty journal was found, so the replay map must be processed. */
	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);

	printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\
	       "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
	       MINOR(osb->sb->s_dev));

	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);

	status = ocfs2_force_read_journal(inode);
	if (status < 0) {
		mlog_errno(status);
		goto done;
	}

	journal = jbd2_journal_init_inode(inode);
	if (journal == NULL) {
		mlog(ML_ERROR, "Linux journal layer error\n");
		status = -EIO;
		goto done;
	}

	status = jbd2_journal_load(journal);
	if (status < 0) {
		mlog_errno(status);
		/*
		 * NOTE(review): the extra reference presumably balances one
		 * dropped by jbd2_journal_destroy() - confirm.
		 */
		if (!igrab(inode))
			BUG();
		jbd2_journal_destroy(journal);
		goto done;
	}

	ocfs2_clear_journal_error(osb->sb, journal, slot_num);

	/* Flush the loaded journal; a failure is logged but not fatal here. */
	jbd2_journal_lock_updates(journal);
	status = jbd2_journal_flush(journal);
	jbd2_journal_unlock_updates(journal);
	if (status < 0)
		mlog_errno(status);

	/* Clear the dirty flag in the journal inode. */
	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
	fe->id1.journal1.ij_flags = cpu_to_le32(flags);

	/* Bump the recovery generation and cache the new value. */
	ocfs2_bump_recovery_generation(fe);
	osb->slot_recovery_generations[slot_num] =
					ocfs2_get_recovery_generation(fe);

	/* Recompute the block's ECC and write the updated inode back. */
	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
	status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
	if (status < 0)
		mlog_errno(status);

	/* Same reference handling as on the journal-load failure path. */
	if (!igrab(inode))
		BUG();

	jbd2_journal_destroy(journal);

	printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\
	       "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
	       MINOR(osb->sb->s_dev));
done:
	/* Drop the cluster lock only if it was actually taken. */
	if (got_lock)
		ocfs2_inode_unlock(inode, 1);

	iput(inode);
	brelse(bh);

	return status;
}
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707static int ocfs2_recover_node(struct ocfs2_super *osb,
1708 int node_num, int slot_num)
1709{
1710 int status = 0;
1711 struct ocfs2_dinode *la_copy = NULL;
1712 struct ocfs2_dinode *tl_copy = NULL;
1713
1714 trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
1715
1716
1717
1718 BUG_ON(osb->node_num == node_num);
1719
1720 status = ocfs2_replay_journal(osb, node_num, slot_num);
1721 if (status < 0) {
1722 if (status == -EBUSY) {
1723 trace_ocfs2_recover_node_skip(slot_num, node_num);
1724 status = 0;
1725 goto done;
1726 }
1727 mlog_errno(status);
1728 goto done;
1729 }
1730
1731
1732 status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
1733 if (status < 0) {
1734 mlog_errno(status);
1735 goto done;
1736 }
1737
1738
1739
1740
1741 status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
1742 if (status < 0)
1743 mlog_errno(status);
1744
1745
1746
1747 status = ocfs2_clear_slot(osb, slot_num);
1748 if (status < 0)
1749 mlog_errno(status);
1750
1751
1752 ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
1753 tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
1754
1755 status = 0;
1756done:
1757
1758 return status;
1759}
1760
1761
1762
1763
1764static int ocfs2_trylock_journal(struct ocfs2_super *osb,
1765 int slot_num)
1766{
1767 int status, flags;
1768 struct inode *inode = NULL;
1769
1770 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1771 slot_num);
1772 if (inode == NULL) {
1773 mlog(ML_ERROR, "access error\n");
1774 status = -EACCES;
1775 goto bail;
1776 }
1777 if (is_bad_inode(inode)) {
1778 mlog(ML_ERROR, "access error (bad inode)\n");
1779 iput(inode);
1780 inode = NULL;
1781 status = -EACCES;
1782 goto bail;
1783 }
1784 SET_INODE_JOURNAL(inode);
1785
1786 flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
1787 status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
1788 if (status < 0) {
1789 if (status != -EAGAIN)
1790 mlog_errno(status);
1791 goto bail;
1792 }
1793
1794 ocfs2_inode_unlock(inode, 1);
1795bail:
1796 iput(inode);
1797
1798 return status;
1799}
1800
1801
1802
1803int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1804{
1805 unsigned int node_num;
1806 int status, i;
1807 u32 gen;
1808 struct buffer_head *bh = NULL;
1809 struct ocfs2_dinode *di;
1810
1811
1812
1813
1814 for (i = 0; i < osb->max_slots; i++) {
1815
1816 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1817 if (status) {
1818 mlog_errno(status);
1819 goto bail;
1820 }
1821 di = (struct ocfs2_dinode *)bh->b_data;
1822 gen = ocfs2_get_recovery_generation(di);
1823 brelse(bh);
1824 bh = NULL;
1825
1826 spin_lock(&osb->osb_lock);
1827 osb->slot_recovery_generations[i] = gen;
1828
1829 trace_ocfs2_mark_dead_nodes(i,
1830 osb->slot_recovery_generations[i]);
1831
1832 if (i == osb->slot_num) {
1833 spin_unlock(&osb->osb_lock);
1834 continue;
1835 }
1836
1837 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
1838 if (status == -ENOENT) {
1839 spin_unlock(&osb->osb_lock);
1840 continue;
1841 }
1842
1843 if (__ocfs2_recovery_map_test(osb, node_num)) {
1844 spin_unlock(&osb->osb_lock);
1845 continue;
1846 }
1847 spin_unlock(&osb->osb_lock);
1848
1849
1850
1851
1852 status = ocfs2_trylock_journal(osb, i);
1853 if (!status) {
1854
1855
1856
1857 ocfs2_recovery_thread(osb, node_num);
1858 } else if ((status < 0) && (status != -EAGAIN)) {
1859 mlog_errno(status);
1860 goto bail;
1861 }
1862 }
1863
1864 status = 0;
1865bail:
1866 return status;
1867}
1868
1869
1870
1871
1872
1873
1874static inline unsigned long ocfs2_orphan_scan_timeout(void)
1875{
1876 unsigned long time;
1877
1878 get_random_bytes(&time, sizeof(time));
1879 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
1880 return msecs_to_jiffies(time);
1881}
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1911{
1912 struct ocfs2_orphan_scan *os;
1913 int status, i;
1914 u32 seqno = 0;
1915
1916 os = &osb->osb_orphan_scan;
1917
1918 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1919 goto out;
1920
1921 trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
1922 atomic_read(&os->os_state));
1923
1924 status = ocfs2_orphan_scan_lock(osb, &seqno);
1925 if (status < 0) {
1926 if (status != -EAGAIN)
1927 mlog_errno(status);
1928 goto out;
1929 }
1930
1931
1932 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1933 goto unlock;
1934
1935 if (os->os_seqno != seqno) {
1936 os->os_seqno = seqno;
1937 goto unlock;
1938 }
1939
1940 for (i = 0; i < osb->max_slots; i++)
1941 ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
1942 NULL, ORPHAN_NO_NEED_TRUNCATE);
1943
1944
1945
1946
1947 seqno++;
1948 os->os_count++;
1949 os->os_scantime = CURRENT_TIME;
1950unlock:
1951 ocfs2_orphan_scan_unlock(osb, seqno);
1952out:
1953 trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
1954 atomic_read(&os->os_state));
1955 return;
1956}
1957
1958
1959static void ocfs2_orphan_scan_work(struct work_struct *work)
1960{
1961 struct ocfs2_orphan_scan *os;
1962 struct ocfs2_super *osb;
1963
1964 os = container_of(work, struct ocfs2_orphan_scan,
1965 os_orphan_scan_work.work);
1966 osb = os->os_osb;
1967
1968 mutex_lock(&os->os_lock);
1969 ocfs2_queue_orphan_scan(osb);
1970 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1971 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
1972 ocfs2_orphan_scan_timeout());
1973 mutex_unlock(&os->os_lock);
1974}
1975
1976void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1977{
1978 struct ocfs2_orphan_scan *os;
1979
1980 os = &osb->osb_orphan_scan;
1981 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
1982 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1983 mutex_lock(&os->os_lock);
1984 cancel_delayed_work(&os->os_orphan_scan_work);
1985 mutex_unlock(&os->os_lock);
1986 }
1987}
1988
1989void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1990{
1991 struct ocfs2_orphan_scan *os;
1992
1993 os = &osb->osb_orphan_scan;
1994 os->os_osb = osb;
1995 os->os_count = 0;
1996 os->os_seqno = 0;
1997 mutex_init(&os->os_lock);
1998 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1999}
2000
2001void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
2002{
2003 struct ocfs2_orphan_scan *os;
2004
2005 os = &osb->osb_orphan_scan;
2006 os->os_scantime = CURRENT_TIME;
2007 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
2008 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
2009 else {
2010 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
2011 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
2012 ocfs2_orphan_scan_timeout());
2013 }
2014}
2015
/*
 * State shared between ocfs2_queue_orphans() and its directory actor,
 * ocfs2_orphan_filldir().
 */
struct ocfs2_orphan_filldir_priv {
	/* Embedded context; the actor gets back to this struct via container_of(). */
	struct dir_context	ctx;
	/* Head of the inode list chained through ip_next_orphan. */
	struct inode		*head;
	/* Superblock passed to ocfs2_iget() by the actor. */
	struct ocfs2_super	*osb;
	/* With ORPHAN_NO_NEED_TRUNCATE the actor skips DIO-prefixed entries. */
	enum ocfs2_orphan_reco_type orphan_reco_type;
};
2022
2023static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
2024 int name_len, loff_t pos, u64 ino,
2025 unsigned type)
2026{
2027 struct ocfs2_orphan_filldir_priv *p =
2028 container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx);
2029 struct inode *iter;
2030
2031 if (name_len == 1 && !strncmp(".", name, 1))
2032 return 0;
2033 if (name_len == 2 && !strncmp("..", name, 2))
2034 return 0;
2035
2036
2037 if ((p->orphan_reco_type == ORPHAN_NO_NEED_TRUNCATE) &&
2038 (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2039 OCFS2_DIO_ORPHAN_PREFIX_LEN)))
2040 return 0;
2041
2042
2043 iter = ocfs2_iget(p->osb, ino,
2044 OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
2045 if (IS_ERR(iter))
2046 return 0;
2047
2048 if (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2049 OCFS2_DIO_ORPHAN_PREFIX_LEN))
2050 OCFS2_I(iter)->ip_flags |= OCFS2_INODE_DIO_ORPHAN_ENTRY;
2051
2052
2053
2054 if (OCFS2_I(iter)->ip_next_orphan) {
2055 iput(iter);
2056 return 0;
2057 }
2058
2059 trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
2060
2061
2062 OCFS2_I(iter)->ip_next_orphan = p->head;
2063 p->head = iter;
2064
2065 return 0;
2066}
2067
2068static int ocfs2_queue_orphans(struct ocfs2_super *osb,
2069 int slot,
2070 struct inode **head,
2071 enum ocfs2_orphan_reco_type orphan_reco_type)
2072{
2073 int status;
2074 struct inode *orphan_dir_inode = NULL;
2075 struct ocfs2_orphan_filldir_priv priv = {
2076 .ctx.actor = ocfs2_orphan_filldir,
2077 .osb = osb,
2078 .head = *head,
2079 .orphan_reco_type = orphan_reco_type
2080 };
2081
2082 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2083 ORPHAN_DIR_SYSTEM_INODE,
2084 slot);
2085 if (!orphan_dir_inode) {
2086 status = -ENOENT;
2087 mlog_errno(status);
2088 return status;
2089 }
2090
2091 inode_lock(orphan_dir_inode);
2092 status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
2093 if (status < 0) {
2094 mlog_errno(status);
2095 goto out;
2096 }
2097
2098 status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx);
2099 if (status) {
2100 mlog_errno(status);
2101 goto out_cluster;
2102 }
2103
2104 *head = priv.head;
2105
2106out_cluster:
2107 ocfs2_inode_unlock(orphan_dir_inode, 0);
2108out:
2109 inode_unlock(orphan_dir_inode);
2110 iput(orphan_dir_inode);
2111 return status;
2112}
2113
2114static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
2115 int slot)
2116{
2117 int ret;
2118
2119 spin_lock(&osb->osb_lock);
2120 ret = !osb->osb_orphan_wipes[slot];
2121 spin_unlock(&osb->osb_lock);
2122 return ret;
2123}
2124
2125static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
2126 int slot)
2127{
2128 spin_lock(&osb->osb_lock);
2129
2130
2131 ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
2132 while (osb->osb_orphan_wipes[slot]) {
2133
2134
2135
2136 spin_unlock(&osb->osb_lock);
2137 wait_event_interruptible(osb->osb_wipe_event,
2138 ocfs2_orphan_recovery_can_continue(osb, slot));
2139 spin_lock(&osb->osb_lock);
2140 }
2141 spin_unlock(&osb->osb_lock);
2142}
2143
/*
 * Clear the bit for @slot in osb_recovering_orphan_dirs, undoing the mark
 * set by ocfs2_mark_recovering_orphan_dir().
 */
static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
					      int slot)
{
	ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
}
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2169 int slot,
2170 enum ocfs2_orphan_reco_type orphan_reco_type)
2171{
2172 int ret = 0;
2173 struct inode *inode = NULL;
2174 struct inode *iter;
2175 struct ocfs2_inode_info *oi;
2176 struct buffer_head *di_bh = NULL;
2177 struct ocfs2_dinode *di = NULL;
2178
2179 trace_ocfs2_recover_orphans(slot);
2180
2181 ocfs2_mark_recovering_orphan_dir(osb, slot);
2182 ret = ocfs2_queue_orphans(osb, slot, &inode, orphan_reco_type);
2183 ocfs2_clear_recovering_orphan_dir(osb, slot);
2184
2185
2186
2187 if (ret)
2188 mlog_errno(ret);
2189
2190 while (inode) {
2191 oi = OCFS2_I(inode);
2192 trace_ocfs2_recover_orphans_iput(
2193 (unsigned long long)oi->ip_blkno);
2194
2195 iter = oi->ip_next_orphan;
2196 oi->ip_next_orphan = NULL;
2197
2198 if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
2199 inode_lock(inode);
2200 ret = ocfs2_rw_lock(inode, 1);
2201 if (ret < 0) {
2202 mlog_errno(ret);
2203 goto unlock_mutex;
2204 }
2205
2206
2207
2208
2209 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2210 if (ret) {
2211 mlog_errno(ret);
2212 goto unlock_rw;
2213 }
2214
2215 di = (struct ocfs2_dinode *)di_bh->b_data;
2216
2217 if (di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)) {
2218 ret = ocfs2_truncate_file(inode, di_bh,
2219 i_size_read(inode));
2220 if (ret < 0) {
2221 if (ret != -ENOSPC)
2222 mlog_errno(ret);
2223 goto unlock_inode;
2224 }
2225
2226 ret = ocfs2_del_inode_from_orphan(osb, inode,
2227 di_bh, 0, 0);
2228 if (ret)
2229 mlog_errno(ret);
2230 }
2231unlock_inode:
2232 ocfs2_inode_unlock(inode, 1);
2233 brelse(di_bh);
2234 di_bh = NULL;
2235unlock_rw:
2236 ocfs2_rw_unlock(inode, 1);
2237unlock_mutex:
2238 inode_unlock(inode);
2239
2240
2241 oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
2242 } else {
2243 spin_lock(&oi->ip_lock);
2244
2245
2246 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2247 spin_unlock(&oi->ip_lock);
2248 }
2249
2250 iput(inode);
2251 inode = iter;
2252 }
2253
2254 return ret;
2255}
2256
2257static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
2258{
2259
2260
2261
2262 wait_event(osb->osb_mount_event,
2263 (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
2264 atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
2265 atomic_read(&osb->vol_state) == VOLUME_DISABLED);
2266
2267
2268
2269
2270 if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
2271 trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
2272 mlog(0, "mount error, exiting!\n");
2273 return -EBUSY;
2274 }
2275
2276 return 0;
2277}
2278
2279static int ocfs2_commit_thread(void *arg)
2280{
2281 int status;
2282 struct ocfs2_super *osb = arg;
2283 struct ocfs2_journal *journal = osb->journal;
2284
2285
2286
2287
2288
2289 while (!(kthread_should_stop() &&
2290 atomic_read(&journal->j_num_trans) == 0)) {
2291
2292 wait_event_interruptible(osb->checkpoint_event,
2293 atomic_read(&journal->j_num_trans)
2294 || kthread_should_stop());
2295
2296 status = ocfs2_commit_cache(osb);
2297 if (status < 0) {
2298 static unsigned long abort_warn_time;
2299
2300
2301 if (printk_timed_ratelimit(&abort_warn_time, 60*HZ))
2302 mlog(ML_ERROR, "status = %d, journal is "
2303 "already aborted.\n", status);
2304
2305
2306
2307
2308
2309 msleep_interruptible(1000);
2310 }
2311
2312 if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
2313 mlog(ML_KTHREAD,
2314 "commit_thread: %u transactions pending on "
2315 "shutdown\n",
2316 atomic_read(&journal->j_num_trans));
2317 }
2318 }
2319
2320 return 0;
2321}
2322
2323
2324
2325
2326
2327
2328int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
2329{
2330 int ret = 0;
2331 unsigned int slot;
2332 struct buffer_head *di_bh = NULL;
2333 struct ocfs2_dinode *di;
2334 int journal_dirty = 0;
2335
2336 for(slot = 0; slot < osb->max_slots; slot++) {
2337 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
2338 if (ret) {
2339 mlog_errno(ret);
2340 goto out;
2341 }
2342
2343 di = (struct ocfs2_dinode *) di_bh->b_data;
2344
2345 osb->slot_recovery_generations[slot] =
2346 ocfs2_get_recovery_generation(di);
2347
2348 if (le32_to_cpu(di->id1.journal1.ij_flags) &
2349 OCFS2_JOURNAL_DIRTY_FL)
2350 journal_dirty = 1;
2351
2352 brelse(di_bh);
2353 di_bh = NULL;
2354 }
2355
2356out:
2357 if (journal_dirty)
2358 ret = -EROFS;
2359 return ret;
2360}
2361