1
2
3
4
5
6
7
8
9
10
11
12#include <linux/fs.h>
13#include <linux/types.h>
14#include <linux/slab.h>
15#include <linux/highmem.h>
16#include <linux/kthread.h>
17#include <linux/time.h>
18#include <linux/random.h>
19#include <linux/delay.h>
20
21#include <cluster/masklog.h>
22
23#include "ocfs2.h"
24
25#include "alloc.h"
26#include "blockcheck.h"
27#include "dir.h"
28#include "dlmglue.h"
29#include "extent_map.h"
30#include "heartbeat.h"
31#include "inode.h"
32#include "journal.h"
33#include "localalloc.h"
34#include "slot_map.h"
35#include "super.h"
36#include "sysfile.h"
37#include "uptodate.h"
38#include "quota.h"
39#include "file.h"
40#include "namei.h"
41
42#include "buffer_head_io.h"
43#include "ocfs2_trace.h"
44
/*
 * File-scope spinlock. Nothing in this part of the file takes it directly;
 * NOTE(review): presumably used by the transaction-id helpers declared in
 * journal.h -- confirm against that header.
 */
DEFINE_SPINLOCK(trans_inc_lock);

/* NOTE(review): unit is not visible here; presumably milliseconds -- confirm. */
#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000

/* Forward declarations: journal state and commit handling. */
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
static int ocfs2_commit_thread(void *arg);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
				      int dirty, int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
				 int slot_num);

/* Forward declarations: mount synchronisation. */
static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);

/* Forward declarations: node / slot recovery. */
static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_recover_node(struct ocfs2_super *osb,
			      int node_num, int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
				 int slot,
				 enum ocfs2_orphan_reco_type orphan_reco_type);
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
					    int slot_num,
					    struct ocfs2_dinode *la_dinode,
					    struct ocfs2_dinode *tl_dinode,
					    struct ocfs2_quota_recovery *qrec,
					    enum ocfs2_orphan_reco_type orphan_reco_type);
69
/* Calls __ocfs2_wait_on_mount() with the quota argument cleared. */
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
	const int quota = 0;

	return __ocfs2_wait_on_mount(osb, quota);
}

/* Calls __ocfs2_wait_on_mount() with the quota argument set. */
static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
{
	const int quota = 1;

	return __ocfs2_wait_on_mount(osb, quota);
}
79
80
81
82
83
84
/*
 * State of the replay map. ocfs2_replay_map_set_state() refuses to move the
 * state away from REPLAY_DONE once it has been reached.
 */
enum ocfs2_replay_state {
	REPLAY_UNNEEDED = 0,	/* initial state set by ocfs2_compute_replay_slots() */
	REPLAY_NEEDED,		/* ocfs2_queue_replay_slots() will queue the marked slots */
	REPLAY_DONE		/* marked slots have been queued */
};

/*
 * Per-slot replay bookkeeping, allocated with one trailing byte per slot.
 *
 * rm_slots:        number of entries in rm_replay_slots
 * rm_state:        see enum ocfs2_replay_state
 * rm_replay_slots: non-zero for every slot that should be queued for
 *                  recovery completion
 */
struct ocfs2_replay_map {
	unsigned int rm_slots;
	enum ocfs2_replay_state rm_state;
	/* C99 flexible array member; sized at allocation time. */
	unsigned char rm_replay_slots[];
};
96
97static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
98{
99 if (!osb->replay_map)
100 return;
101
102
103 if (osb->replay_map->rm_state == REPLAY_DONE)
104 return;
105
106 osb->replay_map->rm_state = state;
107}
108
109int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
110{
111 struct ocfs2_replay_map *replay_map;
112 int i, node_num;
113
114
115 if (osb->replay_map)
116 return 0;
117
118 replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
119 (osb->max_slots * sizeof(char)), GFP_KERNEL);
120
121 if (!replay_map) {
122 mlog_errno(-ENOMEM);
123 return -ENOMEM;
124 }
125
126 spin_lock(&osb->osb_lock);
127
128 replay_map->rm_slots = osb->max_slots;
129 replay_map->rm_state = REPLAY_UNNEEDED;
130
131
132 for (i = 0; i < replay_map->rm_slots; i++) {
133 if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
134 replay_map->rm_replay_slots[i] = 1;
135 }
136
137 osb->replay_map = replay_map;
138 spin_unlock(&osb->osb_lock);
139 return 0;
140}
141
142static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
143 enum ocfs2_orphan_reco_type orphan_reco_type)
144{
145 struct ocfs2_replay_map *replay_map = osb->replay_map;
146 int i;
147
148 if (!replay_map)
149 return;
150
151 if (replay_map->rm_state != REPLAY_NEEDED)
152 return;
153
154 for (i = 0; i < replay_map->rm_slots; i++)
155 if (replay_map->rm_replay_slots[i])
156 ocfs2_queue_recovery_completion(osb->journal, i, NULL,
157 NULL, NULL,
158 orphan_reco_type);
159 replay_map->rm_state = REPLAY_DONE;
160}
161
162static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
163{
164 struct ocfs2_replay_map *replay_map = osb->replay_map;
165
166 if (!osb->replay_map)
167 return;
168
169 kfree(replay_map);
170 osb->replay_map = NULL;
171}
172
173int ocfs2_recovery_init(struct ocfs2_super *osb)
174{
175 struct ocfs2_recovery_map *rm;
176
177 mutex_init(&osb->recovery_lock);
178 osb->disable_recovery = 0;
179 osb->recovery_thread_task = NULL;
180 init_waitqueue_head(&osb->recovery_event);
181
182 rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
183 osb->max_slots * sizeof(unsigned int),
184 GFP_KERNEL);
185 if (!rm) {
186 mlog_errno(-ENOMEM);
187 return -ENOMEM;
188 }
189
190 rm->rm_entries = (unsigned int *)((char *)rm +
191 sizeof(struct ocfs2_recovery_map));
192 osb->recovery_map = rm;
193
194 return 0;
195}
196
197
198
199
200static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
201{
202 mb();
203 return osb->recovery_thread_task != NULL;
204}
205
206void ocfs2_recovery_exit(struct ocfs2_super *osb)
207{
208 struct ocfs2_recovery_map *rm;
209
210
211
212 mutex_lock(&osb->recovery_lock);
213 osb->disable_recovery = 1;
214 mutex_unlock(&osb->recovery_lock);
215 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
216
217
218
219
220 if (osb->ocfs2_wq)
221 flush_workqueue(osb->ocfs2_wq);
222
223
224
225
226
227 rm = osb->recovery_map;
228
229
230 kfree(rm);
231}
232
233static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
234 unsigned int node_num)
235{
236 int i;
237 struct ocfs2_recovery_map *rm = osb->recovery_map;
238
239 assert_spin_locked(&osb->osb_lock);
240
241 for (i = 0; i < rm->rm_used; i++) {
242 if (rm->rm_entries[i] == node_num)
243 return 1;
244 }
245
246 return 0;
247}
248
249
250static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
251 unsigned int node_num)
252{
253 struct ocfs2_recovery_map *rm = osb->recovery_map;
254
255 spin_lock(&osb->osb_lock);
256 if (__ocfs2_recovery_map_test(osb, node_num)) {
257 spin_unlock(&osb->osb_lock);
258 return 1;
259 }
260
261
262 BUG_ON(rm->rm_used >= osb->max_slots);
263
264 rm->rm_entries[rm->rm_used] = node_num;
265 rm->rm_used++;
266 spin_unlock(&osb->osb_lock);
267
268 return 0;
269}
270
271static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
272 unsigned int node_num)
273{
274 int i;
275 struct ocfs2_recovery_map *rm = osb->recovery_map;
276
277 spin_lock(&osb->osb_lock);
278
279 for (i = 0; i < rm->rm_used; i++) {
280 if (rm->rm_entries[i] == node_num)
281 break;
282 }
283
284 if (i < rm->rm_used) {
285
286 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
287 (rm->rm_used - i - 1) * sizeof(unsigned int));
288 rm->rm_used--;
289 }
290
291 spin_unlock(&osb->osb_lock);
292}
293
294static int ocfs2_commit_cache(struct ocfs2_super *osb)
295{
296 int status = 0;
297 unsigned int flushed;
298 struct ocfs2_journal *journal = NULL;
299
300 journal = osb->journal;
301
302
303 down_write(&journal->j_trans_barrier);
304
305 flushed = atomic_read(&journal->j_num_trans);
306 trace_ocfs2_commit_cache_begin(flushed);
307 if (flushed == 0) {
308 up_write(&journal->j_trans_barrier);
309 goto finally;
310 }
311
312 jbd2_journal_lock_updates(journal->j_journal);
313 status = jbd2_journal_flush(journal->j_journal);
314 jbd2_journal_unlock_updates(journal->j_journal);
315 if (status < 0) {
316 up_write(&journal->j_trans_barrier);
317 mlog_errno(status);
318 goto finally;
319 }
320
321 ocfs2_inc_trans_id(journal);
322
323 flushed = atomic_read(&journal->j_num_trans);
324 atomic_set(&journal->j_num_trans, 0);
325 up_write(&journal->j_trans_barrier);
326
327 trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
328
329 ocfs2_wake_downconvert_thread(osb);
330 wake_up(&journal->j_checkpointed);
331finally:
332 return status;
333}
334
335handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
336{
337 journal_t *journal = osb->journal->j_journal;
338 handle_t *handle;
339
340 BUG_ON(!osb || !osb->journal->j_journal);
341
342 if (ocfs2_is_hard_readonly(osb))
343 return ERR_PTR(-EROFS);
344
345 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
346 BUG_ON(max_buffs <= 0);
347
348
349 if (journal_current_handle())
350 return jbd2_journal_start(journal, max_buffs);
351
352 sb_start_intwrite(osb->sb);
353
354 down_read(&osb->journal->j_trans_barrier);
355
356 handle = jbd2_journal_start(journal, max_buffs);
357 if (IS_ERR(handle)) {
358 up_read(&osb->journal->j_trans_barrier);
359 sb_end_intwrite(osb->sb);
360
361 mlog_errno(PTR_ERR(handle));
362
363 if (is_journal_aborted(journal)) {
364 ocfs2_abort(osb->sb, "Detected aborted journal\n");
365 handle = ERR_PTR(-EROFS);
366 }
367 } else {
368 if (!ocfs2_mount_local(osb))
369 atomic_inc(&(osb->journal->j_num_trans));
370 }
371
372 return handle;
373}
374
375int ocfs2_commit_trans(struct ocfs2_super *osb,
376 handle_t *handle)
377{
378 int ret, nested;
379 struct ocfs2_journal *journal = osb->journal;
380
381 BUG_ON(!handle);
382
383 nested = handle->h_ref > 1;
384 ret = jbd2_journal_stop(handle);
385 if (ret < 0)
386 mlog_errno(ret);
387
388 if (!nested) {
389 up_read(&journal->j_trans_barrier);
390 sb_end_intwrite(osb->sb);
391 }
392
393 return ret;
394}
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413int ocfs2_extend_trans(handle_t *handle, int nblocks)
414{
415 int status, old_nblocks;
416
417 BUG_ON(!handle);
418 BUG_ON(nblocks < 0);
419
420 if (!nblocks)
421 return 0;
422
423 old_nblocks = handle->h_buffer_credits;
424
425 trace_ocfs2_extend_trans(old_nblocks, nblocks);
426
427#ifdef CONFIG_OCFS2_DEBUG_FS
428 status = 1;
429#else
430 status = jbd2_journal_extend(handle, nblocks);
431 if (status < 0) {
432 mlog_errno(status);
433 goto bail;
434 }
435#endif
436
437 if (status > 0) {
438 trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
439 status = jbd2_journal_restart(handle,
440 old_nblocks + nblocks);
441 if (status < 0) {
442 mlog_errno(status);
443 goto bail;
444 }
445 }
446
447 status = 0;
448bail:
449 return status;
450}
451
452
453
454
455
456
457
458int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
459{
460 int status, old_nblks;
461
462 BUG_ON(!handle);
463
464 old_nblks = handle->h_buffer_credits;
465 trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
466
467 if (old_nblks < thresh)
468 return 0;
469
470 status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA);
471 if (status < 0) {
472 mlog_errno(status);
473 goto bail;
474 }
475
476 if (status > 0) {
477 status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);
478 if (status < 0)
479 mlog_errno(status);
480 }
481
482bail:
483 return status;
484}
485
486
487struct ocfs2_triggers {
488 struct jbd2_buffer_trigger_type ot_triggers;
489 int ot_offset;
490};
491
492static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
493{
494 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
495}
496
497static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
498 struct buffer_head *bh,
499 void *data, size_t size)
500{
501 struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
502
503
504
505
506
507
508
509 ocfs2_block_check_compute(data, size, data + ot->ot_offset);
510}
511
512
513
514
515
516static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
517 struct buffer_head *bh,
518 void *data, size_t size)
519{
520 struct ocfs2_disk_dqtrailer *dqt =
521 ocfs2_block_dqtrailer(size, data);
522
523
524
525
526
527
528
529 ocfs2_block_check_compute(data, size, &dqt->dq_check);
530}
531
532
533
534
535
536static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
537 struct buffer_head *bh,
538 void *data, size_t size)
539{
540 struct ocfs2_dir_block_trailer *trailer =
541 ocfs2_dir_trailer_from_size(size, data);
542
543
544
545
546
547
548
549 ocfs2_block_check_compute(data, size, &trailer->db_check);
550}
551
552static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
553 struct buffer_head *bh)
554{
555 mlog(ML_ERROR,
556 "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
557 "bh->b_blocknr = %llu\n",
558 (unsigned long)bh,
559 (unsigned long long)bh->b_blocknr);
560
561 ocfs2_error(bh->b_bdev->bd_super,
562 "JBD2 has aborted our journal, ocfs2 cannot continue\n");
563}
564
565static struct ocfs2_triggers di_triggers = {
566 .ot_triggers = {
567 .t_frozen = ocfs2_frozen_trigger,
568 .t_abort = ocfs2_abort_trigger,
569 },
570 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
571};
572
573static struct ocfs2_triggers eb_triggers = {
574 .ot_triggers = {
575 .t_frozen = ocfs2_frozen_trigger,
576 .t_abort = ocfs2_abort_trigger,
577 },
578 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
579};
580
581static struct ocfs2_triggers rb_triggers = {
582 .ot_triggers = {
583 .t_frozen = ocfs2_frozen_trigger,
584 .t_abort = ocfs2_abort_trigger,
585 },
586 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
587};
588
589static struct ocfs2_triggers gd_triggers = {
590 .ot_triggers = {
591 .t_frozen = ocfs2_frozen_trigger,
592 .t_abort = ocfs2_abort_trigger,
593 },
594 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
595};
596
597static struct ocfs2_triggers db_triggers = {
598 .ot_triggers = {
599 .t_frozen = ocfs2_db_frozen_trigger,
600 .t_abort = ocfs2_abort_trigger,
601 },
602};
603
604static struct ocfs2_triggers xb_triggers = {
605 .ot_triggers = {
606 .t_frozen = ocfs2_frozen_trigger,
607 .t_abort = ocfs2_abort_trigger,
608 },
609 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
610};
611
612static struct ocfs2_triggers dq_triggers = {
613 .ot_triggers = {
614 .t_frozen = ocfs2_dq_frozen_trigger,
615 .t_abort = ocfs2_abort_trigger,
616 },
617};
618
619static struct ocfs2_triggers dr_triggers = {
620 .ot_triggers = {
621 .t_frozen = ocfs2_frozen_trigger,
622 .t_abort = ocfs2_abort_trigger,
623 },
624 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
625};
626
627static struct ocfs2_triggers dl_triggers = {
628 .ot_triggers = {
629 .t_frozen = ocfs2_frozen_trigger,
630 .t_abort = ocfs2_abort_trigger,
631 },
632 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
633};
634
635static int __ocfs2_journal_access(handle_t *handle,
636 struct ocfs2_caching_info *ci,
637 struct buffer_head *bh,
638 struct ocfs2_triggers *triggers,
639 int type)
640{
641 int status;
642 struct ocfs2_super *osb =
643 OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
644
645 BUG_ON(!ci || !ci->ci_ops);
646 BUG_ON(!handle);
647 BUG_ON(!bh);
648
649 trace_ocfs2_journal_access(
650 (unsigned long long)ocfs2_metadata_cache_owner(ci),
651 (unsigned long long)bh->b_blocknr, type, bh->b_size);
652
653
654 if (!buffer_uptodate(bh)) {
655 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
656 mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n",
657 (unsigned long long)bh->b_blocknr, bh->b_state);
658
659 lock_buffer(bh);
660
661
662
663
664
665
666
667
668
669
670 if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
671 unlock_buffer(bh);
672 return ocfs2_error(osb->sb, "A previous attempt to "
673 "write this buffer head failed\n");
674 }
675 unlock_buffer(bh);
676 }
677
678
679
680
681
682
683
684 ocfs2_set_ci_lock_trans(osb->journal, ci);
685
686 ocfs2_metadata_cache_io_lock(ci);
687 switch (type) {
688 case OCFS2_JOURNAL_ACCESS_CREATE:
689 case OCFS2_JOURNAL_ACCESS_WRITE:
690 status = jbd2_journal_get_write_access(handle, bh);
691 break;
692
693 case OCFS2_JOURNAL_ACCESS_UNDO:
694 status = jbd2_journal_get_undo_access(handle, bh);
695 break;
696
697 default:
698 status = -EINVAL;
699 mlog(ML_ERROR, "Unknown access type!\n");
700 }
701 if (!status && ocfs2_meta_ecc(osb) && triggers)
702 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
703 ocfs2_metadata_cache_io_unlock(ci);
704
705 if (status < 0)
706 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
707 status, type);
708
709 return status;
710}
711
712int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
713 struct buffer_head *bh, int type)
714{
715 return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
716}
717
718int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
719 struct buffer_head *bh, int type)
720{
721 return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
722}
723
724int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
725 struct buffer_head *bh, int type)
726{
727 return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
728 type);
729}
730
731int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
732 struct buffer_head *bh, int type)
733{
734 return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
735}
736
737int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
738 struct buffer_head *bh, int type)
739{
740 return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
741}
742
743int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
744 struct buffer_head *bh, int type)
745{
746 return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
747}
748
749int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
750 struct buffer_head *bh, int type)
751{
752 return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
753}
754
755int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
756 struct buffer_head *bh, int type)
757{
758 return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
759}
760
761int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
762 struct buffer_head *bh, int type)
763{
764 return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
765}
766
767int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
768 struct buffer_head *bh, int type)
769{
770 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
771}
772
773void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
774{
775 int status;
776
777 trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);
778
779 status = jbd2_journal_dirty_metadata(handle, bh);
780 if (status) {
781 mlog_errno(status);
782 if (!is_handle_aborted(handle)) {
783 journal_t *journal = handle->h_transaction->t_journal;
784 struct super_block *sb = bh->b_bdev->bd_super;
785
786 mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
787 "Aborting transaction and journal.\n");
788 handle->h_err = status;
789 jbd2_journal_abort_handle(handle);
790 jbd2_journal_abort(journal, status);
791 ocfs2_abort(sb, "Journal already aborted.\n");
792 }
793 }
794}
795
796#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
797
798void ocfs2_set_journal_params(struct ocfs2_super *osb)
799{
800 journal_t *journal = osb->journal->j_journal;
801 unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
802
803 if (osb->osb_commit_interval)
804 commit_interval = osb->osb_commit_interval;
805
806 write_lock(&journal->j_state_lock);
807 journal->j_commit_interval = commit_interval;
808 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
809 journal->j_flags |= JBD2_BARRIER;
810 else
811 journal->j_flags &= ~JBD2_BARRIER;
812 write_unlock(&journal->j_state_lock);
813}
814
815int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
816{
817 int status = -1;
818 struct inode *inode = NULL;
819 journal_t *j_journal = NULL;
820 struct ocfs2_dinode *di = NULL;
821 struct buffer_head *bh = NULL;
822 struct ocfs2_super *osb;
823 int inode_lock = 0;
824
825 BUG_ON(!journal);
826
827 osb = journal->j_osb;
828
829
830 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
831 osb->slot_num);
832 if (inode == NULL) {
833 status = -EACCES;
834 mlog_errno(status);
835 goto done;
836 }
837 if (is_bad_inode(inode)) {
838 mlog(ML_ERROR, "access error (bad inode)\n");
839 iput(inode);
840 inode = NULL;
841 status = -EACCES;
842 goto done;
843 }
844
845 SET_INODE_JOURNAL(inode);
846 OCFS2_I(inode)->ip_open_count++;
847
848
849
850
851 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
852 if (status < 0) {
853 if (status != -ERESTARTSYS)
854 mlog(ML_ERROR, "Could not get lock on journal!\n");
855 goto done;
856 }
857
858 inode_lock = 1;
859 di = (struct ocfs2_dinode *)bh->b_data;
860
861 if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) {
862 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
863 i_size_read(inode));
864 status = -EINVAL;
865 goto done;
866 }
867
868 trace_ocfs2_journal_init(i_size_read(inode),
869 (unsigned long long)inode->i_blocks,
870 OCFS2_I(inode)->ip_clusters);
871
872
873 j_journal = jbd2_journal_init_inode(inode);
874 if (j_journal == NULL) {
875 mlog(ML_ERROR, "Linux journal layer error\n");
876 status = -EINVAL;
877 goto done;
878 }
879
880 trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
881
882 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
883 OCFS2_JOURNAL_DIRTY_FL);
884
885 journal->j_journal = j_journal;
886 journal->j_inode = inode;
887 journal->j_bh = bh;
888
889 ocfs2_set_journal_params(osb);
890
891 journal->j_state = OCFS2_JOURNAL_LOADED;
892
893 status = 0;
894done:
895 if (status < 0) {
896 if (inode_lock)
897 ocfs2_inode_unlock(inode, 1);
898 brelse(bh);
899 if (inode) {
900 OCFS2_I(inode)->ip_open_count--;
901 iput(inode);
902 }
903 }
904
905 return status;
906}
907
908static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
909{
910 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
911}
912
913static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
914{
915 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
916}
917
918static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
919 int dirty, int replayed)
920{
921 int status;
922 unsigned int flags;
923 struct ocfs2_journal *journal = osb->journal;
924 struct buffer_head *bh = journal->j_bh;
925 struct ocfs2_dinode *fe;
926
927 fe = (struct ocfs2_dinode *)bh->b_data;
928
929
930
931
932 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
933
934 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
935 if (dirty)
936 flags |= OCFS2_JOURNAL_DIRTY_FL;
937 else
938 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
939 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
940
941 if (replayed)
942 ocfs2_bump_recovery_generation(fe);
943
944 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
945 status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
946 if (status < 0)
947 mlog_errno(status);
948
949 return status;
950}
951
952
953
954
955
956void ocfs2_journal_shutdown(struct ocfs2_super *osb)
957{
958 struct ocfs2_journal *journal = NULL;
959 int status = 0;
960 struct inode *inode = NULL;
961 int num_running_trans = 0;
962
963 BUG_ON(!osb);
964
965 journal = osb->journal;
966 if (!journal)
967 goto done;
968
969 inode = journal->j_inode;
970
971 if (journal->j_state != OCFS2_JOURNAL_LOADED)
972 goto done;
973
974
975 if (!igrab(inode))
976 BUG();
977
978 num_running_trans = atomic_read(&(osb->journal->j_num_trans));
979 trace_ocfs2_journal_shutdown(num_running_trans);
980
981
982
983
984
985 journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;
986
987
988
989
990 if (osb->commit_task) {
991
992 trace_ocfs2_journal_shutdown_wait(osb->commit_task);
993 kthread_stop(osb->commit_task);
994 osb->commit_task = NULL;
995 }
996
997 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
998
999 if (ocfs2_mount_local(osb)) {
1000 jbd2_journal_lock_updates(journal->j_journal);
1001 status = jbd2_journal_flush(journal->j_journal);
1002 jbd2_journal_unlock_updates(journal->j_journal);
1003 if (status < 0)
1004 mlog_errno(status);
1005 }
1006
1007
1008 if (!jbd2_journal_destroy(journal->j_journal) && !status) {
1009
1010
1011
1012
1013 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
1014 if (status < 0)
1015 mlog_errno(status);
1016 }
1017 journal->j_journal = NULL;
1018
1019 OCFS2_I(inode)->ip_open_count--;
1020
1021
1022 ocfs2_inode_unlock(inode, 1);
1023
1024 brelse(journal->j_bh);
1025 journal->j_bh = NULL;
1026
1027 journal->j_state = OCFS2_JOURNAL_FREE;
1028
1029
1030done:
1031 iput(inode);
1032}
1033
1034static void ocfs2_clear_journal_error(struct super_block *sb,
1035 journal_t *journal,
1036 int slot)
1037{
1038 int olderr;
1039
1040 olderr = jbd2_journal_errno(journal);
1041 if (olderr) {
1042 mlog(ML_ERROR, "File system error %d recorded in "
1043 "journal %u.\n", olderr, slot);
1044 mlog(ML_ERROR, "File system on device %s needs checking.\n",
1045 sb->s_id);
1046
1047 jbd2_journal_ack_err(journal);
1048 jbd2_journal_clear_err(journal);
1049 }
1050}
1051
1052int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
1053{
1054 int status = 0;
1055 struct ocfs2_super *osb;
1056
1057 BUG_ON(!journal);
1058
1059 osb = journal->j_osb;
1060
1061 status = jbd2_journal_load(journal->j_journal);
1062 if (status < 0) {
1063 mlog(ML_ERROR, "Failed to load journal!\n");
1064 goto done;
1065 }
1066
1067 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
1068
1069 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
1070 if (status < 0) {
1071 mlog_errno(status);
1072 goto done;
1073 }
1074
1075
1076 if (!local) {
1077 osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
1078 "ocfs2cmt-%s", osb->uuid_str);
1079 if (IS_ERR(osb->commit_task)) {
1080 status = PTR_ERR(osb->commit_task);
1081 osb->commit_task = NULL;
1082 mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
1083 "error=%d", status);
1084 goto done;
1085 }
1086 } else
1087 osb->commit_task = NULL;
1088
1089done:
1090 return status;
1091}
1092
1093
1094
1095
1096int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1097{
1098 int status;
1099
1100 BUG_ON(!journal);
1101
1102 status = jbd2_journal_wipe(journal->j_journal, full);
1103 if (status < 0) {
1104 mlog_errno(status);
1105 goto bail;
1106 }
1107
1108 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
1109 if (status < 0)
1110 mlog_errno(status);
1111
1112bail:
1113 return status;
1114}
1115
1116static int ocfs2_recovery_completed(struct ocfs2_super *osb)
1117{
1118 int empty;
1119 struct ocfs2_recovery_map *rm = osb->recovery_map;
1120
1121 spin_lock(&osb->osb_lock);
1122 empty = (rm->rm_used == 0);
1123 spin_unlock(&osb->osb_lock);
1124
1125 return empty;
1126}
1127
1128void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
1129{
1130 wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
1131}
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143static int ocfs2_force_read_journal(struct inode *inode)
1144{
1145 int status = 0;
1146 int i;
1147 u64 v_blkno, p_blkno, p_blocks, num_blocks;
1148 struct buffer_head *bh = NULL;
1149 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1150
1151 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
1152 v_blkno = 0;
1153 while (v_blkno < num_blocks) {
1154 status = ocfs2_extent_map_get_blocks(inode, v_blkno,
1155 &p_blkno, &p_blocks, NULL);
1156 if (status < 0) {
1157 mlog_errno(status);
1158 goto bail;
1159 }
1160
1161 for (i = 0; i < p_blocks; i++, p_blkno++) {
1162 bh = __find_get_block(osb->sb->s_bdev, p_blkno,
1163 osb->sb->s_blocksize);
1164
1165 if (!bh)
1166 continue;
1167
1168 brelse(bh);
1169 bh = NULL;
1170
1171
1172
1173 status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
1174 if (status < 0) {
1175 mlog_errno(status);
1176 goto bail;
1177 }
1178
1179 brelse(bh);
1180 bh = NULL;
1181 }
1182
1183 v_blkno += p_blocks;
1184 }
1185
1186bail:
1187 return status;
1188}
1189
1190struct ocfs2_la_recovery_item {
1191 struct list_head lri_list;
1192 int lri_slot;
1193 struct ocfs2_dinode *lri_la_dinode;
1194 struct ocfs2_dinode *lri_tl_dinode;
1195 struct ocfs2_quota_recovery *lri_qrec;
1196 enum ocfs2_orphan_reco_type lri_orphan_reco_type;
1197};
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209void ocfs2_complete_recovery(struct work_struct *work)
1210{
1211 int ret = 0;
1212 struct ocfs2_journal *journal =
1213 container_of(work, struct ocfs2_journal, j_recovery_work);
1214 struct ocfs2_super *osb = journal->j_osb;
1215 struct ocfs2_dinode *la_dinode, *tl_dinode;
1216 struct ocfs2_la_recovery_item *item, *n;
1217 struct ocfs2_quota_recovery *qrec;
1218 enum ocfs2_orphan_reco_type orphan_reco_type;
1219 LIST_HEAD(tmp_la_list);
1220
1221 trace_ocfs2_complete_recovery(
1222 (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
1223
1224 spin_lock(&journal->j_lock);
1225 list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
1226 spin_unlock(&journal->j_lock);
1227
1228 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
1229 list_del_init(&item->lri_list);
1230
1231 ocfs2_wait_on_quotas(osb);
1232
1233 la_dinode = item->lri_la_dinode;
1234 tl_dinode = item->lri_tl_dinode;
1235 qrec = item->lri_qrec;
1236 orphan_reco_type = item->lri_orphan_reco_type;
1237
1238 trace_ocfs2_complete_recovery_slot(item->lri_slot,
1239 la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
1240 tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
1241 qrec);
1242
1243 if (la_dinode) {
1244 ret = ocfs2_complete_local_alloc_recovery(osb,
1245 la_dinode);
1246 if (ret < 0)
1247 mlog_errno(ret);
1248
1249 kfree(la_dinode);
1250 }
1251
1252 if (tl_dinode) {
1253 ret = ocfs2_complete_truncate_log_recovery(osb,
1254 tl_dinode);
1255 if (ret < 0)
1256 mlog_errno(ret);
1257
1258 kfree(tl_dinode);
1259 }
1260
1261 ret = ocfs2_recover_orphans(osb, item->lri_slot,
1262 orphan_reco_type);
1263 if (ret < 0)
1264 mlog_errno(ret);
1265
1266 if (qrec) {
1267 ret = ocfs2_finish_quota_recovery(osb, qrec,
1268 item->lri_slot);
1269 if (ret < 0)
1270 mlog_errno(ret);
1271
1272 }
1273
1274 kfree(item);
1275 }
1276
1277 trace_ocfs2_complete_recovery_end(ret);
1278}
1279
1280
1281
1282
1283static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1284 int slot_num,
1285 struct ocfs2_dinode *la_dinode,
1286 struct ocfs2_dinode *tl_dinode,
1287 struct ocfs2_quota_recovery *qrec,
1288 enum ocfs2_orphan_reco_type orphan_reco_type)
1289{
1290 struct ocfs2_la_recovery_item *item;
1291
1292 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
1293 if (!item) {
1294
1295
1296
1297 kfree(la_dinode);
1298 kfree(tl_dinode);
1299
1300 if (qrec)
1301 ocfs2_free_quota_recovery(qrec);
1302
1303 mlog_errno(-ENOMEM);
1304 return;
1305 }
1306
1307 INIT_LIST_HEAD(&item->lri_list);
1308 item->lri_la_dinode = la_dinode;
1309 item->lri_slot = slot_num;
1310 item->lri_tl_dinode = tl_dinode;
1311 item->lri_qrec = qrec;
1312 item->lri_orphan_reco_type = orphan_reco_type;
1313
1314 spin_lock(&journal->j_lock);
1315 list_add_tail(&item->lri_list, &journal->j_la_cleanups);
1316 queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work);
1317 spin_unlock(&journal->j_lock);
1318}
1319
1320
1321
1322void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
1323{
1324 struct ocfs2_journal *journal = osb->journal;
1325
1326 if (ocfs2_is_hard_readonly(osb))
1327 return;
1328
1329
1330
1331 ocfs2_queue_recovery_completion(journal, osb->slot_num,
1332 osb->local_alloc_copy, NULL, NULL,
1333 ORPHAN_NEED_TRUNCATE);
1334 ocfs2_schedule_truncate_log_flush(osb, 0);
1335
1336 osb->local_alloc_copy = NULL;
1337
1338
1339 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1340 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1341 ocfs2_free_replay_slots(osb);
1342}
1343
1344void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
1345{
1346 if (osb->quota_rec) {
1347 ocfs2_queue_recovery_completion(osb->journal,
1348 osb->slot_num,
1349 NULL,
1350 NULL,
1351 osb->quota_rec,
1352 ORPHAN_NEED_TRUNCATE);
1353 osb->quota_rec = NULL;
1354 }
1355}
1356
/*
 * Body of the recovery kthread started by ocfs2_recovery_thread().
 *
 * @arg is the struct ocfs2_super being recovered.  The thread waits for
 * the mount to finish, then, under the super lock, recovers every node
 * listed in the recovery map, re-checks the journals, starts quota
 * recovery for the recovered slots and queues replay slots.  It loops back
 * to "restart" while the pass succeeded but recovery is not yet complete.
 *
 * Does not return to its caller: it ends via complete_and_exit() with the
 * final status.
 */
static int __ocfs2_recovery_thread(void *arg)
{
	int status, node_num, slot_num;
	struct ocfs2_super *osb = arg;
	struct ocfs2_recovery_map *rm = osb->recovery_map;
	/* Slots whose quota files need recovery; only used if quota_enabled. */
	int *rm_quota = NULL;
	int rm_quota_used = 0, i;
	struct ocfs2_quota_recovery *qrec;

	/* Quota recovery is attempted only if a user or group quota feature is set. */
	int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
			OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
		|| OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
			OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);

	status = ocfs2_wait_on_mount(osb);
	if (status < 0) {
		goto bail;
	}

	if (quota_enabled) {
		/* One entry per slot is the most the dedup loop below can add. */
		rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
		if (!rm_quota) {
			status = -ENOMEM;
			goto bail;
		}
	}
restart:
	status = ocfs2_super_lock(osb, 1);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* A failure here is logged but does not stop the recovery pass. */
	status = ocfs2_compute_replay_slots(osb);
	if (status < 0)
		mlog_errno(status);

	/* Queue a completion item for our own slot, without truncation. */
	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
					NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);

	/*
	 * osb_lock is held only while inspecting the recovery map; it is
	 * dropped for the duration of each node's recovery.
	 */
	spin_lock(&osb->osb_lock);
	while (rm->rm_used) {
		/*
		 * Always work on entry zero.  The entry is cleared only when
		 * recovery of that node succeeded (see below).
		 */
		node_num = rm->rm_entries[0];
		spin_unlock(&osb->osb_lock);
		slot_num = ocfs2_node_num_to_slot(osb, node_num);
		trace_ocfs2_recovery_thread_node(node_num, slot_num);
		if (slot_num == -ENOENT) {
			/* No slot for this node: treat as recovered and clear it. */
			status = 0;
			goto skip_recovery;
		}

		if (quota_enabled) {
			/* Record slot_num once; linear search for an existing entry. */
			for (i = 0; i < rm_quota_used
					&& rm_quota[i] != slot_num; i++)
				;

			if (i == rm_quota_used)
				rm_quota[rm_quota_used++] = slot_num;
		}

		status = ocfs2_recover_node(osb, node_num, slot_num);
skip_recovery:
		if (!status) {
			ocfs2_recovery_map_clear(osb, node_num);
		} else {
			/*
			 * The node is left in the map, so rm_used stays
			 * non-zero and the loop runs again.
			 */
			mlog(ML_ERROR,
			     "Error %d recovering node %d on device (%u,%u)!\n",
			     status, node_num,
			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
			mlog(ML_ERROR, "Volume requires unmount.\n");
		}

		spin_lock(&osb->osb_lock);
	}
	spin_unlock(&osb->osb_lock);
	trace_ocfs2_recovery_thread_end(status);

	/*
	 * Re-read the journal inodes.  -EROFS (returned when a journal is
	 * still marked dirty) is not treated as an error here.
	 */
	status = ocfs2_check_journals_nolocks(osb);
	status = (status == -EROFS) ? 0 : status;
	if (status < 0)
		mlog_errno(status);

	if (quota_enabled) {
		/* Start quota recovery for every slot recorded above. */
		for (i = 0; i < rm_quota_used; i++) {
			qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
			if (IS_ERR(qrec)) {
				/* Log, remember the error, and try the next slot. */
				status = PTR_ERR(qrec);
				mlog_errno(status);
				continue;
			}
			ocfs2_queue_recovery_completion(osb->journal,
							rm_quota[i],
							NULL, NULL, qrec,
							ORPHAN_NEED_TRUNCATE);
		}
	}

	ocfs2_super_unlock(osb, 1);

	ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);

bail:
	mutex_lock(&osb->recovery_lock);
	/* Successful pass but more recovery pending: run another pass. */
	if (!status && !ocfs2_recovery_completed(osb)) {
		mutex_unlock(&osb->recovery_lock);
		goto restart;
	}

	ocfs2_free_replay_slots(osb);
	/*
	 * Clear the task pointer while holding recovery_lock, then wake
	 * anyone waiting on recovery_event.
	 */
	osb->recovery_thread_task = NULL;
	mb(); /* sync with ocfs2_recovery_thread_running */
	wake_up(&osb->recovery_event);

	mutex_unlock(&osb->recovery_lock);

	if (quota_enabled)
		kfree(rm_quota);

	complete_and_exit(NULL, status);
}
1495
1496void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1497{
1498 mutex_lock(&osb->recovery_lock);
1499
1500 trace_ocfs2_recovery_thread(node_num, osb->node_num,
1501 osb->disable_recovery, osb->recovery_thread_task,
1502 osb->disable_recovery ?
1503 -1 : ocfs2_recovery_map_set(osb, node_num));
1504
1505 if (osb->disable_recovery)
1506 goto out;
1507
1508 if (osb->recovery_thread_task)
1509 goto out;
1510
1511 osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
1512 "ocfs2rec-%s", osb->uuid_str);
1513 if (IS_ERR(osb->recovery_thread_task)) {
1514 mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
1515 osb->recovery_thread_task = NULL;
1516 }
1517
1518out:
1519 mutex_unlock(&osb->recovery_lock);
1520 wake_up(&osb->recovery_event);
1521}
1522
1523static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1524 int slot_num,
1525 struct buffer_head **bh,
1526 struct inode **ret_inode)
1527{
1528 int status = -EACCES;
1529 struct inode *inode = NULL;
1530
1531 BUG_ON(slot_num >= osb->max_slots);
1532
1533 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1534 slot_num);
1535 if (!inode || is_bad_inode(inode)) {
1536 mlog_errno(status);
1537 goto bail;
1538 }
1539 SET_INODE_JOURNAL(inode);
1540
1541 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
1542 if (status < 0) {
1543 mlog_errno(status);
1544 goto bail;
1545 }
1546
1547 status = 0;
1548
1549bail:
1550 if (inode) {
1551 if (status || !ret_inode)
1552 iput(inode);
1553 else
1554 *ret_inode = inode;
1555 }
1556 return status;
1557}
1558
1559
1560
/*
 * Replay the journal in @slot_num (the slot used by @node_num) if its
 * inode is still flagged dirty, then mark it clean on disk and bump its
 * recovery generation.
 *
 * Returns -EBUSY if the on-disk recovery generation no longer matches the
 * cached one, a negative errno if reading, locking, loading or the final
 * inode write fails, and otherwise the non-negative status of the last
 * step performed.
 */
static int ocfs2_replay_journal(struct ocfs2_super *osb,
				int node_num,
				int slot_num)
{
	int status;
	int got_lock = 0;
	unsigned int flags;
	struct inode *inode = NULL;
	struct ocfs2_dinode *fe;
	journal_t *journal = NULL;
	struct buffer_head *bh = NULL;
	u32 slot_reco_gen;

	/* First, an unlocked read of the journal inode to sample its generation. */
	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
	if (status) {
		mlog_errno(status);
		goto done;
	}

	fe = (struct ocfs2_dinode *)bh->b_data;
	slot_reco_gen = ocfs2_get_recovery_generation(fe);
	brelse(bh);
	bh = NULL;

	/*
	 * If the generation on disk differs from the one we cached, adopt
	 * the new value and report -EBUSY instead of replaying.
	 * NOTE(review): presumably another node already recovered this
	 * slot - confirm.
	 */
	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
		trace_ocfs2_replay_journal_recovered(slot_num,
		     osb->slot_recovery_generations[slot_num], slot_reco_gen);
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		status = -EBUSY;
		goto done;
	}

	/* Take the journal inode's cluster lock (exclusive, recovery flag); this also re-reads bh. */
	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
	if (status < 0) {
		trace_ocfs2_replay_journal_lock_err(status);
		if (status != -ERESTARTSYS)
			mlog(ML_ERROR, "Could not lock journal!\n");
		goto done;
	}
	got_lock = 1;

	fe = (struct ocfs2_dinode *) bh->b_data;

	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	slot_reco_gen = ocfs2_get_recovery_generation(fe);

	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
		/* Journal is clean: just refresh the cached generation. */
		trace_ocfs2_replay_journal_skip(node_num);
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		goto done;
	}

	/* A dirty journal was found, so flag the replay map accordingly. */
	ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);

	printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\
	       "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
	       MINOR(osb->sb->s_dev));

	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);

	status = ocfs2_force_read_journal(inode);
	if (status < 0) {
		mlog_errno(status);
		goto done;
	}

	journal = jbd2_journal_init_inode(inode);
	if (journal == NULL) {
		mlog(ML_ERROR, "Linux journal layer error\n");
		status = -EIO;
		goto done;
	}

	status = jbd2_journal_load(journal);
	if (status < 0) {
		mlog_errno(status);
		/*
		 * Take an extra inode reference before destroying the
		 * journal; the iput() at "done" drops our original one.
		 * NOTE(review): presumably jbd2_journal_destroy() releases
		 * a reference of its own - confirm.
		 */
		if (!igrab(inode))
			BUG();
		jbd2_journal_destroy(journal);
		goto done;
	}

	ocfs2_clear_journal_error(osb->sb, journal, slot_num);

	/* Flush the loaded journal with updates locked out. */
	jbd2_journal_lock_updates(journal);
	status = jbd2_journal_flush(journal);
	jbd2_journal_unlock_updates(journal);
	/*
	 * NOTE(review): a flush failure is only logged; the dirty flag is
	 * still cleared below and status is overwritten by the inode write.
	 */
	if (status < 0)
		mlog_errno(status);

	/* Clear the dirty flag in the journal inode. */
	flags = le32_to_cpu(fe->id1.journal1.ij_flags);
	flags &= ~OCFS2_JOURNAL_DIRTY_FL;
	fe->id1.journal1.ij_flags = cpu_to_le32(flags);

	/* Bump the recovery generation and cache the new value. */
	ocfs2_bump_recovery_generation(fe);
	osb->slot_recovery_generations[slot_num] =
					ocfs2_get_recovery_generation(fe);

	/* Recompute the metadata checksum and write the inode block back. */
	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
	status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
	if (status < 0)
		mlog_errno(status);

	/* Same extra-reference step as on the load-failure path above. */
	if (!igrab(inode))
		BUG();

	jbd2_journal_destroy(journal);

	printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\
	       "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
	       MINOR(osb->sb->s_dev));
done:
	/* Common exit: unlock if locked, then drop the inode and buffer. */
	if (got_lock)
		ocfs2_inode_unlock(inode, 1);

	iput(inode);
	brelse(bh);

	return status;
}
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710static int ocfs2_recover_node(struct ocfs2_super *osb,
1711 int node_num, int slot_num)
1712{
1713 int status = 0;
1714 struct ocfs2_dinode *la_copy = NULL;
1715 struct ocfs2_dinode *tl_copy = NULL;
1716
1717 trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
1718
1719
1720
1721 BUG_ON(osb->node_num == node_num);
1722
1723 status = ocfs2_replay_journal(osb, node_num, slot_num);
1724 if (status < 0) {
1725 if (status == -EBUSY) {
1726 trace_ocfs2_recover_node_skip(slot_num, node_num);
1727 status = 0;
1728 goto done;
1729 }
1730 mlog_errno(status);
1731 goto done;
1732 }
1733
1734
1735 status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
1736 if (status < 0) {
1737 mlog_errno(status);
1738 goto done;
1739 }
1740
1741
1742
1743
1744 status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
1745 if (status < 0)
1746 mlog_errno(status);
1747
1748
1749
1750 status = ocfs2_clear_slot(osb, slot_num);
1751 if (status < 0)
1752 mlog_errno(status);
1753
1754
1755 ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
1756 tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
1757
1758 status = 0;
1759done:
1760
1761 return status;
1762}
1763
1764
1765
1766
1767static int ocfs2_trylock_journal(struct ocfs2_super *osb,
1768 int slot_num)
1769{
1770 int status, flags;
1771 struct inode *inode = NULL;
1772
1773 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1774 slot_num);
1775 if (inode == NULL) {
1776 mlog(ML_ERROR, "access error\n");
1777 status = -EACCES;
1778 goto bail;
1779 }
1780 if (is_bad_inode(inode)) {
1781 mlog(ML_ERROR, "access error (bad inode)\n");
1782 iput(inode);
1783 inode = NULL;
1784 status = -EACCES;
1785 goto bail;
1786 }
1787 SET_INODE_JOURNAL(inode);
1788
1789 flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
1790 status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
1791 if (status < 0) {
1792 if (status != -EAGAIN)
1793 mlog_errno(status);
1794 goto bail;
1795 }
1796
1797 ocfs2_inode_unlock(inode, 1);
1798bail:
1799 iput(inode);
1800
1801 return status;
1802}
1803
1804
1805
1806int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1807{
1808 unsigned int node_num;
1809 int status, i;
1810 u32 gen;
1811 struct buffer_head *bh = NULL;
1812 struct ocfs2_dinode *di;
1813
1814
1815
1816
1817 for (i = 0; i < osb->max_slots; i++) {
1818
1819 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1820 if (status) {
1821 mlog_errno(status);
1822 goto bail;
1823 }
1824 di = (struct ocfs2_dinode *)bh->b_data;
1825 gen = ocfs2_get_recovery_generation(di);
1826 brelse(bh);
1827 bh = NULL;
1828
1829 spin_lock(&osb->osb_lock);
1830 osb->slot_recovery_generations[i] = gen;
1831
1832 trace_ocfs2_mark_dead_nodes(i,
1833 osb->slot_recovery_generations[i]);
1834
1835 if (i == osb->slot_num) {
1836 spin_unlock(&osb->osb_lock);
1837 continue;
1838 }
1839
1840 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
1841 if (status == -ENOENT) {
1842 spin_unlock(&osb->osb_lock);
1843 continue;
1844 }
1845
1846 if (__ocfs2_recovery_map_test(osb, node_num)) {
1847 spin_unlock(&osb->osb_lock);
1848 continue;
1849 }
1850 spin_unlock(&osb->osb_lock);
1851
1852
1853
1854
1855 status = ocfs2_trylock_journal(osb, i);
1856 if (!status) {
1857
1858
1859
1860 ocfs2_recovery_thread(osb, node_num);
1861 } else if ((status < 0) && (status != -EAGAIN)) {
1862 mlog_errno(status);
1863 goto bail;
1864 }
1865 }
1866
1867 status = 0;
1868bail:
1869 return status;
1870}
1871
1872
1873
1874
1875
1876
1877static inline unsigned long ocfs2_orphan_scan_timeout(void)
1878{
1879 unsigned long time;
1880
1881 get_random_bytes(&time, sizeof(time));
1882 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
1883 return msecs_to_jiffies(time);
1884}
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1914{
1915 struct ocfs2_orphan_scan *os;
1916 int status, i;
1917 u32 seqno = 0;
1918
1919 os = &osb->osb_orphan_scan;
1920
1921 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1922 goto out;
1923
1924 trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
1925 atomic_read(&os->os_state));
1926
1927 status = ocfs2_orphan_scan_lock(osb, &seqno);
1928 if (status < 0) {
1929 if (status != -EAGAIN)
1930 mlog_errno(status);
1931 goto out;
1932 }
1933
1934
1935 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1936 goto unlock;
1937
1938 if (os->os_seqno != seqno) {
1939 os->os_seqno = seqno;
1940 goto unlock;
1941 }
1942
1943 for (i = 0; i < osb->max_slots; i++)
1944 ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
1945 NULL, ORPHAN_NO_NEED_TRUNCATE);
1946
1947
1948
1949
1950 seqno++;
1951 os->os_count++;
1952 os->os_scantime = ktime_get_seconds();
1953unlock:
1954 ocfs2_orphan_scan_unlock(osb, seqno);
1955out:
1956 trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
1957 atomic_read(&os->os_state));
1958 return;
1959}
1960
1961
1962static void ocfs2_orphan_scan_work(struct work_struct *work)
1963{
1964 struct ocfs2_orphan_scan *os;
1965 struct ocfs2_super *osb;
1966
1967 os = container_of(work, struct ocfs2_orphan_scan,
1968 os_orphan_scan_work.work);
1969 osb = os->os_osb;
1970
1971 mutex_lock(&os->os_lock);
1972 ocfs2_queue_orphan_scan(osb);
1973 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1974 queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
1975 ocfs2_orphan_scan_timeout());
1976 mutex_unlock(&os->os_lock);
1977}
1978
1979void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1980{
1981 struct ocfs2_orphan_scan *os;
1982
1983 os = &osb->osb_orphan_scan;
1984 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
1985 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1986 mutex_lock(&os->os_lock);
1987 cancel_delayed_work(&os->os_orphan_scan_work);
1988 mutex_unlock(&os->os_lock);
1989 }
1990}
1991
1992void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1993{
1994 struct ocfs2_orphan_scan *os;
1995
1996 os = &osb->osb_orphan_scan;
1997 os->os_osb = osb;
1998 os->os_count = 0;
1999 os->os_seqno = 0;
2000 mutex_init(&os->os_lock);
2001 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
2002}
2003
2004void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
2005{
2006 struct ocfs2_orphan_scan *os;
2007
2008 os = &osb->osb_orphan_scan;
2009 os->os_scantime = ktime_get_seconds();
2010 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
2011 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
2012 else {
2013 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
2014 queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
2015 ocfs2_orphan_scan_timeout());
2016 }
2017}
2018
/*
 * Private state for ocfs2_orphan_filldir() while an orphan directory is
 * being walked.
 */
struct ocfs2_orphan_filldir_priv {
	/* Embedded context; the actor gets back to this struct via container_of(). */
	struct dir_context ctx;
	/* Head of the list of orphan inodes, chained through ip_next_orphan. */
	struct inode *head;
	/* Superblock passed to ocfs2_iget() for each directory entry. */
	struct ocfs2_super *osb;
	/* With ORPHAN_NO_NEED_TRUNCATE, entries carrying the DIO prefix are skipped. */
	enum ocfs2_orphan_reco_type orphan_reco_type;
};
2025
2026static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
2027 int name_len, loff_t pos, u64 ino,
2028 unsigned type)
2029{
2030 struct ocfs2_orphan_filldir_priv *p =
2031 container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx);
2032 struct inode *iter;
2033
2034 if (name_len == 1 && !strncmp(".", name, 1))
2035 return 0;
2036 if (name_len == 2 && !strncmp("..", name, 2))
2037 return 0;
2038
2039
2040 if ((p->orphan_reco_type == ORPHAN_NO_NEED_TRUNCATE) &&
2041 (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2042 OCFS2_DIO_ORPHAN_PREFIX_LEN)))
2043 return 0;
2044
2045
2046 iter = ocfs2_iget(p->osb, ino,
2047 OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
2048 if (IS_ERR(iter))
2049 return 0;
2050
2051 if (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2052 OCFS2_DIO_ORPHAN_PREFIX_LEN))
2053 OCFS2_I(iter)->ip_flags |= OCFS2_INODE_DIO_ORPHAN_ENTRY;
2054
2055
2056
2057 if (OCFS2_I(iter)->ip_next_orphan) {
2058 iput(iter);
2059 return 0;
2060 }
2061
2062 trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
2063
2064
2065 OCFS2_I(iter)->ip_next_orphan = p->head;
2066 p->head = iter;
2067
2068 return 0;
2069}
2070
2071static int ocfs2_queue_orphans(struct ocfs2_super *osb,
2072 int slot,
2073 struct inode **head,
2074 enum ocfs2_orphan_reco_type orphan_reco_type)
2075{
2076 int status;
2077 struct inode *orphan_dir_inode = NULL;
2078 struct ocfs2_orphan_filldir_priv priv = {
2079 .ctx.actor = ocfs2_orphan_filldir,
2080 .osb = osb,
2081 .head = *head,
2082 .orphan_reco_type = orphan_reco_type
2083 };
2084
2085 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2086 ORPHAN_DIR_SYSTEM_INODE,
2087 slot);
2088 if (!orphan_dir_inode) {
2089 status = -ENOENT;
2090 mlog_errno(status);
2091 return status;
2092 }
2093
2094 inode_lock(orphan_dir_inode);
2095 status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
2096 if (status < 0) {
2097 mlog_errno(status);
2098 goto out;
2099 }
2100
2101 status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx);
2102 if (status) {
2103 mlog_errno(status);
2104 goto out_cluster;
2105 }
2106
2107 *head = priv.head;
2108
2109out_cluster:
2110 ocfs2_inode_unlock(orphan_dir_inode, 0);
2111out:
2112 inode_unlock(orphan_dir_inode);
2113 iput(orphan_dir_inode);
2114 return status;
2115}
2116
2117static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
2118 int slot)
2119{
2120 int ret;
2121
2122 spin_lock(&osb->osb_lock);
2123 ret = !osb->osb_orphan_wipes[slot];
2124 spin_unlock(&osb->osb_lock);
2125 return ret;
2126}
2127
2128static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
2129 int slot)
2130{
2131 spin_lock(&osb->osb_lock);
2132
2133
2134 ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
2135 while (osb->osb_orphan_wipes[slot]) {
2136
2137
2138
2139 spin_unlock(&osb->osb_lock);
2140 wait_event_interruptible(osb->osb_wipe_event,
2141 ocfs2_orphan_recovery_can_continue(osb, slot));
2142 spin_lock(&osb->osb_lock);
2143 }
2144 spin_unlock(&osb->osb_lock);
2145}
2146
/*
 * Clear @slot's bit in osb_recovering_orphan_dirs, undoing the mark set
 * by ocfs2_mark_recovering_orphan_dir().
 */
static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
					      int slot)
{
	ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
}
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
/*
 * Recover the orphan directory of @slot.
 *
 * Collects the directory's inodes into a list (see ocfs2_queue_orphans())
 * while the directory is marked as recovering, then processes each one:
 *
 *  - inodes flagged OCFS2_INODE_DIO_ORPHAN_ENTRY are locked and, if the
 *    on-disk inode carries OCFS2_DIO_ORPHANED_FL, truncated to the
 *    in-memory i_size and removed from the orphan directory;
 *  - all other inodes are flagged OCFS2_INODE_MAYBE_ORPHANED.
 *
 * Every inode is then released with iput().
 *
 * Returns the status of the last step that assigned it: the result of
 * queueing the orphans or, if DIO entries were processed, of the last
 * lock/truncate/delete call made for them.
 */
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
				 int slot,
				 enum ocfs2_orphan_reco_type orphan_reco_type)
{
	int ret = 0;
	struct inode *inode = NULL;
	struct inode *iter;
	struct ocfs2_inode_info *oi;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di = NULL;

	trace_ocfs2_recover_orphans(slot);

	/* The recovering mark is held only while the list is being built. */
	ocfs2_mark_recovering_orphan_dir(osb, slot);
	ret = ocfs2_queue_orphans(osb, slot, &inode, orphan_reco_type);
	ocfs2_clear_recovering_orphan_dir(osb, slot);

	/*
	 * A queueing error is only logged; whatever is on the list is
	 * still processed below.
	 */
	if (ret)
		mlog_errno(ret);

	while (inode) {
		oi = OCFS2_I(inode);
		trace_ocfs2_recover_orphans_iput(
					(unsigned long long)oi->ip_blkno);

		/* Unlink this inode from the list before working on it. */
		iter = oi->ip_next_orphan;
		oi->ip_next_orphan = NULL;

		if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
			/* Lock order: inode lock, rw cluster lock, inode cluster lock. */
			inode_lock(inode);
			ret = ocfs2_rw_lock(inode, 1);
			if (ret < 0) {
				mlog_errno(ret);
				goto unlock_mutex;
			}

			ret = ocfs2_inode_lock(inode, &di_bh, 1);
			if (ret) {
				mlog_errno(ret);
				goto unlock_rw;
			}

			di = (struct ocfs2_dinode *)di_bh->b_data;

			/* Only act if the on-disk inode is still marked DIO-orphaned. */
			if (di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)) {
				ret = ocfs2_truncate_file(inode, di_bh,
						i_size_read(inode));
				if (ret < 0) {
					/* -ENOSPC is not logged. */
					if (ret != -ENOSPC)
						mlog_errno(ret);
					goto unlock_inode;
				}

				ret = ocfs2_del_inode_from_orphan(osb, inode,
						di_bh, 0, 0);
				if (ret)
					mlog_errno(ret);
			}
			/* Unwind in reverse order of acquisition. */
unlock_inode:
			ocfs2_inode_unlock(inode, 1);
			brelse(di_bh);
			di_bh = NULL;
unlock_rw:
			ocfs2_rw_unlock(inode, 1);
unlock_mutex:
			inode_unlock(inode);

			/* Clear the in-memory DIO marker, whether or not the above succeeded. */
			oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
		} else {
			spin_lock(&oi->ip_lock);
			/*
			 * Flag the inode as possibly orphaned before the
			 * reference is dropped below.
			 */
			oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
			spin_unlock(&oi->ip_lock);
		}

		iput(inode);
		inode = iter;
	}

	return ret;
}
2259
2260static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
2261{
2262
2263
2264
2265 wait_event(osb->osb_mount_event,
2266 (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
2267 atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
2268 atomic_read(&osb->vol_state) == VOLUME_DISABLED);
2269
2270
2271
2272
2273 if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
2274 trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
2275 mlog(0, "mount error, exiting!\n");
2276 return -EBUSY;
2277 }
2278
2279 return 0;
2280}
2281
2282static int ocfs2_commit_thread(void *arg)
2283{
2284 int status;
2285 struct ocfs2_super *osb = arg;
2286 struct ocfs2_journal *journal = osb->journal;
2287
2288
2289
2290
2291
2292 while (!(kthread_should_stop() &&
2293 atomic_read(&journal->j_num_trans) == 0)) {
2294
2295 wait_event_interruptible(osb->checkpoint_event,
2296 atomic_read(&journal->j_num_trans)
2297 || kthread_should_stop());
2298
2299 status = ocfs2_commit_cache(osb);
2300 if (status < 0) {
2301 static unsigned long abort_warn_time;
2302
2303
2304 if (printk_timed_ratelimit(&abort_warn_time, 60*HZ))
2305 mlog(ML_ERROR, "status = %d, journal is "
2306 "already aborted.\n", status);
2307
2308
2309
2310
2311
2312 msleep_interruptible(1000);
2313 }
2314
2315 if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
2316 mlog(ML_KTHREAD,
2317 "commit_thread: %u transactions pending on "
2318 "shutdown\n",
2319 atomic_read(&journal->j_num_trans));
2320 }
2321 }
2322
2323 return 0;
2324}
2325
2326
2327
2328
2329
2330
2331int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
2332{
2333 int ret = 0;
2334 unsigned int slot;
2335 struct buffer_head *di_bh = NULL;
2336 struct ocfs2_dinode *di;
2337 int journal_dirty = 0;
2338
2339 for(slot = 0; slot < osb->max_slots; slot++) {
2340 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
2341 if (ret) {
2342 mlog_errno(ret);
2343 goto out;
2344 }
2345
2346 di = (struct ocfs2_dinode *) di_bh->b_data;
2347
2348 osb->slot_recovery_generations[slot] =
2349 ocfs2_get_recovery_generation(di);
2350
2351 if (le32_to_cpu(di->id1.journal1.ij_flags) &
2352 OCFS2_JOURNAL_DIRTY_FL)
2353 journal_dirty = 1;
2354
2355 brelse(di_bh);
2356 di_bh = NULL;
2357 }
2358
2359out:
2360 if (journal_dirty)
2361 ret = -EROFS;
2362 return ret;
2363}
2364