1
2
3
4
5
6
7
8
9
10
11
12#include <linux/fs.h>
13#include <linux/types.h>
14#include <linux/slab.h>
15#include <linux/highmem.h>
16#include <linux/kthread.h>
17#include <linux/time.h>
18#include <linux/random.h>
19#include <linux/delay.h>
20
21#include <cluster/masklog.h>
22
23#include "ocfs2.h"
24
25#include "alloc.h"
26#include "blockcheck.h"
27#include "dir.h"
28#include "dlmglue.h"
29#include "extent_map.h"
30#include "heartbeat.h"
31#include "inode.h"
32#include "journal.h"
33#include "localalloc.h"
34#include "slot_map.h"
35#include "super.h"
36#include "sysfile.h"
37#include "uptodate.h"
38#include "quota.h"
39#include "file.h"
40#include "namei.h"
41
42#include "buffer_head_io.h"
43#include "ocfs2_trace.h"
44
/*
 * Spinlock defined without 'static', so it is visible to other translation
 * units. No user of it appears in this part of the file.
 */
DEFINE_SPINLOCK(trans_inc_lock);

/*
 * NOTE(review): the unit is not visible here (presumably milliseconds) and
 * no use of this constant appears in this part of the file - confirm.
 */
#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000

/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions.
 */
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_recover_node(struct ocfs2_super *osb,
 int node_num, int slot_num);
static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 int dirty, int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
 int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 int slot,
 enum ocfs2_orphan_reco_type orphan_reco_type);
static int ocfs2_commit_thread(void *arg);
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 int slot_num,
 struct ocfs2_dinode *la_dinode,
 struct ocfs2_dinode *tl_dinode,
 struct ocfs2_quota_recovery *qrec,
 enum ocfs2_orphan_reco_type orphan_reco_type);
69
/*
 * Calls __ocfs2_wait_on_mount() with the quota argument set to 0 and
 * returns its result.
 */
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
 return __ocfs2_wait_on_mount(osb, 0);
}

/*
 * Calls __ocfs2_wait_on_mount() with the quota argument set to 1 and
 * returns its result.
 */
static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
{
 return __ocfs2_wait_on_mount(osb, 1);
}
79
80
81
82
83
84
/*
 * State stored in struct ocfs2_replay_map::rm_state.
 *
 * REPLAY_UNNEEDED: value a freshly computed map starts with
 *                  (see ocfs2_compute_replay_slots()).
 * REPLAY_NEEDED:   the only state in which ocfs2_queue_replay_slots()
 *                  queues work.
 * REPLAY_DONE:     set after the slots were queued;
 *                  ocfs2_replay_map_set_state() never leaves this state.
 */
enum ocfs2_replay_state {
 REPLAY_UNNEEDED = 0,
 REPLAY_NEEDED,
 REPLAY_DONE
};
90
/*
 * Per-superblock map of slots to queue for recovery completion.
 *
 * Allocated by ocfs2_compute_replay_slots() with one trailing byte per slot
 * and released by ocfs2_free_replay_slots().
 */
struct ocfs2_replay_map {
 /* Number of entries in rm_replay_slots[]; copied from osb->max_slots. */
 unsigned int rm_slots;
 /* Current replay state, see enum ocfs2_replay_state. */
 enum ocfs2_replay_state rm_state;
 /*
  * rm_replay_slots[i] is set to 1 when the slot-to-node lookup for slot i
  * returned -ENOENT at map creation; non-zero entries are the ones
  * ocfs2_queue_replay_slots() queues.
  */
 unsigned char rm_replay_slots[];
};
96
97static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
98{
99 if (!osb->replay_map)
100 return;
101
102
103 if (osb->replay_map->rm_state == REPLAY_DONE)
104 return;
105
106 osb->replay_map->rm_state = state;
107}
108
109int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
110{
111 struct ocfs2_replay_map *replay_map;
112 int i, node_num;
113
114
115 if (osb->replay_map)
116 return 0;
117
118 replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
119 (osb->max_slots * sizeof(char)), GFP_KERNEL);
120
121 if (!replay_map) {
122 mlog_errno(-ENOMEM);
123 return -ENOMEM;
124 }
125
126 spin_lock(&osb->osb_lock);
127
128 replay_map->rm_slots = osb->max_slots;
129 replay_map->rm_state = REPLAY_UNNEEDED;
130
131
132 for (i = 0; i < replay_map->rm_slots; i++) {
133 if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
134 replay_map->rm_replay_slots[i] = 1;
135 }
136
137 osb->replay_map = replay_map;
138 spin_unlock(&osb->osb_lock);
139 return 0;
140}
141
142static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
143 enum ocfs2_orphan_reco_type orphan_reco_type)
144{
145 struct ocfs2_replay_map *replay_map = osb->replay_map;
146 int i;
147
148 if (!replay_map)
149 return;
150
151 if (replay_map->rm_state != REPLAY_NEEDED)
152 return;
153
154 for (i = 0; i < replay_map->rm_slots; i++)
155 if (replay_map->rm_replay_slots[i])
156 ocfs2_queue_recovery_completion(osb->journal, i, NULL,
157 NULL, NULL,
158 orphan_reco_type);
159 replay_map->rm_state = REPLAY_DONE;
160}
161
162static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
163{
164 struct ocfs2_replay_map *replay_map = osb->replay_map;
165
166 if (!osb->replay_map)
167 return;
168
169 kfree(replay_map);
170 osb->replay_map = NULL;
171}
172
173int ocfs2_recovery_init(struct ocfs2_super *osb)
174{
175 struct ocfs2_recovery_map *rm;
176
177 mutex_init(&osb->recovery_lock);
178 osb->disable_recovery = 0;
179 osb->recovery_thread_task = NULL;
180 init_waitqueue_head(&osb->recovery_event);
181
182 rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
183 osb->max_slots * sizeof(unsigned int),
184 GFP_KERNEL);
185 if (!rm) {
186 mlog_errno(-ENOMEM);
187 return -ENOMEM;
188 }
189
190 rm->rm_entries = (unsigned int *)((char *)rm +
191 sizeof(struct ocfs2_recovery_map));
192 osb->recovery_map = rm;
193
194 return 0;
195}
196
197
198
199
200static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
201{
202 mb();
203 return osb->recovery_thread_task != NULL;
204}
205
206void ocfs2_recovery_exit(struct ocfs2_super *osb)
207{
208 struct ocfs2_recovery_map *rm;
209
210
211
212 mutex_lock(&osb->recovery_lock);
213 osb->disable_recovery = 1;
214 mutex_unlock(&osb->recovery_lock);
215 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
216
217
218
219
220 if (osb->ocfs2_wq)
221 flush_workqueue(osb->ocfs2_wq);
222
223
224
225
226
227 rm = osb->recovery_map;
228
229
230 kfree(rm);
231}
232
233static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
234 unsigned int node_num)
235{
236 int i;
237 struct ocfs2_recovery_map *rm = osb->recovery_map;
238
239 assert_spin_locked(&osb->osb_lock);
240
241 for (i = 0; i < rm->rm_used; i++) {
242 if (rm->rm_entries[i] == node_num)
243 return 1;
244 }
245
246 return 0;
247}
248
249
250static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
251 unsigned int node_num)
252{
253 struct ocfs2_recovery_map *rm = osb->recovery_map;
254
255 spin_lock(&osb->osb_lock);
256 if (__ocfs2_recovery_map_test(osb, node_num)) {
257 spin_unlock(&osb->osb_lock);
258 return 1;
259 }
260
261
262 BUG_ON(rm->rm_used >= osb->max_slots);
263
264 rm->rm_entries[rm->rm_used] = node_num;
265 rm->rm_used++;
266 spin_unlock(&osb->osb_lock);
267
268 return 0;
269}
270
271static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
272 unsigned int node_num)
273{
274 int i;
275 struct ocfs2_recovery_map *rm = osb->recovery_map;
276
277 spin_lock(&osb->osb_lock);
278
279 for (i = 0; i < rm->rm_used; i++) {
280 if (rm->rm_entries[i] == node_num)
281 break;
282 }
283
284 if (i < rm->rm_used) {
285
286 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
287 (rm->rm_used - i - 1) * sizeof(unsigned int));
288 rm->rm_used--;
289 }
290
291 spin_unlock(&osb->osb_lock);
292}
293
294static int ocfs2_commit_cache(struct ocfs2_super *osb)
295{
296 int status = 0;
297 unsigned int flushed;
298 struct ocfs2_journal *journal = NULL;
299
300 journal = osb->journal;
301
302
303 down_write(&journal->j_trans_barrier);
304
305 flushed = atomic_read(&journal->j_num_trans);
306 trace_ocfs2_commit_cache_begin(flushed);
307 if (flushed == 0) {
308 up_write(&journal->j_trans_barrier);
309 goto finally;
310 }
311
312 jbd2_journal_lock_updates(journal->j_journal);
313 status = jbd2_journal_flush(journal->j_journal);
314 jbd2_journal_unlock_updates(journal->j_journal);
315 if (status < 0) {
316 up_write(&journal->j_trans_barrier);
317 mlog_errno(status);
318 goto finally;
319 }
320
321 ocfs2_inc_trans_id(journal);
322
323 flushed = atomic_read(&journal->j_num_trans);
324 atomic_set(&journal->j_num_trans, 0);
325 up_write(&journal->j_trans_barrier);
326
327 trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
328
329 ocfs2_wake_downconvert_thread(osb);
330 wake_up(&journal->j_checkpointed);
331finally:
332 return status;
333}
334
335handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
336{
337 journal_t *journal = osb->journal->j_journal;
338 handle_t *handle;
339
340 BUG_ON(!osb || !osb->journal->j_journal);
341
342 if (ocfs2_is_hard_readonly(osb))
343 return ERR_PTR(-EROFS);
344
345 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
346 BUG_ON(max_buffs <= 0);
347
348
349 if (journal_current_handle())
350 return jbd2_journal_start(journal, max_buffs);
351
352 sb_start_intwrite(osb->sb);
353
354 down_read(&osb->journal->j_trans_barrier);
355
356 handle = jbd2_journal_start(journal, max_buffs);
357 if (IS_ERR(handle)) {
358 up_read(&osb->journal->j_trans_barrier);
359 sb_end_intwrite(osb->sb);
360
361 mlog_errno(PTR_ERR(handle));
362
363 if (is_journal_aborted(journal)) {
364 ocfs2_abort(osb->sb, "Detected aborted journal\n");
365 handle = ERR_PTR(-EROFS);
366 }
367 } else {
368 if (!ocfs2_mount_local(osb))
369 atomic_inc(&(osb->journal->j_num_trans));
370 }
371
372 return handle;
373}
374
375int ocfs2_commit_trans(struct ocfs2_super *osb,
376 handle_t *handle)
377{
378 int ret, nested;
379 struct ocfs2_journal *journal = osb->journal;
380
381 BUG_ON(!handle);
382
383 nested = handle->h_ref > 1;
384 ret = jbd2_journal_stop(handle);
385 if (ret < 0)
386 mlog_errno(ret);
387
388 if (!nested) {
389 up_read(&journal->j_trans_barrier);
390 sb_end_intwrite(osb->sb);
391 }
392
393 return ret;
394}
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413int ocfs2_extend_trans(handle_t *handle, int nblocks)
414{
415 int status, old_nblocks;
416
417 BUG_ON(!handle);
418 BUG_ON(nblocks < 0);
419
420 if (!nblocks)
421 return 0;
422
423 old_nblocks = jbd2_handle_buffer_credits(handle);
424
425 trace_ocfs2_extend_trans(old_nblocks, nblocks);
426
427#ifdef CONFIG_OCFS2_DEBUG_FS
428 status = 1;
429#else
430 status = jbd2_journal_extend(handle, nblocks, 0);
431 if (status < 0) {
432 mlog_errno(status);
433 goto bail;
434 }
435#endif
436
437 if (status > 0) {
438 trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
439 status = jbd2_journal_restart(handle,
440 old_nblocks + nblocks);
441 if (status < 0) {
442 mlog_errno(status);
443 goto bail;
444 }
445 }
446
447 status = 0;
448bail:
449 return status;
450}
451
452
453
454
455
456
457
458int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
459{
460 int status, old_nblks;
461
462 BUG_ON(!handle);
463
464 old_nblks = jbd2_handle_buffer_credits(handle);
465 trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
466
467 if (old_nblks < thresh)
468 return 0;
469
470 status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA, 0);
471 if (status < 0) {
472 mlog_errno(status);
473 goto bail;
474 }
475
476 if (status > 0) {
477 status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);
478 if (status < 0)
479 mlog_errno(status);
480 }
481
482bail:
483 return status;
484}
485
486
/*
 * A jbd2 trigger set plus the byte offset of the block-check field inside
 * the metadata block it is attached to. ocfs2_frozen_trigger() adds
 * ot_offset to the start of the block data to locate that field.
 */
struct ocfs2_triggers {
 struct jbd2_buffer_trigger_type ot_triggers;
 int ot_offset;
};

/* Map an embedded jbd2 trigger set back to its containing ocfs2_triggers. */
static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
{
 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
}
496
497static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
498 struct buffer_head *bh,
499 void *data, size_t size)
500{
501 struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
502
503
504
505
506
507
508
509 ocfs2_block_check_compute(data, size, data + ot->ot_offset);
510}
511
512
513
514
515
516static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
517 struct buffer_head *bh,
518 void *data, size_t size)
519{
520 struct ocfs2_disk_dqtrailer *dqt =
521 ocfs2_block_dqtrailer(size, data);
522
523
524
525
526
527
528
529 ocfs2_block_check_compute(data, size, &dqt->dq_check);
530}
531
532
533
534
535
536static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
537 struct buffer_head *bh,
538 void *data, size_t size)
539{
540 struct ocfs2_dir_block_trailer *trailer =
541 ocfs2_dir_trailer_from_size(size, data);
542
543
544
545
546
547
548
549 ocfs2_block_check_compute(data, size, &trailer->db_check);
550}
551
552static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
553 struct buffer_head *bh)
554{
555 mlog(ML_ERROR,
556 "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
557 "bh->b_blocknr = %llu\n",
558 (unsigned long)bh,
559 (unsigned long long)bh->b_blocknr);
560
561 ocfs2_error(bh->b_bdev->bd_super,
562 "JBD2 has aborted our journal, ocfs2 cannot continue\n");
563}
564
/*
 * Trigger sets, one per metadata block type. Each is handed to
 * __ocfs2_journal_access() by the matching ocfs2_journal_access_*() wrapper.
 *
 * Sets that use ocfs2_frozen_trigger() record the offset of the block-check
 * field within their on-disk structure in ot_offset. db_triggers and
 * dq_triggers use dedicated frozen triggers that locate the check field via
 * a trailer helper, so they leave ot_offset at zero.
 */

/* struct ocfs2_dinode, check field i_check. */
static struct ocfs2_triggers di_triggers = {
 .ot_triggers = {
 .t_frozen = ocfs2_frozen_trigger,
 .t_abort = ocfs2_abort_trigger,
 },
 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
};

/* struct ocfs2_extent_block, check field h_check. */
static struct ocfs2_triggers eb_triggers = {
 .ot_triggers = {
 .t_frozen = ocfs2_frozen_trigger,
 .t_abort = ocfs2_abort_trigger,
 },
 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
};

/* struct ocfs2_refcount_block, check field rf_check. */
static struct ocfs2_triggers rb_triggers = {
 .ot_triggers = {
 .t_frozen = ocfs2_frozen_trigger,
 .t_abort = ocfs2_abort_trigger,
 },
 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
};

/* struct ocfs2_group_desc, check field bg_check. */
static struct ocfs2_triggers gd_triggers = {
 .ot_triggers = {
 .t_frozen = ocfs2_frozen_trigger,
 .t_abort = ocfs2_abort_trigger,
 },
 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
};

/* Check field located by ocfs2_db_frozen_trigger() via the dir block trailer. */
static struct ocfs2_triggers db_triggers = {
 .ot_triggers = {
 .t_frozen = ocfs2_db_frozen_trigger,
 .t_abort = ocfs2_abort_trigger,
 },
};

/* struct ocfs2_xattr_block, check field xb_check. */
static struct ocfs2_triggers xb_triggers = {
 .ot_triggers = {
 .t_frozen = ocfs2_frozen_trigger,
 .t_abort = ocfs2_abort_trigger,
 },
 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
};

/* Check field located by ocfs2_dq_frozen_trigger() via the quota block trailer. */
static struct ocfs2_triggers dq_triggers = {
 .ot_triggers = {
 .t_frozen = ocfs2_dq_frozen_trigger,
 .t_abort = ocfs2_abort_trigger,
 },
};

/* struct ocfs2_dx_root_block, check field dr_check. */
static struct ocfs2_triggers dr_triggers = {
 .ot_triggers = {
 .t_frozen = ocfs2_frozen_trigger,
 .t_abort = ocfs2_abort_trigger,
 },
 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
};

/* struct ocfs2_dx_leaf, check field dl_check. */
static struct ocfs2_triggers dl_triggers = {
 .ot_triggers = {
 .t_frozen = ocfs2_frozen_trigger,
 .t_abort = ocfs2_abort_trigger,
 },
 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
};
634
635static int __ocfs2_journal_access(handle_t *handle,
636 struct ocfs2_caching_info *ci,
637 struct buffer_head *bh,
638 struct ocfs2_triggers *triggers,
639 int type)
640{
641 int status;
642 struct ocfs2_super *osb =
643 OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
644
645 BUG_ON(!ci || !ci->ci_ops);
646 BUG_ON(!handle);
647 BUG_ON(!bh);
648
649 trace_ocfs2_journal_access(
650 (unsigned long long)ocfs2_metadata_cache_owner(ci),
651 (unsigned long long)bh->b_blocknr, type, bh->b_size);
652
653
654 if (!buffer_uptodate(bh)) {
655 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
656 mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n",
657 (unsigned long long)bh->b_blocknr, bh->b_state);
658
659 lock_buffer(bh);
660
661
662
663
664
665
666
667
668
669
670 if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
671 unlock_buffer(bh);
672 return ocfs2_error(osb->sb, "A previous attempt to "
673 "write this buffer head failed\n");
674 }
675 unlock_buffer(bh);
676 }
677
678
679
680
681
682
683
684 ocfs2_set_ci_lock_trans(osb->journal, ci);
685
686 ocfs2_metadata_cache_io_lock(ci);
687 switch (type) {
688 case OCFS2_JOURNAL_ACCESS_CREATE:
689 case OCFS2_JOURNAL_ACCESS_WRITE:
690 status = jbd2_journal_get_write_access(handle, bh);
691 break;
692
693 case OCFS2_JOURNAL_ACCESS_UNDO:
694 status = jbd2_journal_get_undo_access(handle, bh);
695 break;
696
697 default:
698 status = -EINVAL;
699 mlog(ML_ERROR, "Unknown access type!\n");
700 }
701 if (!status && ocfs2_meta_ecc(osb) && triggers)
702 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
703 ocfs2_metadata_cache_io_unlock(ci);
704
705 if (status < 0)
706 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
707 status, type);
708
709 return status;
710}
711
/*
 * Typed journal-access entry points. Each one forwards to
 * __ocfs2_journal_access() with the trigger set of its block type and
 * returns that function's result unchanged.
 */

/* Journal access with di_triggers (check field in struct ocfs2_dinode). */
int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
}

/* Journal access with eb_triggers (check field in struct ocfs2_extent_block). */
int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
}

/* Journal access with rb_triggers (check field in struct ocfs2_refcount_block). */
int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
 type);
}

/* Journal access with gd_triggers (check field in struct ocfs2_group_desc). */
int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
}

/* Journal access with db_triggers (check field in the dir block trailer). */
int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
}

/* Journal access with xb_triggers (check field in struct ocfs2_xattr_block). */
int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
}

/* Journal access with dq_triggers (check field in the quota block trailer). */
int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
}

/* Journal access with dr_triggers (check field in struct ocfs2_dx_root_block). */
int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
}

/* Journal access with dl_triggers (check field in struct ocfs2_dx_leaf). */
int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
}

/* Journal access without any triggers attached to the buffer. */
int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
 struct buffer_head *bh, int type)
{
 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
}
772
773void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
774{
775 int status;
776
777 trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);
778
779 status = jbd2_journal_dirty_metadata(handle, bh);
780 if (status) {
781 mlog_errno(status);
782 if (!is_handle_aborted(handle)) {
783 journal_t *journal = handle->h_transaction->t_journal;
784 struct super_block *sb = bh->b_bdev->bd_super;
785
786 mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
787 "Aborting transaction and journal.\n");
788 handle->h_err = status;
789 jbd2_journal_abort_handle(handle);
790 jbd2_journal_abort(journal, status);
791 ocfs2_abort(sb, "Journal already aborted.\n");
792 }
793 }
794}
795
796#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
797
798void ocfs2_set_journal_params(struct ocfs2_super *osb)
799{
800 journal_t *journal = osb->journal->j_journal;
801 unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
802
803 if (osb->osb_commit_interval)
804 commit_interval = osb->osb_commit_interval;
805
806 write_lock(&journal->j_state_lock);
807 journal->j_commit_interval = commit_interval;
808 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
809 journal->j_flags |= JBD2_BARRIER;
810 else
811 journal->j_flags &= ~JBD2_BARRIER;
812 write_unlock(&journal->j_state_lock);
813}
814
/*
 * Set up the journal of this node's slot (osb->slot_num).
 *
 * @journal: journal structure to fill in; journal->j_osb must be set.
 * @dirty:   written on success only; non-zero when OCFS2_JOURNAL_DIRTY_FL
 *           is set in the on-disk journal flags (the masked flag value is
 *           stored, not a normalised 0/1).
 *
 * On success the journal inode reference, its inode lock and the dinode
 * buffer head stay held and are stored in @journal (j_inode, j_bh), and the
 * state becomes OCFS2_JOURNAL_LOADED. On failure everything acquired so far
 * is released again.
 *
 * Returns 0 on success, -EACCES when the journal inode cannot be obtained
 * or is bad, -EINVAL when the journal file is smaller than
 * OCFS2_MIN_JOURNAL_SIZE or jbd2 cannot initialise it, or the error from
 * taking the inode lock.
 */
int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
{
 int status = -1;
 struct inode *inode = NULL;
 journal_t *j_journal = NULL;
 struct ocfs2_dinode *di = NULL;
 struct buffer_head *bh = NULL;
 struct ocfs2_super *osb;
 int inode_lock = 0;

 BUG_ON(!journal);

 osb = journal->j_osb;

 /* Look up the journal system file that belongs to our slot. */
 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
 osb->slot_num);
 if (inode == NULL) {
 status = -EACCES;
 mlog_errno(status);
 goto done;
 }
 if (is_bad_inode(inode)) {
 mlog(ML_ERROR, "access error (bad inode)\n");
 /* Drop the reference here so the cleanup below skips this inode. */
 iput(inode);
 inode = NULL;
 status = -EACCES;
 goto done;
 }

 /* From here on the error path must undo the open count bump. */
 SET_INODE_JOURNAL(inode);
 OCFS2_I(inode)->ip_open_count++;

 /*
  * Take the inode lock with OCFS2_META_LOCK_RECOVERY; bh receives the
  * journal dinode block. -ERESTARTSYS is not logged.
  */
 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
 if (status < 0) {
 if (status != -ERESTARTSYS)
 mlog(ML_ERROR, "Could not get lock on journal!\n");
 goto done;
 }

 inode_lock = 1;
 di = (struct ocfs2_dinode *)bh->b_data;

 if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) {
 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
 i_size_read(inode));
 status = -EINVAL;
 goto done;
 }

 trace_ocfs2_journal_init(i_size_read(inode),
 (unsigned long long)inode->i_blocks,
 OCFS2_I(inode)->ip_clusters);

 /* Hand the inode to jbd2. */
 j_journal = jbd2_journal_init_inode(inode);
 if (j_journal == NULL) {
 mlog(ML_ERROR, "Linux journal layer error\n");
 status = -EINVAL;
 goto done;
 }

 trace_ocfs2_journal_init_maxlen(j_journal->j_total_len);

 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
 OCFS2_JOURNAL_DIRTY_FL);

 journal->j_journal = j_journal;
 journal->j_journal->j_submit_inode_data_buffers =
 jbd2_journal_submit_inode_data_buffers;
 journal->j_journal->j_finish_inode_data_buffers =
 jbd2_journal_finish_inode_data_buffers;
 journal->j_inode = inode;
 journal->j_bh = bh;

 ocfs2_set_journal_params(osb);

 journal->j_state = OCFS2_JOURNAL_LOADED;

 status = 0;
done:
 /* Error path only: release whatever was acquired above. */
 if (status < 0) {
 if (inode_lock)
 ocfs2_inode_unlock(inode, 1);
 brelse(bh);
 if (inode) {
 OCFS2_I(inode)->ip_open_count--;
 iput(inode);
 }
 }

 return status;
}
911
/*
 * Increment the little-endian recovery generation stored in the journal
 * dinode @di by one (in memory only; the caller writes the block out).
 */
static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
{
 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
}

/* Return the recovery generation of the journal dinode @di in CPU byte order. */
static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
{
 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
}
921
922static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
923 int dirty, int replayed)
924{
925 int status;
926 unsigned int flags;
927 struct ocfs2_journal *journal = osb->journal;
928 struct buffer_head *bh = journal->j_bh;
929 struct ocfs2_dinode *fe;
930
931 fe = (struct ocfs2_dinode *)bh->b_data;
932
933
934
935
936 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
937
938 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
939 if (dirty)
940 flags |= OCFS2_JOURNAL_DIRTY_FL;
941 else
942 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
943 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
944
945 if (replayed)
946 ocfs2_bump_recovery_generation(fe);
947
948 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
949 status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
950 if (status < 0)
951 mlog_errno(status);
952
953 return status;
954}
955
956
957
958
959
/*
 * Shut down the journal of @osb.
 *
 * Does nothing besides the final iput() when there is no journal or the
 * journal is not in state OCFS2_JOURNAL_LOADED. Otherwise the commit thread
 * is stopped, the jbd2 journal is destroyed, the on-disk dirty flag is
 * cleared when that went cleanly, and the inode lock, dinode buffer and
 * open count taken by ocfs2_journal_init() are released. The journal ends
 * in state OCFS2_JOURNAL_FREE.
 */
void ocfs2_journal_shutdown(struct ocfs2_super *osb)
{
 struct ocfs2_journal *journal = NULL;
 int status = 0;
 struct inode *inode = NULL;
 int num_running_trans = 0;

 BUG_ON(!osb);

 journal = osb->journal;
 if (!journal)
 goto done;

 inode = journal->j_inode;

 if (journal->j_state != OCFS2_JOURNAL_LOADED)
 goto done;

 /*
  * Take an extra inode reference.
  * NOTE(review): presumably balances a reference dropped inside
  * jbd2_journal_destroy() - confirm against the jbd2 implementation.
  */
 if (!igrab(inode))
 BUG();

 num_running_trans = atomic_read(&(osb->journal->j_num_trans));
 trace_ocfs2_journal_shutdown(num_running_trans);

 /* Mark the shutdown before stopping the commit thread. */
 journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;

 /* The commit thread only exists for non-local mounts (see ocfs2_journal_load()). */
 if (osb->commit_task) {

 trace_ocfs2_journal_shutdown_wait(osb->commit_task);
 kthread_stop(osb->commit_task);
 osb->commit_task = NULL;
 }

 /* No transaction may be outstanding once the commit thread is gone. */
 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);

 /* Local mounts have no commit thread, so flush the journal here. */
 if (ocfs2_mount_local(osb)) {
 jbd2_journal_lock_updates(journal->j_journal);
 status = jbd2_journal_flush(journal->j_journal);
 jbd2_journal_unlock_updates(journal->j_journal);
 if (status < 0)
 mlog_errno(status);
 }

 /*
  * Clear the on-disk dirty flag only when the journal was destroyed
  * without error and the flush above (if any) succeeded.
  */
 if (!jbd2_journal_destroy(journal->j_journal) && !status) {

 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
 if (status < 0)
 mlog_errno(status);
 }
 journal->j_journal = NULL;

 OCFS2_I(inode)->ip_open_count--;

 /* Release the inode lock taken in ocfs2_journal_init(). */
 ocfs2_inode_unlock(inode, 1);

 brelse(journal->j_bh);
 journal->j_bh = NULL;

 journal->j_state = OCFS2_JOURNAL_FREE;

done:
 /* inode is NULL when there is no journal; journal->j_inode is not reset here. */
 iput(inode);
}
1037
1038static void ocfs2_clear_journal_error(struct super_block *sb,
1039 journal_t *journal,
1040 int slot)
1041{
1042 int olderr;
1043
1044 olderr = jbd2_journal_errno(journal);
1045 if (olderr) {
1046 mlog(ML_ERROR, "File system error %d recorded in "
1047 "journal %u.\n", olderr, slot);
1048 mlog(ML_ERROR, "File system on device %s needs checking.\n",
1049 sb->s_id);
1050
1051 jbd2_journal_ack_err(journal);
1052 jbd2_journal_clear_err(journal);
1053 }
1054}
1055
1056int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
1057{
1058 int status = 0;
1059 struct ocfs2_super *osb;
1060
1061 BUG_ON(!journal);
1062
1063 osb = journal->j_osb;
1064
1065 status = jbd2_journal_load(journal->j_journal);
1066 if (status < 0) {
1067 mlog(ML_ERROR, "Failed to load journal!\n");
1068 goto done;
1069 }
1070
1071 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
1072
1073 if (replayed) {
1074 jbd2_journal_lock_updates(journal->j_journal);
1075 status = jbd2_journal_flush(journal->j_journal);
1076 jbd2_journal_unlock_updates(journal->j_journal);
1077 if (status < 0)
1078 mlog_errno(status);
1079 }
1080
1081 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
1082 if (status < 0) {
1083 mlog_errno(status);
1084 goto done;
1085 }
1086
1087
1088 if (!local) {
1089 osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
1090 "ocfs2cmt-%s", osb->uuid_str);
1091 if (IS_ERR(osb->commit_task)) {
1092 status = PTR_ERR(osb->commit_task);
1093 osb->commit_task = NULL;
1094 mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
1095 "error=%d", status);
1096 goto done;
1097 }
1098 } else
1099 osb->commit_task = NULL;
1100
1101done:
1102 return status;
1103}
1104
1105
1106
1107
1108int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1109{
1110 int status;
1111
1112 BUG_ON(!journal);
1113
1114 status = jbd2_journal_wipe(journal->j_journal, full);
1115 if (status < 0) {
1116 mlog_errno(status);
1117 goto bail;
1118 }
1119
1120 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
1121 if (status < 0)
1122 mlog_errno(status);
1123
1124bail:
1125 return status;
1126}
1127
1128static int ocfs2_recovery_completed(struct ocfs2_super *osb)
1129{
1130 int empty;
1131 struct ocfs2_recovery_map *rm = osb->recovery_map;
1132
1133 spin_lock(&osb->osb_lock);
1134 empty = (rm->rm_used == 0);
1135 spin_unlock(&osb->osb_lock);
1136
1137 return empty;
1138}
1139
/*
 * Sleep on osb->recovery_event until ocfs2_recovery_completed() reports
 * that the recovery map is empty.
 */
void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
{
 wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
}
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155static int ocfs2_force_read_journal(struct inode *inode)
1156{
1157 int status = 0;
1158 int i;
1159 u64 v_blkno, p_blkno, p_blocks, num_blocks;
1160 struct buffer_head *bh = NULL;
1161 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1162
1163 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
1164 v_blkno = 0;
1165 while (v_blkno < num_blocks) {
1166 status = ocfs2_extent_map_get_blocks(inode, v_blkno,
1167 &p_blkno, &p_blocks, NULL);
1168 if (status < 0) {
1169 mlog_errno(status);
1170 goto bail;
1171 }
1172
1173 for (i = 0; i < p_blocks; i++, p_blkno++) {
1174 bh = __find_get_block(osb->sb->s_bdev, p_blkno,
1175 osb->sb->s_blocksize);
1176
1177 if (!bh)
1178 continue;
1179
1180 brelse(bh);
1181 bh = NULL;
1182
1183
1184
1185 status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
1186 if (status < 0) {
1187 mlog_errno(status);
1188 goto bail;
1189 }
1190
1191 brelse(bh);
1192 bh = NULL;
1193 }
1194
1195 v_blkno += p_blocks;
1196 }
1197
1198bail:
1199 return status;
1200}
1201
/*
 * One unit of deferred recovery work. Items are created by
 * ocfs2_queue_recovery_completion(), linked on journal->j_la_cleanups and
 * consumed (and freed) by ocfs2_complete_recovery().
 */
struct ocfs2_la_recovery_item {
 /* Link in journal->j_la_cleanups. */
 struct list_head lri_list;
 /* Slot the work belongs to. */
 int lri_slot;
 /* Local alloc dinode copy to recover, or NULL; kfree()d by the worker. */
 struct ocfs2_dinode *lri_la_dinode;
 /* Truncate log dinode copy to recover, or NULL; kfree()d by the worker. */
 struct ocfs2_dinode *lri_tl_dinode;
 /* Quota recovery state, or NULL; handed to ocfs2_finish_quota_recovery(). */
 struct ocfs2_quota_recovery *lri_qrec;
 /* Passed through to ocfs2_recover_orphans(). */
 enum ocfs2_orphan_reco_type lri_orphan_reco_type;
};
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
/*
 * Work function behind journal->j_recovery_work.
 *
 * Takes over all items currently queued on journal->j_la_cleanups and
 * processes them one by one: local alloc recovery, truncate log recovery,
 * orphan recovery for the item's slot and finally quota recovery. Errors
 * of the individual steps are logged and do not stop the remaining steps.
 * Each item and its dinode copies are freed here.
 */
void ocfs2_complete_recovery(struct work_struct *work)
{
 int ret = 0;
 struct ocfs2_journal *journal =
 container_of(work, struct ocfs2_journal, j_recovery_work);
 struct ocfs2_super *osb = journal->j_osb;
 struct ocfs2_dinode *la_dinode, *tl_dinode;
 struct ocfs2_la_recovery_item *item, *n;
 struct ocfs2_quota_recovery *qrec;
 enum ocfs2_orphan_reco_type orphan_reco_type;
 LIST_HEAD(tmp_la_list);

 trace_ocfs2_complete_recovery(
 (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);

 /* Move the queued items to a private list so j_lock is held only briefly. */
 spin_lock(&journal->j_lock);
 list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
 spin_unlock(&journal->j_lock);

 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
 list_del_init(&item->lri_list);

 /* Calls __ocfs2_wait_on_mount(osb, 1); its return value is ignored here. */
 ocfs2_wait_on_quotas(osb);

 la_dinode = item->lri_la_dinode;
 tl_dinode = item->lri_tl_dinode;
 qrec = item->lri_qrec;
 orphan_reco_type = item->lri_orphan_reco_type;

 trace_ocfs2_complete_recovery_slot(item->lri_slot,
 la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
 tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
 qrec);

 if (la_dinode) {
 ret = ocfs2_complete_local_alloc_recovery(osb,
 la_dinode);
 if (ret < 0)
 mlog_errno(ret);

 kfree(la_dinode);
 }

 if (tl_dinode) {
 ret = ocfs2_complete_truncate_log_recovery(osb,
 tl_dinode);
 if (ret < 0)
 mlog_errno(ret);

 kfree(tl_dinode);
 }

 /* Orphan recovery runs for every item, whatever else it carries. */
 ret = ocfs2_recover_orphans(osb, item->lri_slot,
 orphan_reco_type);
 if (ret < 0)
 mlog_errno(ret);

 /* qrec is not freed here; NOTE(review): presumably ocfs2_finish_quota_recovery() consumes it - confirm. */
 if (qrec) {
 ret = ocfs2_finish_quota_recovery(osb, qrec,
 item->lri_slot);
 if (ret < 0)
 mlog_errno(ret);

 }

 kfree(item);
 }

 /* Only the status of the last step executed is traced. */
 trace_ocfs2_complete_recovery_end(ret);
}
1291
1292
1293
1294
1295static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1296 int slot_num,
1297 struct ocfs2_dinode *la_dinode,
1298 struct ocfs2_dinode *tl_dinode,
1299 struct ocfs2_quota_recovery *qrec,
1300 enum ocfs2_orphan_reco_type orphan_reco_type)
1301{
1302 struct ocfs2_la_recovery_item *item;
1303
1304 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
1305 if (!item) {
1306
1307
1308
1309 kfree(la_dinode);
1310 kfree(tl_dinode);
1311
1312 if (qrec)
1313 ocfs2_free_quota_recovery(qrec);
1314
1315 mlog_errno(-ENOMEM);
1316 return;
1317 }
1318
1319 INIT_LIST_HEAD(&item->lri_list);
1320 item->lri_la_dinode = la_dinode;
1321 item->lri_slot = slot_num;
1322 item->lri_tl_dinode = tl_dinode;
1323 item->lri_qrec = qrec;
1324 item->lri_orphan_reco_type = orphan_reco_type;
1325
1326 spin_lock(&journal->j_lock);
1327 list_add_tail(&item->lri_list, &journal->j_la_cleanups);
1328 queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work);
1329 spin_unlock(&journal->j_lock);
1330}
1331
1332
1333
/*
 * Queue the recovery work that is left over for our own slot at mount
 * time, then queue the slots recorded in the replay map.
 *
 * Does nothing on a hard read-only file system.
 */
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
{
 struct ocfs2_journal *journal = osb->journal;

 if (ocfs2_is_hard_readonly(osb))
 return;

 /*
  * Hand our saved local alloc copy to the recovery worker; ownership of
  * osb->local_alloc_copy moves to the queued item (or it is freed if
  * queuing fails), so the pointer is cleared below.
  */
 ocfs2_queue_recovery_completion(journal, osb->slot_num,
 osb->local_alloc_copy, NULL, NULL,
 ORPHAN_NEED_TRUNCATE);
 ocfs2_schedule_truncate_log_flush(osb, 0);

 osb->local_alloc_copy = NULL;

 /* Mark the replay map as needed, queue its slots, then release it. */
 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
 ocfs2_free_replay_slots(osb);
}
1355
1356void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
1357{
1358 if (osb->quota_rec) {
1359 ocfs2_queue_recovery_completion(osb->journal,
1360 osb->slot_num,
1361 NULL,
1362 NULL,
1363 osb->quota_rec,
1364 ORPHAN_NEED_TRUNCATE);
1365 osb->quota_rec = NULL;
1366 }
1367}
1368
1369static int __ocfs2_recovery_thread(void *arg)
1370{
1371 int status, node_num, slot_num;
1372 struct ocfs2_super *osb = arg;
1373 struct ocfs2_recovery_map *rm = osb->recovery_map;
1374 int *rm_quota = NULL;
1375 int rm_quota_used = 0, i;
1376 struct ocfs2_quota_recovery *qrec;
1377
1378
1379 int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
1380 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
1381 || OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
1382 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);
1383
1384 status = ocfs2_wait_on_mount(osb);
1385 if (status < 0) {
1386 goto bail;
1387 }
1388
1389 if (quota_enabled) {
1390 rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
1391 if (!rm_quota) {
1392 status = -ENOMEM;
1393 goto bail;
1394 }
1395 }
1396restart:
1397 status = ocfs2_super_lock(osb, 1);
1398 if (status < 0) {
1399 mlog_errno(status);
1400 goto bail;
1401 }
1402
1403 status = ocfs2_compute_replay_slots(osb);
1404 if (status < 0)
1405 mlog_errno(status);
1406
1407
1408 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1409 NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);
1410
1411 spin_lock(&osb->osb_lock);
1412 while (rm->rm_used) {
1413
1414
1415 node_num = rm->rm_entries[0];
1416 spin_unlock(&osb->osb_lock);
1417 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1418 trace_ocfs2_recovery_thread_node(node_num, slot_num);
1419 if (slot_num == -ENOENT) {
1420 status = 0;
1421 goto skip_recovery;
1422 }
1423
1424
1425
1426
1427
1428
1429
1430 if (quota_enabled) {
1431 for (i = 0; i < rm_quota_used
1432 && rm_quota[i] != slot_num; i++)
1433 ;
1434
1435 if (i == rm_quota_used)
1436 rm_quota[rm_quota_used++] = slot_num;
1437 }
1438
1439 status = ocfs2_recover_node(osb, node_num, slot_num);
1440skip_recovery:
1441 if (!status) {
1442 ocfs2_recovery_map_clear(osb, node_num);
1443 } else {
1444 mlog(ML_ERROR,
1445 "Error %d recovering node %d on device (%u,%u)!\n",
1446 status, node_num,
1447 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1448 mlog(ML_ERROR, "Volume requires unmount.\n");
1449 }
1450
1451 spin_lock(&osb->osb_lock);
1452 }
1453 spin_unlock(&osb->osb_lock);
1454 trace_ocfs2_recovery_thread_end(status);
1455
1456
1457 status = ocfs2_check_journals_nolocks(osb);
1458 status = (status == -EROFS) ? 0 : status;
1459 if (status < 0)
1460 mlog_errno(status);
1461
1462
1463
1464
1465 if (quota_enabled) {
1466 for (i = 0; i < rm_quota_used; i++) {
1467 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
1468 if (IS_ERR(qrec)) {
1469 status = PTR_ERR(qrec);
1470 mlog_errno(status);
1471 continue;
1472 }
1473 ocfs2_queue_recovery_completion(osb->journal,
1474 rm_quota[i],
1475 NULL, NULL, qrec,
1476 ORPHAN_NEED_TRUNCATE);
1477 }
1478 }
1479
1480 ocfs2_super_unlock(osb, 1);
1481
1482
1483 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1484
1485bail:
1486 mutex_lock(&osb->recovery_lock);
1487 if (!status && !ocfs2_recovery_completed(osb)) {
1488 mutex_unlock(&osb->recovery_lock);
1489 goto restart;
1490 }
1491
1492 ocfs2_free_replay_slots(osb);
1493 osb->recovery_thread_task = NULL;
1494 mb();
1495 wake_up(&osb->recovery_event);
1496
1497 mutex_unlock(&osb->recovery_lock);
1498
1499 if (quota_enabled)
1500 kfree(rm_quota);
1501
1502
1503
1504
1505 complete_and_exit(NULL, status);
1506}
1507
1508void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1509{
1510 mutex_lock(&osb->recovery_lock);
1511
1512 trace_ocfs2_recovery_thread(node_num, osb->node_num,
1513 osb->disable_recovery, osb->recovery_thread_task,
1514 osb->disable_recovery ?
1515 -1 : ocfs2_recovery_map_set(osb, node_num));
1516
1517 if (osb->disable_recovery)
1518 goto out;
1519
1520 if (osb->recovery_thread_task)
1521 goto out;
1522
1523 osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
1524 "ocfs2rec-%s", osb->uuid_str);
1525 if (IS_ERR(osb->recovery_thread_task)) {
1526 mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
1527 osb->recovery_thread_task = NULL;
1528 }
1529
1530out:
1531 mutex_unlock(&osb->recovery_lock);
1532 wake_up(&osb->recovery_event);
1533}
1534
1535static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1536 int slot_num,
1537 struct buffer_head **bh,
1538 struct inode **ret_inode)
1539{
1540 int status = -EACCES;
1541 struct inode *inode = NULL;
1542
1543 BUG_ON(slot_num >= osb->max_slots);
1544
1545 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1546 slot_num);
1547 if (!inode || is_bad_inode(inode)) {
1548 mlog_errno(status);
1549 goto bail;
1550 }
1551 SET_INODE_JOURNAL(inode);
1552
1553 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
1554 if (status < 0) {
1555 mlog_errno(status);
1556 goto bail;
1557 }
1558
1559 status = 0;
1560
1561bail:
1562 if (inode) {
1563 if (status || !ret_inode)
1564 iput(inode);
1565 else
1566 *ret_inode = inode;
1567 }
1568 return status;
1569}
1570
1571
1572
1573static int ocfs2_replay_journal(struct ocfs2_super *osb,
1574 int node_num,
1575 int slot_num)
1576{
1577 int status;
1578 int got_lock = 0;
1579 unsigned int flags;
1580 struct inode *inode = NULL;
1581 struct ocfs2_dinode *fe;
1582 journal_t *journal = NULL;
1583 struct buffer_head *bh = NULL;
1584 u32 slot_reco_gen;
1585
1586 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1587 if (status) {
1588 mlog_errno(status);
1589 goto done;
1590 }
1591
1592 fe = (struct ocfs2_dinode *)bh->b_data;
1593 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1594 brelse(bh);
1595 bh = NULL;
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1606 trace_ocfs2_replay_journal_recovered(slot_num,
1607 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1608 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1609 status = -EBUSY;
1610 goto done;
1611 }
1612
1613
1614
1615 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1616 if (status < 0) {
1617 trace_ocfs2_replay_journal_lock_err(status);
1618 if (status != -ERESTARTSYS)
1619 mlog(ML_ERROR, "Could not lock journal!\n");
1620 goto done;
1621 }
1622 got_lock = 1;
1623
1624 fe = (struct ocfs2_dinode *) bh->b_data;
1625
1626 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1627 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1628
1629 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1630 trace_ocfs2_replay_journal_skip(node_num);
1631
1632 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1633 goto done;
1634 }
1635
1636
1637 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1638
1639 printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\
1640 "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
1641 MINOR(osb->sb->s_dev));
1642
1643 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
1644
1645 status = ocfs2_force_read_journal(inode);
1646 if (status < 0) {
1647 mlog_errno(status);
1648 goto done;
1649 }
1650
1651 journal = jbd2_journal_init_inode(inode);
1652 if (journal == NULL) {
1653 mlog(ML_ERROR, "Linux journal layer error\n");
1654 status = -EIO;
1655 goto done;
1656 }
1657
1658 status = jbd2_journal_load(journal);
1659 if (status < 0) {
1660 mlog_errno(status);
1661 if (!igrab(inode))
1662 BUG();
1663 jbd2_journal_destroy(journal);
1664 goto done;
1665 }
1666
1667 ocfs2_clear_journal_error(osb->sb, journal, slot_num);
1668
1669
1670 jbd2_journal_lock_updates(journal);
1671 status = jbd2_journal_flush(journal);
1672 jbd2_journal_unlock_updates(journal);
1673 if (status < 0)
1674 mlog_errno(status);
1675
1676
1677 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1678 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1679 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
1680
1681
1682 ocfs2_bump_recovery_generation(fe);
1683 osb->slot_recovery_generations[slot_num] =
1684 ocfs2_get_recovery_generation(fe);
1685
1686 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
1687 status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
1688 if (status < 0)
1689 mlog_errno(status);
1690
1691 if (!igrab(inode))
1692 BUG();
1693
1694 jbd2_journal_destroy(journal);
1695
1696 printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\
1697 "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
1698 MINOR(osb->sb->s_dev));
1699done:
1700
1701 if (got_lock)
1702 ocfs2_inode_unlock(inode, 1);
1703
1704 iput(inode);
1705 brelse(bh);
1706
1707 return status;
1708}
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722static int ocfs2_recover_node(struct ocfs2_super *osb,
1723 int node_num, int slot_num)
1724{
1725 int status = 0;
1726 struct ocfs2_dinode *la_copy = NULL;
1727 struct ocfs2_dinode *tl_copy = NULL;
1728
1729 trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
1730
1731
1732
1733 BUG_ON(osb->node_num == node_num);
1734
1735 status = ocfs2_replay_journal(osb, node_num, slot_num);
1736 if (status < 0) {
1737 if (status == -EBUSY) {
1738 trace_ocfs2_recover_node_skip(slot_num, node_num);
1739 status = 0;
1740 goto done;
1741 }
1742 mlog_errno(status);
1743 goto done;
1744 }
1745
1746
1747 status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
1748 if (status < 0) {
1749 mlog_errno(status);
1750 goto done;
1751 }
1752
1753
1754
1755
1756 status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
1757 if (status < 0)
1758 mlog_errno(status);
1759
1760
1761
1762 status = ocfs2_clear_slot(osb, slot_num);
1763 if (status < 0)
1764 mlog_errno(status);
1765
1766
1767 ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
1768 tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
1769
1770 status = 0;
1771done:
1772
1773 return status;
1774}
1775
1776
1777
1778
1779static int ocfs2_trylock_journal(struct ocfs2_super *osb,
1780 int slot_num)
1781{
1782 int status, flags;
1783 struct inode *inode = NULL;
1784
1785 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1786 slot_num);
1787 if (inode == NULL) {
1788 mlog(ML_ERROR, "access error\n");
1789 status = -EACCES;
1790 goto bail;
1791 }
1792 if (is_bad_inode(inode)) {
1793 mlog(ML_ERROR, "access error (bad inode)\n");
1794 iput(inode);
1795 inode = NULL;
1796 status = -EACCES;
1797 goto bail;
1798 }
1799 SET_INODE_JOURNAL(inode);
1800
1801 flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
1802 status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
1803 if (status < 0) {
1804 if (status != -EAGAIN)
1805 mlog_errno(status);
1806 goto bail;
1807 }
1808
1809 ocfs2_inode_unlock(inode, 1);
1810bail:
1811 iput(inode);
1812
1813 return status;
1814}
1815
1816
1817
1818int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1819{
1820 unsigned int node_num;
1821 int status, i;
1822 u32 gen;
1823 struct buffer_head *bh = NULL;
1824 struct ocfs2_dinode *di;
1825
1826
1827
1828
1829 for (i = 0; i < osb->max_slots; i++) {
1830
1831 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1832 if (status) {
1833 mlog_errno(status);
1834 goto bail;
1835 }
1836 di = (struct ocfs2_dinode *)bh->b_data;
1837 gen = ocfs2_get_recovery_generation(di);
1838 brelse(bh);
1839 bh = NULL;
1840
1841 spin_lock(&osb->osb_lock);
1842 osb->slot_recovery_generations[i] = gen;
1843
1844 trace_ocfs2_mark_dead_nodes(i,
1845 osb->slot_recovery_generations[i]);
1846
1847 if (i == osb->slot_num) {
1848 spin_unlock(&osb->osb_lock);
1849 continue;
1850 }
1851
1852 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
1853 if (status == -ENOENT) {
1854 spin_unlock(&osb->osb_lock);
1855 continue;
1856 }
1857
1858 if (__ocfs2_recovery_map_test(osb, node_num)) {
1859 spin_unlock(&osb->osb_lock);
1860 continue;
1861 }
1862 spin_unlock(&osb->osb_lock);
1863
1864
1865
1866
1867 status = ocfs2_trylock_journal(osb, i);
1868 if (!status) {
1869
1870
1871
1872 ocfs2_recovery_thread(osb, node_num);
1873 } else if ((status < 0) && (status != -EAGAIN)) {
1874 mlog_errno(status);
1875 goto bail;
1876 }
1877 }
1878
1879 status = 0;
1880bail:
1881 return status;
1882}
1883
1884
1885
1886
1887
1888
1889static inline unsigned long ocfs2_orphan_scan_timeout(void)
1890{
1891 unsigned long time;
1892
1893 get_random_bytes(&time, sizeof(time));
1894 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
1895 return msecs_to_jiffies(time);
1896}
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1926{
1927 struct ocfs2_orphan_scan *os;
1928 int status, i;
1929 u32 seqno = 0;
1930
1931 os = &osb->osb_orphan_scan;
1932
1933 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1934 goto out;
1935
1936 trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
1937 atomic_read(&os->os_state));
1938
1939 status = ocfs2_orphan_scan_lock(osb, &seqno);
1940 if (status < 0) {
1941 if (status != -EAGAIN)
1942 mlog_errno(status);
1943 goto out;
1944 }
1945
1946
1947 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1948 goto unlock;
1949
1950 if (os->os_seqno != seqno) {
1951 os->os_seqno = seqno;
1952 goto unlock;
1953 }
1954
1955 for (i = 0; i < osb->max_slots; i++)
1956 ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
1957 NULL, ORPHAN_NO_NEED_TRUNCATE);
1958
1959
1960
1961
1962 seqno++;
1963 os->os_count++;
1964 os->os_scantime = ktime_get_seconds();
1965unlock:
1966 ocfs2_orphan_scan_unlock(osb, seqno);
1967out:
1968 trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
1969 atomic_read(&os->os_state));
1970 return;
1971}
1972
1973
1974static void ocfs2_orphan_scan_work(struct work_struct *work)
1975{
1976 struct ocfs2_orphan_scan *os;
1977 struct ocfs2_super *osb;
1978
1979 os = container_of(work, struct ocfs2_orphan_scan,
1980 os_orphan_scan_work.work);
1981 osb = os->os_osb;
1982
1983 mutex_lock(&os->os_lock);
1984 ocfs2_queue_orphan_scan(osb);
1985 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1986 queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
1987 ocfs2_orphan_scan_timeout());
1988 mutex_unlock(&os->os_lock);
1989}
1990
1991void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1992{
1993 struct ocfs2_orphan_scan *os;
1994
1995 os = &osb->osb_orphan_scan;
1996 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
1997 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1998 mutex_lock(&os->os_lock);
1999 cancel_delayed_work(&os->os_orphan_scan_work);
2000 mutex_unlock(&os->os_lock);
2001 }
2002}
2003
2004void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
2005{
2006 struct ocfs2_orphan_scan *os;
2007
2008 os = &osb->osb_orphan_scan;
2009 os->os_osb = osb;
2010 os->os_count = 0;
2011 os->os_seqno = 0;
2012 mutex_init(&os->os_lock);
2013 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
2014}
2015
2016void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
2017{
2018 struct ocfs2_orphan_scan *os;
2019
2020 os = &osb->osb_orphan_scan;
2021 os->os_scantime = ktime_get_seconds();
2022 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
2023 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
2024 else {
2025 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
2026 queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
2027 ocfs2_orphan_scan_timeout());
2028 }
2029}
2030
2031struct ocfs2_orphan_filldir_priv {
2032 struct dir_context ctx;
2033 struct inode *head;
2034 struct ocfs2_super *osb;
2035 enum ocfs2_orphan_reco_type orphan_reco_type;
2036};
2037
2038static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
2039 int name_len, loff_t pos, u64 ino,
2040 unsigned type)
2041{
2042 struct ocfs2_orphan_filldir_priv *p =
2043 container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx);
2044 struct inode *iter;
2045
2046 if (name_len == 1 && !strncmp(".", name, 1))
2047 return 0;
2048 if (name_len == 2 && !strncmp("..", name, 2))
2049 return 0;
2050
2051
2052 if ((p->orphan_reco_type == ORPHAN_NO_NEED_TRUNCATE) &&
2053 (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2054 OCFS2_DIO_ORPHAN_PREFIX_LEN)))
2055 return 0;
2056
2057
2058 iter = ocfs2_iget(p->osb, ino,
2059 OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
2060 if (IS_ERR(iter))
2061 return 0;
2062
2063 if (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2064 OCFS2_DIO_ORPHAN_PREFIX_LEN))
2065 OCFS2_I(iter)->ip_flags |= OCFS2_INODE_DIO_ORPHAN_ENTRY;
2066
2067
2068
2069 if (OCFS2_I(iter)->ip_next_orphan) {
2070 iput(iter);
2071 return 0;
2072 }
2073
2074 trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
2075
2076
2077 OCFS2_I(iter)->ip_next_orphan = p->head;
2078 p->head = iter;
2079
2080 return 0;
2081}
2082
2083static int ocfs2_queue_orphans(struct ocfs2_super *osb,
2084 int slot,
2085 struct inode **head,
2086 enum ocfs2_orphan_reco_type orphan_reco_type)
2087{
2088 int status;
2089 struct inode *orphan_dir_inode = NULL;
2090 struct ocfs2_orphan_filldir_priv priv = {
2091 .ctx.actor = ocfs2_orphan_filldir,
2092 .osb = osb,
2093 .head = *head,
2094 .orphan_reco_type = orphan_reco_type
2095 };
2096
2097 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2098 ORPHAN_DIR_SYSTEM_INODE,
2099 slot);
2100 if (!orphan_dir_inode) {
2101 status = -ENOENT;
2102 mlog_errno(status);
2103 return status;
2104 }
2105
2106 inode_lock(orphan_dir_inode);
2107 status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
2108 if (status < 0) {
2109 mlog_errno(status);
2110 goto out;
2111 }
2112
2113 status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx);
2114 if (status) {
2115 mlog_errno(status);
2116 goto out_cluster;
2117 }
2118
2119 *head = priv.head;
2120
2121out_cluster:
2122 ocfs2_inode_unlock(orphan_dir_inode, 0);
2123out:
2124 inode_unlock(orphan_dir_inode);
2125 iput(orphan_dir_inode);
2126 return status;
2127}
2128
2129static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
2130 int slot)
2131{
2132 int ret;
2133
2134 spin_lock(&osb->osb_lock);
2135 ret = !osb->osb_orphan_wipes[slot];
2136 spin_unlock(&osb->osb_lock);
2137 return ret;
2138}
2139
2140static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
2141 int slot)
2142{
2143 spin_lock(&osb->osb_lock);
2144
2145
2146 ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
2147 while (osb->osb_orphan_wipes[slot]) {
2148
2149
2150
2151 spin_unlock(&osb->osb_lock);
2152 wait_event_interruptible(osb->osb_wipe_event,
2153 ocfs2_orphan_recovery_can_continue(osb, slot));
2154 spin_lock(&osb->osb_lock);
2155 }
2156 spin_unlock(&osb->osb_lock);
2157}
2158
2159static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
2160 int slot)
2161{
2162 ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
2163}
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2184 int slot,
2185 enum ocfs2_orphan_reco_type orphan_reco_type)
2186{
2187 int ret = 0;
2188 struct inode *inode = NULL;
2189 struct inode *iter;
2190 struct ocfs2_inode_info *oi;
2191 struct buffer_head *di_bh = NULL;
2192 struct ocfs2_dinode *di = NULL;
2193
2194 trace_ocfs2_recover_orphans(slot);
2195
2196 ocfs2_mark_recovering_orphan_dir(osb, slot);
2197 ret = ocfs2_queue_orphans(osb, slot, &inode, orphan_reco_type);
2198 ocfs2_clear_recovering_orphan_dir(osb, slot);
2199
2200
2201
2202 if (ret)
2203 mlog_errno(ret);
2204
2205 while (inode) {
2206 oi = OCFS2_I(inode);
2207 trace_ocfs2_recover_orphans_iput(
2208 (unsigned long long)oi->ip_blkno);
2209
2210 iter = oi->ip_next_orphan;
2211 oi->ip_next_orphan = NULL;
2212
2213 if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
2214 inode_lock(inode);
2215 ret = ocfs2_rw_lock(inode, 1);
2216 if (ret < 0) {
2217 mlog_errno(ret);
2218 goto unlock_mutex;
2219 }
2220
2221
2222
2223
2224 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2225 if (ret) {
2226 mlog_errno(ret);
2227 goto unlock_rw;
2228 }
2229
2230 di = (struct ocfs2_dinode *)di_bh->b_data;
2231
2232 if (di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)) {
2233 ret = ocfs2_truncate_file(inode, di_bh,
2234 i_size_read(inode));
2235 if (ret < 0) {
2236 if (ret != -ENOSPC)
2237 mlog_errno(ret);
2238 goto unlock_inode;
2239 }
2240
2241 ret = ocfs2_del_inode_from_orphan(osb, inode,
2242 di_bh, 0, 0);
2243 if (ret)
2244 mlog_errno(ret);
2245 }
2246unlock_inode:
2247 ocfs2_inode_unlock(inode, 1);
2248 brelse(di_bh);
2249 di_bh = NULL;
2250unlock_rw:
2251 ocfs2_rw_unlock(inode, 1);
2252unlock_mutex:
2253 inode_unlock(inode);
2254
2255
2256 oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
2257 } else {
2258 spin_lock(&oi->ip_lock);
2259
2260
2261 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2262 spin_unlock(&oi->ip_lock);
2263 }
2264
2265 iput(inode);
2266 inode = iter;
2267 }
2268
2269 return ret;
2270}
2271
2272static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
2273{
2274
2275
2276
2277 wait_event(osb->osb_mount_event,
2278 (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
2279 atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
2280 atomic_read(&osb->vol_state) == VOLUME_DISABLED);
2281
2282
2283
2284
2285 if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
2286 trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
2287 mlog(0, "mount error, exiting!\n");
2288 return -EBUSY;
2289 }
2290
2291 return 0;
2292}
2293
2294static int ocfs2_commit_thread(void *arg)
2295{
2296 int status;
2297 struct ocfs2_super *osb = arg;
2298 struct ocfs2_journal *journal = osb->journal;
2299
2300
2301
2302
2303
2304 while (!(kthread_should_stop() &&
2305 atomic_read(&journal->j_num_trans) == 0)) {
2306
2307 wait_event_interruptible(osb->checkpoint_event,
2308 atomic_read(&journal->j_num_trans)
2309 || kthread_should_stop());
2310
2311 status = ocfs2_commit_cache(osb);
2312 if (status < 0) {
2313 static unsigned long abort_warn_time;
2314
2315
2316 if (printk_timed_ratelimit(&abort_warn_time, 60*HZ))
2317 mlog(ML_ERROR, "status = %d, journal is "
2318 "already aborted.\n", status);
2319
2320
2321
2322
2323
2324 msleep_interruptible(1000);
2325 }
2326
2327 if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
2328 mlog(ML_KTHREAD,
2329 "commit_thread: %u transactions pending on "
2330 "shutdown\n",
2331 atomic_read(&journal->j_num_trans));
2332 }
2333 }
2334
2335 return 0;
2336}
2337
2338
2339
2340
2341
2342
2343int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
2344{
2345 int ret = 0;
2346 unsigned int slot;
2347 struct buffer_head *di_bh = NULL;
2348 struct ocfs2_dinode *di;
2349 int journal_dirty = 0;
2350
2351 for(slot = 0; slot < osb->max_slots; slot++) {
2352 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
2353 if (ret) {
2354 mlog_errno(ret);
2355 goto out;
2356 }
2357
2358 di = (struct ocfs2_dinode *) di_bh->b_data;
2359
2360 osb->slot_recovery_generations[slot] =
2361 ocfs2_get_recovery_generation(di);
2362
2363 if (le32_to_cpu(di->id1.journal1.ij_flags) &
2364 OCFS2_JOURNAL_DIRTY_FL)
2365 journal_dirty = 1;
2366
2367 brelse(di_bh);
2368 di_bh = NULL;
2369 }
2370
2371out:
2372 if (journal_dirty)
2373 ret = -EROFS;
2374 return ret;
2375}
2376