1
2
3
4
5
6
7
8
9
10#include <linux/fs.h>
11#include <linux/types.h>
12#include <linux/slab.h>
13#include <linux/highmem.h>
14#include <linux/kthread.h>
15#include <linux/time.h>
16#include <linux/random.h>
17#include <linux/delay.h>
18
19#include <cluster/masklog.h>
20
21#include "ocfs2.h"
22
23#include "alloc.h"
24#include "blockcheck.h"
25#include "dir.h"
26#include "dlmglue.h"
27#include "extent_map.h"
28#include "heartbeat.h"
29#include "inode.h"
30#include "journal.h"
31#include "localalloc.h"
32#include "slot_map.h"
33#include "super.h"
34#include "sysfile.h"
35#include "uptodate.h"
36#include "quota.h"
37#include "file.h"
38#include "namei.h"
39
40#include "buffer_head_io.h"
41#include "ocfs2_trace.h"
42
/*
 * File-scope spinlock; it is not taken anywhere in this part of the file.
 * NOTE(review): presumably serializes transaction-id increments done by
 * ocfs2_inc_trans_id() -- confirm against journal.h.
 */
DEFINE_SPINLOCK(trans_inc_lock);

/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * base interval between orphan scans, in milliseconds -- confirm at the
 * point of use.
 */
#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000

/* Forward declarations of file-local helpers. */
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_recover_node(struct ocfs2_super *osb, int node_num,
			      int slot_num);
static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, int dirty,
				      int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb, int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb, int slot,
				 enum ocfs2_orphan_reco_type orphan_reco_type);
static int ocfs2_commit_thread(void *arg);
static void ocfs2_queue_recovery_completion(
				struct ocfs2_journal *journal,
				int slot_num,
				struct ocfs2_dinode *la_dinode,
				struct ocfs2_dinode *tl_dinode,
				struct ocfs2_quota_recovery *qrec,
				enum ocfs2_orphan_reco_type orphan_reco_type);
67
/* Wrapper around __ocfs2_wait_on_mount() with the quota argument set to 0. */
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
	const int wait_for_quota = 0;

	return __ocfs2_wait_on_mount(osb, wait_for_quota);
}
72
/* Wrapper around __ocfs2_wait_on_mount() with the quota argument set to 1. */
static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
{
	const int wait_for_quota = 1;

	return __ocfs2_wait_on_mount(osb, wait_for_quota);
}
77
78
79
80
81
82
/*
 * State of the replay map attached to the super block (osb->replay_map).
 */
enum ocfs2_replay_state {
	/* Initial state assigned by ocfs2_compute_replay_slots(). */
	REPLAY_UNNEEDED = 0,
	/* Marked slots may be queued by ocfs2_queue_replay_slots(). */
	REPLAY_NEEDED = 1,
	/* Slots were queued; ocfs2_replay_map_set_state() never leaves it. */
	REPLAY_DONE = 2,
};

/*
 * Per-mount map of slots whose recovery completion should be queued.
 */
struct ocfs2_replay_map {
	/* Number of entries in rm_replay_slots[] (osb->max_slots). */
	unsigned int rm_slots;
	/* Current replay state, see enum ocfs2_replay_state. */
	enum ocfs2_replay_state rm_state;
	/* One byte per slot; non-zero means "queue this slot". */
	unsigned char rm_replay_slots[];
};
94
95static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
96{
97 if (!osb->replay_map)
98 return;
99
100
101 if (osb->replay_map->rm_state == REPLAY_DONE)
102 return;
103
104 osb->replay_map->rm_state = state;
105}
106
107int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
108{
109 struct ocfs2_replay_map *replay_map;
110 int i, node_num;
111
112
113 if (osb->replay_map)
114 return 0;
115
116 replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
117 (osb->max_slots * sizeof(char)), GFP_KERNEL);
118
119 if (!replay_map) {
120 mlog_errno(-ENOMEM);
121 return -ENOMEM;
122 }
123
124 spin_lock(&osb->osb_lock);
125
126 replay_map->rm_slots = osb->max_slots;
127 replay_map->rm_state = REPLAY_UNNEEDED;
128
129
130 for (i = 0; i < replay_map->rm_slots; i++) {
131 if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
132 replay_map->rm_replay_slots[i] = 1;
133 }
134
135 osb->replay_map = replay_map;
136 spin_unlock(&osb->osb_lock);
137 return 0;
138}
139
140static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
141 enum ocfs2_orphan_reco_type orphan_reco_type)
142{
143 struct ocfs2_replay_map *replay_map = osb->replay_map;
144 int i;
145
146 if (!replay_map)
147 return;
148
149 if (replay_map->rm_state != REPLAY_NEEDED)
150 return;
151
152 for (i = 0; i < replay_map->rm_slots; i++)
153 if (replay_map->rm_replay_slots[i])
154 ocfs2_queue_recovery_completion(osb->journal, i, NULL,
155 NULL, NULL,
156 orphan_reco_type);
157 replay_map->rm_state = REPLAY_DONE;
158}
159
160static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
161{
162 struct ocfs2_replay_map *replay_map = osb->replay_map;
163
164 if (!osb->replay_map)
165 return;
166
167 kfree(replay_map);
168 osb->replay_map = NULL;
169}
170
171int ocfs2_recovery_init(struct ocfs2_super *osb)
172{
173 struct ocfs2_recovery_map *rm;
174
175 mutex_init(&osb->recovery_lock);
176 osb->disable_recovery = 0;
177 osb->recovery_thread_task = NULL;
178 init_waitqueue_head(&osb->recovery_event);
179
180 rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
181 osb->max_slots * sizeof(unsigned int),
182 GFP_KERNEL);
183 if (!rm) {
184 mlog_errno(-ENOMEM);
185 return -ENOMEM;
186 }
187
188 rm->rm_entries = (unsigned int *)((char *)rm +
189 sizeof(struct ocfs2_recovery_map));
190 osb->recovery_map = rm;
191
192 return 0;
193}
194
195
196
197
198static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
199{
200 mb();
201 return osb->recovery_thread_task != NULL;
202}
203
204void ocfs2_recovery_exit(struct ocfs2_super *osb)
205{
206 struct ocfs2_recovery_map *rm;
207
208
209
210 mutex_lock(&osb->recovery_lock);
211 osb->disable_recovery = 1;
212 mutex_unlock(&osb->recovery_lock);
213 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
214
215
216
217
218 if (osb->ocfs2_wq)
219 flush_workqueue(osb->ocfs2_wq);
220
221
222
223
224
225 rm = osb->recovery_map;
226
227
228 kfree(rm);
229}
230
231static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
232 unsigned int node_num)
233{
234 int i;
235 struct ocfs2_recovery_map *rm = osb->recovery_map;
236
237 assert_spin_locked(&osb->osb_lock);
238
239 for (i = 0; i < rm->rm_used; i++) {
240 if (rm->rm_entries[i] == node_num)
241 return 1;
242 }
243
244 return 0;
245}
246
247
248static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
249 unsigned int node_num)
250{
251 struct ocfs2_recovery_map *rm = osb->recovery_map;
252
253 spin_lock(&osb->osb_lock);
254 if (__ocfs2_recovery_map_test(osb, node_num)) {
255 spin_unlock(&osb->osb_lock);
256 return 1;
257 }
258
259
260 BUG_ON(rm->rm_used >= osb->max_slots);
261
262 rm->rm_entries[rm->rm_used] = node_num;
263 rm->rm_used++;
264 spin_unlock(&osb->osb_lock);
265
266 return 0;
267}
268
269static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
270 unsigned int node_num)
271{
272 int i;
273 struct ocfs2_recovery_map *rm = osb->recovery_map;
274
275 spin_lock(&osb->osb_lock);
276
277 for (i = 0; i < rm->rm_used; i++) {
278 if (rm->rm_entries[i] == node_num)
279 break;
280 }
281
282 if (i < rm->rm_used) {
283
284 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
285 (rm->rm_used - i - 1) * sizeof(unsigned int));
286 rm->rm_used--;
287 }
288
289 spin_unlock(&osb->osb_lock);
290}
291
292static int ocfs2_commit_cache(struct ocfs2_super *osb)
293{
294 int status = 0;
295 unsigned int flushed;
296 struct ocfs2_journal *journal = NULL;
297
298 journal = osb->journal;
299
300
301 down_write(&journal->j_trans_barrier);
302
303 flushed = atomic_read(&journal->j_num_trans);
304 trace_ocfs2_commit_cache_begin(flushed);
305 if (flushed == 0) {
306 up_write(&journal->j_trans_barrier);
307 goto finally;
308 }
309
310 jbd2_journal_lock_updates(journal->j_journal);
311 status = jbd2_journal_flush(journal->j_journal);
312 jbd2_journal_unlock_updates(journal->j_journal);
313 if (status < 0) {
314 up_write(&journal->j_trans_barrier);
315 mlog_errno(status);
316 goto finally;
317 }
318
319 ocfs2_inc_trans_id(journal);
320
321 flushed = atomic_read(&journal->j_num_trans);
322 atomic_set(&journal->j_num_trans, 0);
323 up_write(&journal->j_trans_barrier);
324
325 trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
326
327 ocfs2_wake_downconvert_thread(osb);
328 wake_up(&journal->j_checkpointed);
329finally:
330 return status;
331}
332
333handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
334{
335 journal_t *journal = osb->journal->j_journal;
336 handle_t *handle;
337
338 BUG_ON(!osb || !osb->journal->j_journal);
339
340 if (ocfs2_is_hard_readonly(osb))
341 return ERR_PTR(-EROFS);
342
343 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
344 BUG_ON(max_buffs <= 0);
345
346
347 if (journal_current_handle())
348 return jbd2_journal_start(journal, max_buffs);
349
350 sb_start_intwrite(osb->sb);
351
352 down_read(&osb->journal->j_trans_barrier);
353
354 handle = jbd2_journal_start(journal, max_buffs);
355 if (IS_ERR(handle)) {
356 up_read(&osb->journal->j_trans_barrier);
357 sb_end_intwrite(osb->sb);
358
359 mlog_errno(PTR_ERR(handle));
360
361 if (is_journal_aborted(journal)) {
362 ocfs2_abort(osb->sb, "Detected aborted journal\n");
363 handle = ERR_PTR(-EROFS);
364 }
365 } else {
366 if (!ocfs2_mount_local(osb))
367 atomic_inc(&(osb->journal->j_num_trans));
368 }
369
370 return handle;
371}
372
373int ocfs2_commit_trans(struct ocfs2_super *osb,
374 handle_t *handle)
375{
376 int ret, nested;
377 struct ocfs2_journal *journal = osb->journal;
378
379 BUG_ON(!handle);
380
381 nested = handle->h_ref > 1;
382 ret = jbd2_journal_stop(handle);
383 if (ret < 0)
384 mlog_errno(ret);
385
386 if (!nested) {
387 up_read(&journal->j_trans_barrier);
388 sb_end_intwrite(osb->sb);
389 }
390
391 return ret;
392}
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411int ocfs2_extend_trans(handle_t *handle, int nblocks)
412{
413 int status, old_nblocks;
414
415 BUG_ON(!handle);
416 BUG_ON(nblocks < 0);
417
418 if (!nblocks)
419 return 0;
420
421 old_nblocks = jbd2_handle_buffer_credits(handle);
422
423 trace_ocfs2_extend_trans(old_nblocks, nblocks);
424
425#ifdef CONFIG_OCFS2_DEBUG_FS
426 status = 1;
427#else
428 status = jbd2_journal_extend(handle, nblocks, 0);
429 if (status < 0) {
430 mlog_errno(status);
431 goto bail;
432 }
433#endif
434
435 if (status > 0) {
436 trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
437 status = jbd2_journal_restart(handle,
438 old_nblocks + nblocks);
439 if (status < 0) {
440 mlog_errno(status);
441 goto bail;
442 }
443 }
444
445 status = 0;
446bail:
447 return status;
448}
449
450
451
452
453
454
455
456int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
457{
458 int status, old_nblks;
459
460 BUG_ON(!handle);
461
462 old_nblks = jbd2_handle_buffer_credits(handle);
463 trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
464
465 if (old_nblks < thresh)
466 return 0;
467
468 status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA, 0);
469 if (status < 0) {
470 mlog_errno(status);
471 goto bail;
472 }
473
474 if (status > 0) {
475 status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);
476 if (status < 0)
477 mlog_errno(status);
478 }
479
480bail:
481 return status;
482}
483
484
485struct ocfs2_triggers {
486 struct jbd2_buffer_trigger_type ot_triggers;
487 int ot_offset;
488};
489
490static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
491{
492 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
493}
494
495static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
496 struct buffer_head *bh,
497 void *data, size_t size)
498{
499 struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
500
501
502
503
504
505
506
507 ocfs2_block_check_compute(data, size, data + ot->ot_offset);
508}
509
510
511
512
513
514static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
515 struct buffer_head *bh,
516 void *data, size_t size)
517{
518 struct ocfs2_disk_dqtrailer *dqt =
519 ocfs2_block_dqtrailer(size, data);
520
521
522
523
524
525
526
527 ocfs2_block_check_compute(data, size, &dqt->dq_check);
528}
529
530
531
532
533
534static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
535 struct buffer_head *bh,
536 void *data, size_t size)
537{
538 struct ocfs2_dir_block_trailer *trailer =
539 ocfs2_dir_trailer_from_size(size, data);
540
541
542
543
544
545
546
547 ocfs2_block_check_compute(data, size, &trailer->db_check);
548}
549
550static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
551 struct buffer_head *bh)
552{
553 mlog(ML_ERROR,
554 "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
555 "bh->b_blocknr = %llu\n",
556 (unsigned long)bh,
557 (unsigned long long)bh->b_blocknr);
558
559 ocfs2_error(bh->b_bdev->bd_super,
560 "JBD2 has aborted our journal, ocfs2 cannot continue\n");
561}
562
563static struct ocfs2_triggers di_triggers = {
564 .ot_triggers = {
565 .t_frozen = ocfs2_frozen_trigger,
566 .t_abort = ocfs2_abort_trigger,
567 },
568 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
569};
570
571static struct ocfs2_triggers eb_triggers = {
572 .ot_triggers = {
573 .t_frozen = ocfs2_frozen_trigger,
574 .t_abort = ocfs2_abort_trigger,
575 },
576 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
577};
578
579static struct ocfs2_triggers rb_triggers = {
580 .ot_triggers = {
581 .t_frozen = ocfs2_frozen_trigger,
582 .t_abort = ocfs2_abort_trigger,
583 },
584 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
585};
586
587static struct ocfs2_triggers gd_triggers = {
588 .ot_triggers = {
589 .t_frozen = ocfs2_frozen_trigger,
590 .t_abort = ocfs2_abort_trigger,
591 },
592 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
593};
594
595static struct ocfs2_triggers db_triggers = {
596 .ot_triggers = {
597 .t_frozen = ocfs2_db_frozen_trigger,
598 .t_abort = ocfs2_abort_trigger,
599 },
600};
601
602static struct ocfs2_triggers xb_triggers = {
603 .ot_triggers = {
604 .t_frozen = ocfs2_frozen_trigger,
605 .t_abort = ocfs2_abort_trigger,
606 },
607 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
608};
609
610static struct ocfs2_triggers dq_triggers = {
611 .ot_triggers = {
612 .t_frozen = ocfs2_dq_frozen_trigger,
613 .t_abort = ocfs2_abort_trigger,
614 },
615};
616
617static struct ocfs2_triggers dr_triggers = {
618 .ot_triggers = {
619 .t_frozen = ocfs2_frozen_trigger,
620 .t_abort = ocfs2_abort_trigger,
621 },
622 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
623};
624
625static struct ocfs2_triggers dl_triggers = {
626 .ot_triggers = {
627 .t_frozen = ocfs2_frozen_trigger,
628 .t_abort = ocfs2_abort_trigger,
629 },
630 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
631};
632
633static int __ocfs2_journal_access(handle_t *handle,
634 struct ocfs2_caching_info *ci,
635 struct buffer_head *bh,
636 struct ocfs2_triggers *triggers,
637 int type)
638{
639 int status;
640 struct ocfs2_super *osb =
641 OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
642
643 BUG_ON(!ci || !ci->ci_ops);
644 BUG_ON(!handle);
645 BUG_ON(!bh);
646
647 trace_ocfs2_journal_access(
648 (unsigned long long)ocfs2_metadata_cache_owner(ci),
649 (unsigned long long)bh->b_blocknr, type, bh->b_size);
650
651
652 if (!buffer_uptodate(bh)) {
653 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
654 mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n",
655 (unsigned long long)bh->b_blocknr, bh->b_state);
656
657 lock_buffer(bh);
658
659
660
661
662
663
664
665
666
667
668 if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
669 unlock_buffer(bh);
670 return ocfs2_error(osb->sb, "A previous attempt to "
671 "write this buffer head failed\n");
672 }
673 unlock_buffer(bh);
674 }
675
676
677
678
679
680
681
682 ocfs2_set_ci_lock_trans(osb->journal, ci);
683
684 ocfs2_metadata_cache_io_lock(ci);
685 switch (type) {
686 case OCFS2_JOURNAL_ACCESS_CREATE:
687 case OCFS2_JOURNAL_ACCESS_WRITE:
688 status = jbd2_journal_get_write_access(handle, bh);
689 break;
690
691 case OCFS2_JOURNAL_ACCESS_UNDO:
692 status = jbd2_journal_get_undo_access(handle, bh);
693 break;
694
695 default:
696 status = -EINVAL;
697 mlog(ML_ERROR, "Unknown access type!\n");
698 }
699 if (!status && ocfs2_meta_ecc(osb) && triggers)
700 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
701 ocfs2_metadata_cache_io_unlock(ci);
702
703 if (status < 0)
704 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
705 status, type);
706
707 return status;
708}
709
710int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
711 struct buffer_head *bh, int type)
712{
713 return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
714}
715
716int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
717 struct buffer_head *bh, int type)
718{
719 return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
720}
721
722int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
723 struct buffer_head *bh, int type)
724{
725 return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
726 type);
727}
728
729int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
730 struct buffer_head *bh, int type)
731{
732 return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
733}
734
735int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
736 struct buffer_head *bh, int type)
737{
738 return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
739}
740
741int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
742 struct buffer_head *bh, int type)
743{
744 return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
745}
746
747int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
748 struct buffer_head *bh, int type)
749{
750 return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
751}
752
753int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
754 struct buffer_head *bh, int type)
755{
756 return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
757}
758
759int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
760 struct buffer_head *bh, int type)
761{
762 return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
763}
764
765int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
766 struct buffer_head *bh, int type)
767{
768 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
769}
770
771void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
772{
773 int status;
774
775 trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);
776
777 status = jbd2_journal_dirty_metadata(handle, bh);
778 if (status) {
779 mlog_errno(status);
780 if (!is_handle_aborted(handle)) {
781 journal_t *journal = handle->h_transaction->t_journal;
782 struct super_block *sb = bh->b_bdev->bd_super;
783
784 mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
785 "Aborting transaction and journal.\n");
786 handle->h_err = status;
787 jbd2_journal_abort_handle(handle);
788 jbd2_journal_abort(journal, status);
789 ocfs2_abort(sb, "Journal already aborted.\n");
790 }
791 }
792}
793
794#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
795
796void ocfs2_set_journal_params(struct ocfs2_super *osb)
797{
798 journal_t *journal = osb->journal->j_journal;
799 unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
800
801 if (osb->osb_commit_interval)
802 commit_interval = osb->osb_commit_interval;
803
804 write_lock(&journal->j_state_lock);
805 journal->j_commit_interval = commit_interval;
806 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
807 journal->j_flags |= JBD2_BARRIER;
808 else
809 journal->j_flags &= ~JBD2_BARRIER;
810 write_unlock(&journal->j_state_lock);
811}
812
813int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
814{
815 int status = -1;
816 struct inode *inode = NULL;
817 journal_t *j_journal = NULL;
818 struct ocfs2_dinode *di = NULL;
819 struct buffer_head *bh = NULL;
820 struct ocfs2_super *osb;
821 int inode_lock = 0;
822
823 BUG_ON(!journal);
824
825 osb = journal->j_osb;
826
827
828 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
829 osb->slot_num);
830 if (inode == NULL) {
831 status = -EACCES;
832 mlog_errno(status);
833 goto done;
834 }
835 if (is_bad_inode(inode)) {
836 mlog(ML_ERROR, "access error (bad inode)\n");
837 iput(inode);
838 inode = NULL;
839 status = -EACCES;
840 goto done;
841 }
842
843 SET_INODE_JOURNAL(inode);
844 OCFS2_I(inode)->ip_open_count++;
845
846
847
848
849 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
850 if (status < 0) {
851 if (status != -ERESTARTSYS)
852 mlog(ML_ERROR, "Could not get lock on journal!\n");
853 goto done;
854 }
855
856 inode_lock = 1;
857 di = (struct ocfs2_dinode *)bh->b_data;
858
859 if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) {
860 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
861 i_size_read(inode));
862 status = -EINVAL;
863 goto done;
864 }
865
866 trace_ocfs2_journal_init(i_size_read(inode),
867 (unsigned long long)inode->i_blocks,
868 OCFS2_I(inode)->ip_clusters);
869
870
871 j_journal = jbd2_journal_init_inode(inode);
872 if (j_journal == NULL) {
873 mlog(ML_ERROR, "Linux journal layer error\n");
874 status = -EINVAL;
875 goto done;
876 }
877
878 trace_ocfs2_journal_init_maxlen(j_journal->j_total_len);
879
880 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
881 OCFS2_JOURNAL_DIRTY_FL);
882
883 journal->j_journal = j_journal;
884 journal->j_journal->j_submit_inode_data_buffers =
885 jbd2_journal_submit_inode_data_buffers;
886 journal->j_journal->j_finish_inode_data_buffers =
887 jbd2_journal_finish_inode_data_buffers;
888 journal->j_inode = inode;
889 journal->j_bh = bh;
890
891 ocfs2_set_journal_params(osb);
892
893 journal->j_state = OCFS2_JOURNAL_LOADED;
894
895 status = 0;
896done:
897 if (status < 0) {
898 if (inode_lock)
899 ocfs2_inode_unlock(inode, 1);
900 brelse(bh);
901 if (inode) {
902 OCFS2_I(inode)->ip_open_count--;
903 iput(inode);
904 }
905 }
906
907 return status;
908}
909
910static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
911{
912 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
913}
914
915static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
916{
917 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
918}
919
920static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
921 int dirty, int replayed)
922{
923 int status;
924 unsigned int flags;
925 struct ocfs2_journal *journal = osb->journal;
926 struct buffer_head *bh = journal->j_bh;
927 struct ocfs2_dinode *fe;
928
929 fe = (struct ocfs2_dinode *)bh->b_data;
930
931
932
933
934 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
935
936 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
937 if (dirty)
938 flags |= OCFS2_JOURNAL_DIRTY_FL;
939 else
940 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
941 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
942
943 if (replayed)
944 ocfs2_bump_recovery_generation(fe);
945
946 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
947 status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
948 if (status < 0)
949 mlog_errno(status);
950
951 return status;
952}
953
954
955
956
957
958void ocfs2_journal_shutdown(struct ocfs2_super *osb)
959{
960 struct ocfs2_journal *journal = NULL;
961 int status = 0;
962 struct inode *inode = NULL;
963 int num_running_trans = 0;
964
965 BUG_ON(!osb);
966
967 journal = osb->journal;
968 if (!journal)
969 goto done;
970
971 inode = journal->j_inode;
972
973 if (journal->j_state != OCFS2_JOURNAL_LOADED)
974 goto done;
975
976
977 if (!igrab(inode))
978 BUG();
979
980 num_running_trans = atomic_read(&(osb->journal->j_num_trans));
981 trace_ocfs2_journal_shutdown(num_running_trans);
982
983
984
985
986
987 journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;
988
989
990
991
992 if (osb->commit_task) {
993
994 trace_ocfs2_journal_shutdown_wait(osb->commit_task);
995 kthread_stop(osb->commit_task);
996 osb->commit_task = NULL;
997 }
998
999 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
1000
1001 if (ocfs2_mount_local(osb)) {
1002 jbd2_journal_lock_updates(journal->j_journal);
1003 status = jbd2_journal_flush(journal->j_journal);
1004 jbd2_journal_unlock_updates(journal->j_journal);
1005 if (status < 0)
1006 mlog_errno(status);
1007 }
1008
1009
1010 if (!jbd2_journal_destroy(journal->j_journal) && !status) {
1011
1012
1013
1014
1015 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
1016 if (status < 0)
1017 mlog_errno(status);
1018 }
1019 journal->j_journal = NULL;
1020
1021 OCFS2_I(inode)->ip_open_count--;
1022
1023
1024 ocfs2_inode_unlock(inode, 1);
1025
1026 brelse(journal->j_bh);
1027 journal->j_bh = NULL;
1028
1029 journal->j_state = OCFS2_JOURNAL_FREE;
1030
1031
1032done:
1033 iput(inode);
1034}
1035
1036static void ocfs2_clear_journal_error(struct super_block *sb,
1037 journal_t *journal,
1038 int slot)
1039{
1040 int olderr;
1041
1042 olderr = jbd2_journal_errno(journal);
1043 if (olderr) {
1044 mlog(ML_ERROR, "File system error %d recorded in "
1045 "journal %u.\n", olderr, slot);
1046 mlog(ML_ERROR, "File system on device %s needs checking.\n",
1047 sb->s_id);
1048
1049 jbd2_journal_ack_err(journal);
1050 jbd2_journal_clear_err(journal);
1051 }
1052}
1053
1054int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
1055{
1056 int status = 0;
1057 struct ocfs2_super *osb;
1058
1059 BUG_ON(!journal);
1060
1061 osb = journal->j_osb;
1062
1063 status = jbd2_journal_load(journal->j_journal);
1064 if (status < 0) {
1065 mlog(ML_ERROR, "Failed to load journal!\n");
1066 goto done;
1067 }
1068
1069 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
1070
1071 if (replayed) {
1072 jbd2_journal_lock_updates(journal->j_journal);
1073 status = jbd2_journal_flush(journal->j_journal);
1074 jbd2_journal_unlock_updates(journal->j_journal);
1075 if (status < 0)
1076 mlog_errno(status);
1077 }
1078
1079 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
1080 if (status < 0) {
1081 mlog_errno(status);
1082 goto done;
1083 }
1084
1085
1086 if (!local) {
1087 osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
1088 "ocfs2cmt-%s", osb->uuid_str);
1089 if (IS_ERR(osb->commit_task)) {
1090 status = PTR_ERR(osb->commit_task);
1091 osb->commit_task = NULL;
1092 mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
1093 "error=%d", status);
1094 goto done;
1095 }
1096 } else
1097 osb->commit_task = NULL;
1098
1099done:
1100 return status;
1101}
1102
1103
1104
1105
1106int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1107{
1108 int status;
1109
1110 BUG_ON(!journal);
1111
1112 status = jbd2_journal_wipe(journal->j_journal, full);
1113 if (status < 0) {
1114 mlog_errno(status);
1115 goto bail;
1116 }
1117
1118 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
1119 if (status < 0)
1120 mlog_errno(status);
1121
1122bail:
1123 return status;
1124}
1125
1126static int ocfs2_recovery_completed(struct ocfs2_super *osb)
1127{
1128 int empty;
1129 struct ocfs2_recovery_map *rm = osb->recovery_map;
1130
1131 spin_lock(&osb->osb_lock);
1132 empty = (rm->rm_used == 0);
1133 spin_unlock(&osb->osb_lock);
1134
1135 return empty;
1136}
1137
1138void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
1139{
1140 wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
1141}
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153static int ocfs2_force_read_journal(struct inode *inode)
1154{
1155 int status = 0;
1156 int i;
1157 u64 v_blkno, p_blkno, p_blocks, num_blocks;
1158 struct buffer_head *bh = NULL;
1159 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1160
1161 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
1162 v_blkno = 0;
1163 while (v_blkno < num_blocks) {
1164 status = ocfs2_extent_map_get_blocks(inode, v_blkno,
1165 &p_blkno, &p_blocks, NULL);
1166 if (status < 0) {
1167 mlog_errno(status);
1168 goto bail;
1169 }
1170
1171 for (i = 0; i < p_blocks; i++, p_blkno++) {
1172 bh = __find_get_block(osb->sb->s_bdev, p_blkno,
1173 osb->sb->s_blocksize);
1174
1175 if (!bh)
1176 continue;
1177
1178 brelse(bh);
1179 bh = NULL;
1180
1181
1182
1183 status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
1184 if (status < 0) {
1185 mlog_errno(status);
1186 goto bail;
1187 }
1188
1189 brelse(bh);
1190 bh = NULL;
1191 }
1192
1193 v_blkno += p_blocks;
1194 }
1195
1196bail:
1197 return status;
1198}
1199
1200struct ocfs2_la_recovery_item {
1201 struct list_head lri_list;
1202 int lri_slot;
1203 struct ocfs2_dinode *lri_la_dinode;
1204 struct ocfs2_dinode *lri_tl_dinode;
1205 struct ocfs2_quota_recovery *lri_qrec;
1206 enum ocfs2_orphan_reco_type lri_orphan_reco_type;
1207};
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219void ocfs2_complete_recovery(struct work_struct *work)
1220{
1221 int ret = 0;
1222 struct ocfs2_journal *journal =
1223 container_of(work, struct ocfs2_journal, j_recovery_work);
1224 struct ocfs2_super *osb = journal->j_osb;
1225 struct ocfs2_dinode *la_dinode, *tl_dinode;
1226 struct ocfs2_la_recovery_item *item, *n;
1227 struct ocfs2_quota_recovery *qrec;
1228 enum ocfs2_orphan_reco_type orphan_reco_type;
1229 LIST_HEAD(tmp_la_list);
1230
1231 trace_ocfs2_complete_recovery(
1232 (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
1233
1234 spin_lock(&journal->j_lock);
1235 list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
1236 spin_unlock(&journal->j_lock);
1237
1238 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
1239 list_del_init(&item->lri_list);
1240
1241 ocfs2_wait_on_quotas(osb);
1242
1243 la_dinode = item->lri_la_dinode;
1244 tl_dinode = item->lri_tl_dinode;
1245 qrec = item->lri_qrec;
1246 orphan_reco_type = item->lri_orphan_reco_type;
1247
1248 trace_ocfs2_complete_recovery_slot(item->lri_slot,
1249 la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
1250 tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
1251 qrec);
1252
1253 if (la_dinode) {
1254 ret = ocfs2_complete_local_alloc_recovery(osb,
1255 la_dinode);
1256 if (ret < 0)
1257 mlog_errno(ret);
1258
1259 kfree(la_dinode);
1260 }
1261
1262 if (tl_dinode) {
1263 ret = ocfs2_complete_truncate_log_recovery(osb,
1264 tl_dinode);
1265 if (ret < 0)
1266 mlog_errno(ret);
1267
1268 kfree(tl_dinode);
1269 }
1270
1271 ret = ocfs2_recover_orphans(osb, item->lri_slot,
1272 orphan_reco_type);
1273 if (ret < 0)
1274 mlog_errno(ret);
1275
1276 if (qrec) {
1277 ret = ocfs2_finish_quota_recovery(osb, qrec,
1278 item->lri_slot);
1279 if (ret < 0)
1280 mlog_errno(ret);
1281
1282 }
1283
1284 kfree(item);
1285 }
1286
1287 trace_ocfs2_complete_recovery_end(ret);
1288}
1289
1290
1291
1292
1293static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1294 int slot_num,
1295 struct ocfs2_dinode *la_dinode,
1296 struct ocfs2_dinode *tl_dinode,
1297 struct ocfs2_quota_recovery *qrec,
1298 enum ocfs2_orphan_reco_type orphan_reco_type)
1299{
1300 struct ocfs2_la_recovery_item *item;
1301
1302 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
1303 if (!item) {
1304
1305
1306
1307 kfree(la_dinode);
1308 kfree(tl_dinode);
1309
1310 if (qrec)
1311 ocfs2_free_quota_recovery(qrec);
1312
1313 mlog_errno(-ENOMEM);
1314 return;
1315 }
1316
1317 INIT_LIST_HEAD(&item->lri_list);
1318 item->lri_la_dinode = la_dinode;
1319 item->lri_slot = slot_num;
1320 item->lri_tl_dinode = tl_dinode;
1321 item->lri_qrec = qrec;
1322 item->lri_orphan_reco_type = orphan_reco_type;
1323
1324 spin_lock(&journal->j_lock);
1325 list_add_tail(&item->lri_list, &journal->j_la_cleanups);
1326 queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work);
1327 spin_unlock(&journal->j_lock);
1328}
1329
1330
1331
1332void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
1333{
1334 struct ocfs2_journal *journal = osb->journal;
1335
1336 if (ocfs2_is_hard_readonly(osb))
1337 return;
1338
1339
1340
1341 ocfs2_queue_recovery_completion(journal, osb->slot_num,
1342 osb->local_alloc_copy, NULL, NULL,
1343 ORPHAN_NEED_TRUNCATE);
1344 ocfs2_schedule_truncate_log_flush(osb, 0);
1345
1346 osb->local_alloc_copy = NULL;
1347
1348
1349 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1350 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1351 ocfs2_free_replay_slots(osb);
1352}
1353
1354void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
1355{
1356 if (osb->quota_rec) {
1357 ocfs2_queue_recovery_completion(osb->journal,
1358 osb->slot_num,
1359 NULL,
1360 NULL,
1361 osb->quota_rec,
1362 ORPHAN_NEED_TRUNCATE);
1363 osb->quota_rec = NULL;
1364 }
1365}
1366
1367static int __ocfs2_recovery_thread(void *arg)
1368{
1369 int status, node_num, slot_num;
1370 struct ocfs2_super *osb = arg;
1371 struct ocfs2_recovery_map *rm = osb->recovery_map;
1372 int *rm_quota = NULL;
1373 int rm_quota_used = 0, i;
1374 struct ocfs2_quota_recovery *qrec;
1375
1376
1377 int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
1378 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
1379 || OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
1380 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);
1381
1382 status = ocfs2_wait_on_mount(osb);
1383 if (status < 0) {
1384 goto bail;
1385 }
1386
1387 if (quota_enabled) {
1388 rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
1389 if (!rm_quota) {
1390 status = -ENOMEM;
1391 goto bail;
1392 }
1393 }
1394restart:
1395 status = ocfs2_super_lock(osb, 1);
1396 if (status < 0) {
1397 mlog_errno(status);
1398 goto bail;
1399 }
1400
1401 status = ocfs2_compute_replay_slots(osb);
1402 if (status < 0)
1403 mlog_errno(status);
1404
1405
1406 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1407 NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);
1408
1409 spin_lock(&osb->osb_lock);
1410 while (rm->rm_used) {
1411
1412
1413 node_num = rm->rm_entries[0];
1414 spin_unlock(&osb->osb_lock);
1415 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1416 trace_ocfs2_recovery_thread_node(node_num, slot_num);
1417 if (slot_num == -ENOENT) {
1418 status = 0;
1419 goto skip_recovery;
1420 }
1421
1422
1423
1424
1425
1426
1427
1428 if (quota_enabled) {
1429 for (i = 0; i < rm_quota_used
1430 && rm_quota[i] != slot_num; i++)
1431 ;
1432
1433 if (i == rm_quota_used)
1434 rm_quota[rm_quota_used++] = slot_num;
1435 }
1436
1437 status = ocfs2_recover_node(osb, node_num, slot_num);
1438skip_recovery:
1439 if (!status) {
1440 ocfs2_recovery_map_clear(osb, node_num);
1441 } else {
1442 mlog(ML_ERROR,
1443 "Error %d recovering node %d on device (%u,%u)!\n",
1444 status, node_num,
1445 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1446 mlog(ML_ERROR, "Volume requires unmount.\n");
1447 }
1448
1449 spin_lock(&osb->osb_lock);
1450 }
1451 spin_unlock(&osb->osb_lock);
1452 trace_ocfs2_recovery_thread_end(status);
1453
1454
1455 status = ocfs2_check_journals_nolocks(osb);
1456 status = (status == -EROFS) ? 0 : status;
1457 if (status < 0)
1458 mlog_errno(status);
1459
1460
1461
1462
1463 if (quota_enabled) {
1464 for (i = 0; i < rm_quota_used; i++) {
1465 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
1466 if (IS_ERR(qrec)) {
1467 status = PTR_ERR(qrec);
1468 mlog_errno(status);
1469 continue;
1470 }
1471 ocfs2_queue_recovery_completion(osb->journal,
1472 rm_quota[i],
1473 NULL, NULL, qrec,
1474 ORPHAN_NEED_TRUNCATE);
1475 }
1476 }
1477
1478 ocfs2_super_unlock(osb, 1);
1479
1480
1481 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1482
1483bail:
1484 mutex_lock(&osb->recovery_lock);
1485 if (!status && !ocfs2_recovery_completed(osb)) {
1486 mutex_unlock(&osb->recovery_lock);
1487 goto restart;
1488 }
1489
1490 ocfs2_free_replay_slots(osb);
1491 osb->recovery_thread_task = NULL;
1492 mb();
1493 wake_up(&osb->recovery_event);
1494
1495 mutex_unlock(&osb->recovery_lock);
1496
1497 if (quota_enabled)
1498 kfree(rm_quota);
1499
1500
1501
1502
1503 complete_and_exit(NULL, status);
1504}
1505
1506void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1507{
1508 mutex_lock(&osb->recovery_lock);
1509
1510 trace_ocfs2_recovery_thread(node_num, osb->node_num,
1511 osb->disable_recovery, osb->recovery_thread_task,
1512 osb->disable_recovery ?
1513 -1 : ocfs2_recovery_map_set(osb, node_num));
1514
1515 if (osb->disable_recovery)
1516 goto out;
1517
1518 if (osb->recovery_thread_task)
1519 goto out;
1520
1521 osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
1522 "ocfs2rec-%s", osb->uuid_str);
1523 if (IS_ERR(osb->recovery_thread_task)) {
1524 mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
1525 osb->recovery_thread_task = NULL;
1526 }
1527
1528out:
1529 mutex_unlock(&osb->recovery_lock);
1530 wake_up(&osb->recovery_event);
1531}
1532
1533static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1534 int slot_num,
1535 struct buffer_head **bh,
1536 struct inode **ret_inode)
1537{
1538 int status = -EACCES;
1539 struct inode *inode = NULL;
1540
1541 BUG_ON(slot_num >= osb->max_slots);
1542
1543 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1544 slot_num);
1545 if (!inode || is_bad_inode(inode)) {
1546 mlog_errno(status);
1547 goto bail;
1548 }
1549 SET_INODE_JOURNAL(inode);
1550
1551 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
1552 if (status < 0) {
1553 mlog_errno(status);
1554 goto bail;
1555 }
1556
1557 status = 0;
1558
1559bail:
1560 if (inode) {
1561 if (status || !ret_inode)
1562 iput(inode);
1563 else
1564 *ret_inode = inode;
1565 }
1566 return status;
1567}
1568
1569
1570
1571static int ocfs2_replay_journal(struct ocfs2_super *osb,
1572 int node_num,
1573 int slot_num)
1574{
1575 int status;
1576 int got_lock = 0;
1577 unsigned int flags;
1578 struct inode *inode = NULL;
1579 struct ocfs2_dinode *fe;
1580 journal_t *journal = NULL;
1581 struct buffer_head *bh = NULL;
1582 u32 slot_reco_gen;
1583
1584 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1585 if (status) {
1586 mlog_errno(status);
1587 goto done;
1588 }
1589
1590 fe = (struct ocfs2_dinode *)bh->b_data;
1591 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1592 brelse(bh);
1593 bh = NULL;
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1604 trace_ocfs2_replay_journal_recovered(slot_num,
1605 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1606 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1607 status = -EBUSY;
1608 goto done;
1609 }
1610
1611
1612
1613 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1614 if (status < 0) {
1615 trace_ocfs2_replay_journal_lock_err(status);
1616 if (status != -ERESTARTSYS)
1617 mlog(ML_ERROR, "Could not lock journal!\n");
1618 goto done;
1619 }
1620 got_lock = 1;
1621
1622 fe = (struct ocfs2_dinode *) bh->b_data;
1623
1624 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1625 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1626
1627 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1628 trace_ocfs2_replay_journal_skip(node_num);
1629
1630 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1631 goto done;
1632 }
1633
1634
1635 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1636
1637 printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\
1638 "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
1639 MINOR(osb->sb->s_dev));
1640
1641 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
1642
1643 status = ocfs2_force_read_journal(inode);
1644 if (status < 0) {
1645 mlog_errno(status);
1646 goto done;
1647 }
1648
1649 journal = jbd2_journal_init_inode(inode);
1650 if (journal == NULL) {
1651 mlog(ML_ERROR, "Linux journal layer error\n");
1652 status = -EIO;
1653 goto done;
1654 }
1655
1656 status = jbd2_journal_load(journal);
1657 if (status < 0) {
1658 mlog_errno(status);
1659 if (!igrab(inode))
1660 BUG();
1661 jbd2_journal_destroy(journal);
1662 goto done;
1663 }
1664
1665 ocfs2_clear_journal_error(osb->sb, journal, slot_num);
1666
1667
1668 jbd2_journal_lock_updates(journal);
1669 status = jbd2_journal_flush(journal);
1670 jbd2_journal_unlock_updates(journal);
1671 if (status < 0)
1672 mlog_errno(status);
1673
1674
1675 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1676 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1677 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
1678
1679
1680 ocfs2_bump_recovery_generation(fe);
1681 osb->slot_recovery_generations[slot_num] =
1682 ocfs2_get_recovery_generation(fe);
1683
1684 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
1685 status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
1686 if (status < 0)
1687 mlog_errno(status);
1688
1689 if (!igrab(inode))
1690 BUG();
1691
1692 jbd2_journal_destroy(journal);
1693
1694 printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\
1695 "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
1696 MINOR(osb->sb->s_dev));
1697done:
1698
1699 if (got_lock)
1700 ocfs2_inode_unlock(inode, 1);
1701
1702 iput(inode);
1703 brelse(bh);
1704
1705 return status;
1706}
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720static int ocfs2_recover_node(struct ocfs2_super *osb,
1721 int node_num, int slot_num)
1722{
1723 int status = 0;
1724 struct ocfs2_dinode *la_copy = NULL;
1725 struct ocfs2_dinode *tl_copy = NULL;
1726
1727 trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
1728
1729
1730
1731 BUG_ON(osb->node_num == node_num);
1732
1733 status = ocfs2_replay_journal(osb, node_num, slot_num);
1734 if (status < 0) {
1735 if (status == -EBUSY) {
1736 trace_ocfs2_recover_node_skip(slot_num, node_num);
1737 status = 0;
1738 goto done;
1739 }
1740 mlog_errno(status);
1741 goto done;
1742 }
1743
1744
1745 status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
1746 if (status < 0) {
1747 mlog_errno(status);
1748 goto done;
1749 }
1750
1751
1752
1753
1754 status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
1755 if (status < 0)
1756 mlog_errno(status);
1757
1758
1759
1760 status = ocfs2_clear_slot(osb, slot_num);
1761 if (status < 0)
1762 mlog_errno(status);
1763
1764
1765 ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
1766 tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
1767
1768 status = 0;
1769done:
1770
1771 return status;
1772}
1773
1774
1775
1776
1777static int ocfs2_trylock_journal(struct ocfs2_super *osb,
1778 int slot_num)
1779{
1780 int status, flags;
1781 struct inode *inode = NULL;
1782
1783 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1784 slot_num);
1785 if (inode == NULL) {
1786 mlog(ML_ERROR, "access error\n");
1787 status = -EACCES;
1788 goto bail;
1789 }
1790 if (is_bad_inode(inode)) {
1791 mlog(ML_ERROR, "access error (bad inode)\n");
1792 iput(inode);
1793 inode = NULL;
1794 status = -EACCES;
1795 goto bail;
1796 }
1797 SET_INODE_JOURNAL(inode);
1798
1799 flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
1800 status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
1801 if (status < 0) {
1802 if (status != -EAGAIN)
1803 mlog_errno(status);
1804 goto bail;
1805 }
1806
1807 ocfs2_inode_unlock(inode, 1);
1808bail:
1809 iput(inode);
1810
1811 return status;
1812}
1813
1814
1815
1816int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1817{
1818 unsigned int node_num;
1819 int status, i;
1820 u32 gen;
1821 struct buffer_head *bh = NULL;
1822 struct ocfs2_dinode *di;
1823
1824
1825
1826
1827 for (i = 0; i < osb->max_slots; i++) {
1828
1829 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1830 if (status) {
1831 mlog_errno(status);
1832 goto bail;
1833 }
1834 di = (struct ocfs2_dinode *)bh->b_data;
1835 gen = ocfs2_get_recovery_generation(di);
1836 brelse(bh);
1837 bh = NULL;
1838
1839 spin_lock(&osb->osb_lock);
1840 osb->slot_recovery_generations[i] = gen;
1841
1842 trace_ocfs2_mark_dead_nodes(i,
1843 osb->slot_recovery_generations[i]);
1844
1845 if (i == osb->slot_num) {
1846 spin_unlock(&osb->osb_lock);
1847 continue;
1848 }
1849
1850 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
1851 if (status == -ENOENT) {
1852 spin_unlock(&osb->osb_lock);
1853 continue;
1854 }
1855
1856 if (__ocfs2_recovery_map_test(osb, node_num)) {
1857 spin_unlock(&osb->osb_lock);
1858 continue;
1859 }
1860 spin_unlock(&osb->osb_lock);
1861
1862
1863
1864
1865 status = ocfs2_trylock_journal(osb, i);
1866 if (!status) {
1867
1868
1869
1870 ocfs2_recovery_thread(osb, node_num);
1871 } else if ((status < 0) && (status != -EAGAIN)) {
1872 mlog_errno(status);
1873 goto bail;
1874 }
1875 }
1876
1877 status = 0;
1878bail:
1879 return status;
1880}
1881
1882
1883
1884
1885
1886
1887static inline unsigned long ocfs2_orphan_scan_timeout(void)
1888{
1889 unsigned long time;
1890
1891 get_random_bytes(&time, sizeof(time));
1892 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
1893 return msecs_to_jiffies(time);
1894}
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1924{
1925 struct ocfs2_orphan_scan *os;
1926 int status, i;
1927 u32 seqno = 0;
1928
1929 os = &osb->osb_orphan_scan;
1930
1931 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1932 goto out;
1933
1934 trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
1935 atomic_read(&os->os_state));
1936
1937 status = ocfs2_orphan_scan_lock(osb, &seqno);
1938 if (status < 0) {
1939 if (status != -EAGAIN)
1940 mlog_errno(status);
1941 goto out;
1942 }
1943
1944
1945 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1946 goto unlock;
1947
1948 if (os->os_seqno != seqno) {
1949 os->os_seqno = seqno;
1950 goto unlock;
1951 }
1952
1953 for (i = 0; i < osb->max_slots; i++)
1954 ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
1955 NULL, ORPHAN_NO_NEED_TRUNCATE);
1956
1957
1958
1959
1960 seqno++;
1961 os->os_count++;
1962 os->os_scantime = ktime_get_seconds();
1963unlock:
1964 ocfs2_orphan_scan_unlock(osb, seqno);
1965out:
1966 trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
1967 atomic_read(&os->os_state));
1968 return;
1969}
1970
1971
1972static void ocfs2_orphan_scan_work(struct work_struct *work)
1973{
1974 struct ocfs2_orphan_scan *os;
1975 struct ocfs2_super *osb;
1976
1977 os = container_of(work, struct ocfs2_orphan_scan,
1978 os_orphan_scan_work.work);
1979 osb = os->os_osb;
1980
1981 mutex_lock(&os->os_lock);
1982 ocfs2_queue_orphan_scan(osb);
1983 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1984 queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
1985 ocfs2_orphan_scan_timeout());
1986 mutex_unlock(&os->os_lock);
1987}
1988
1989void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1990{
1991 struct ocfs2_orphan_scan *os;
1992
1993 os = &osb->osb_orphan_scan;
1994 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
1995 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1996 mutex_lock(&os->os_lock);
1997 cancel_delayed_work(&os->os_orphan_scan_work);
1998 mutex_unlock(&os->os_lock);
1999 }
2000}
2001
2002void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
2003{
2004 struct ocfs2_orphan_scan *os;
2005
2006 os = &osb->osb_orphan_scan;
2007 os->os_osb = osb;
2008 os->os_count = 0;
2009 os->os_seqno = 0;
2010 mutex_init(&os->os_lock);
2011 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
2012}
2013
2014void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
2015{
2016 struct ocfs2_orphan_scan *os;
2017
2018 os = &osb->osb_orphan_scan;
2019 os->os_scantime = ktime_get_seconds();
2020 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
2021 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
2022 else {
2023 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
2024 queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
2025 ocfs2_orphan_scan_timeout());
2026 }
2027}
2028
2029struct ocfs2_orphan_filldir_priv {
2030 struct dir_context ctx;
2031 struct inode *head;
2032 struct ocfs2_super *osb;
2033 enum ocfs2_orphan_reco_type orphan_reco_type;
2034};
2035
2036static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
2037 int name_len, loff_t pos, u64 ino,
2038 unsigned type)
2039{
2040 struct ocfs2_orphan_filldir_priv *p =
2041 container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx);
2042 struct inode *iter;
2043
2044 if (name_len == 1 && !strncmp(".", name, 1))
2045 return 0;
2046 if (name_len == 2 && !strncmp("..", name, 2))
2047 return 0;
2048
2049
2050 if ((p->orphan_reco_type == ORPHAN_NO_NEED_TRUNCATE) &&
2051 (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2052 OCFS2_DIO_ORPHAN_PREFIX_LEN)))
2053 return 0;
2054
2055
2056 iter = ocfs2_iget(p->osb, ino,
2057 OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
2058 if (IS_ERR(iter))
2059 return 0;
2060
2061 if (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2062 OCFS2_DIO_ORPHAN_PREFIX_LEN))
2063 OCFS2_I(iter)->ip_flags |= OCFS2_INODE_DIO_ORPHAN_ENTRY;
2064
2065
2066
2067 if (OCFS2_I(iter)->ip_next_orphan) {
2068 iput(iter);
2069 return 0;
2070 }
2071
2072 trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
2073
2074
2075 OCFS2_I(iter)->ip_next_orphan = p->head;
2076 p->head = iter;
2077
2078 return 0;
2079}
2080
2081static int ocfs2_queue_orphans(struct ocfs2_super *osb,
2082 int slot,
2083 struct inode **head,
2084 enum ocfs2_orphan_reco_type orphan_reco_type)
2085{
2086 int status;
2087 struct inode *orphan_dir_inode = NULL;
2088 struct ocfs2_orphan_filldir_priv priv = {
2089 .ctx.actor = ocfs2_orphan_filldir,
2090 .osb = osb,
2091 .head = *head,
2092 .orphan_reco_type = orphan_reco_type
2093 };
2094
2095 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2096 ORPHAN_DIR_SYSTEM_INODE,
2097 slot);
2098 if (!orphan_dir_inode) {
2099 status = -ENOENT;
2100 mlog_errno(status);
2101 return status;
2102 }
2103
2104 inode_lock(orphan_dir_inode);
2105 status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
2106 if (status < 0) {
2107 mlog_errno(status);
2108 goto out;
2109 }
2110
2111 status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx);
2112 if (status) {
2113 mlog_errno(status);
2114 goto out_cluster;
2115 }
2116
2117 *head = priv.head;
2118
2119out_cluster:
2120 ocfs2_inode_unlock(orphan_dir_inode, 0);
2121out:
2122 inode_unlock(orphan_dir_inode);
2123 iput(orphan_dir_inode);
2124 return status;
2125}
2126
2127static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
2128 int slot)
2129{
2130 int ret;
2131
2132 spin_lock(&osb->osb_lock);
2133 ret = !osb->osb_orphan_wipes[slot];
2134 spin_unlock(&osb->osb_lock);
2135 return ret;
2136}
2137
2138static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
2139 int slot)
2140{
2141 spin_lock(&osb->osb_lock);
2142
2143
2144 ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
2145 while (osb->osb_orphan_wipes[slot]) {
2146
2147
2148
2149 spin_unlock(&osb->osb_lock);
2150 wait_event_interruptible(osb->osb_wipe_event,
2151 ocfs2_orphan_recovery_can_continue(osb, slot));
2152 spin_lock(&osb->osb_lock);
2153 }
2154 spin_unlock(&osb->osb_lock);
2155}
2156
2157static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
2158 int slot)
2159{
2160 ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
2161}
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2182 int slot,
2183 enum ocfs2_orphan_reco_type orphan_reco_type)
2184{
2185 int ret = 0;
2186 struct inode *inode = NULL;
2187 struct inode *iter;
2188 struct ocfs2_inode_info *oi;
2189 struct buffer_head *di_bh = NULL;
2190 struct ocfs2_dinode *di = NULL;
2191
2192 trace_ocfs2_recover_orphans(slot);
2193
2194 ocfs2_mark_recovering_orphan_dir(osb, slot);
2195 ret = ocfs2_queue_orphans(osb, slot, &inode, orphan_reco_type);
2196 ocfs2_clear_recovering_orphan_dir(osb, slot);
2197
2198
2199
2200 if (ret)
2201 mlog_errno(ret);
2202
2203 while (inode) {
2204 oi = OCFS2_I(inode);
2205 trace_ocfs2_recover_orphans_iput(
2206 (unsigned long long)oi->ip_blkno);
2207
2208 iter = oi->ip_next_orphan;
2209 oi->ip_next_orphan = NULL;
2210
2211 if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
2212 inode_lock(inode);
2213 ret = ocfs2_rw_lock(inode, 1);
2214 if (ret < 0) {
2215 mlog_errno(ret);
2216 goto unlock_mutex;
2217 }
2218
2219
2220
2221
2222 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2223 if (ret) {
2224 mlog_errno(ret);
2225 goto unlock_rw;
2226 }
2227
2228 di = (struct ocfs2_dinode *)di_bh->b_data;
2229
2230 if (di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)) {
2231 ret = ocfs2_truncate_file(inode, di_bh,
2232 i_size_read(inode));
2233 if (ret < 0) {
2234 if (ret != -ENOSPC)
2235 mlog_errno(ret);
2236 goto unlock_inode;
2237 }
2238
2239 ret = ocfs2_del_inode_from_orphan(osb, inode,
2240 di_bh, 0, 0);
2241 if (ret)
2242 mlog_errno(ret);
2243 }
2244unlock_inode:
2245 ocfs2_inode_unlock(inode, 1);
2246 brelse(di_bh);
2247 di_bh = NULL;
2248unlock_rw:
2249 ocfs2_rw_unlock(inode, 1);
2250unlock_mutex:
2251 inode_unlock(inode);
2252
2253
2254 oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
2255 } else {
2256 spin_lock(&oi->ip_lock);
2257
2258
2259 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2260 spin_unlock(&oi->ip_lock);
2261 }
2262
2263 iput(inode);
2264 inode = iter;
2265 }
2266
2267 return ret;
2268}
2269
2270static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
2271{
2272
2273
2274
2275 wait_event(osb->osb_mount_event,
2276 (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
2277 atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
2278 atomic_read(&osb->vol_state) == VOLUME_DISABLED);
2279
2280
2281
2282
2283 if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
2284 trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
2285 mlog(0, "mount error, exiting!\n");
2286 return -EBUSY;
2287 }
2288
2289 return 0;
2290}
2291
2292static int ocfs2_commit_thread(void *arg)
2293{
2294 int status;
2295 struct ocfs2_super *osb = arg;
2296 struct ocfs2_journal *journal = osb->journal;
2297
2298
2299
2300
2301
2302 while (!(kthread_should_stop() &&
2303 atomic_read(&journal->j_num_trans) == 0)) {
2304
2305 wait_event_interruptible(osb->checkpoint_event,
2306 atomic_read(&journal->j_num_trans)
2307 || kthread_should_stop());
2308
2309 status = ocfs2_commit_cache(osb);
2310 if (status < 0) {
2311 static unsigned long abort_warn_time;
2312
2313
2314 if (printk_timed_ratelimit(&abort_warn_time, 60*HZ))
2315 mlog(ML_ERROR, "status = %d, journal is "
2316 "already aborted.\n", status);
2317
2318
2319
2320
2321
2322 msleep_interruptible(1000);
2323 }
2324
2325 if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
2326 mlog(ML_KTHREAD,
2327 "commit_thread: %u transactions pending on "
2328 "shutdown\n",
2329 atomic_read(&journal->j_num_trans));
2330 }
2331 }
2332
2333 return 0;
2334}
2335
2336
2337
2338
2339
2340
2341int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
2342{
2343 int ret = 0;
2344 unsigned int slot;
2345 struct buffer_head *di_bh = NULL;
2346 struct ocfs2_dinode *di;
2347 int journal_dirty = 0;
2348
2349 for(slot = 0; slot < osb->max_slots; slot++) {
2350 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
2351 if (ret) {
2352 mlog_errno(ret);
2353 goto out;
2354 }
2355
2356 di = (struct ocfs2_dinode *) di_bh->b_data;
2357
2358 osb->slot_recovery_generations[slot] =
2359 ocfs2_get_recovery_generation(di);
2360
2361 if (le32_to_cpu(di->id1.journal1.ij_flags) &
2362 OCFS2_JOURNAL_DIRTY_FL)
2363 journal_dirty = 1;
2364
2365 brelse(di_bh);
2366 di_bh = NULL;
2367 }
2368
2369out:
2370 if (journal_dirty)
2371 ret = -EROFS;
2372 return ret;
2373}
2374