1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include <linux/fs.h>
27#include <linux/types.h>
28#include <linux/slab.h>
29#include <linux/highmem.h>
30#include <linux/kthread.h>
31#include <linux/time.h>
32#include <linux/random.h>
33#include <linux/delay.h>
34
35#include <cluster/masklog.h>
36
37#include "ocfs2.h"
38
39#include "alloc.h"
40#include "blockcheck.h"
41#include "dir.h"
42#include "dlmglue.h"
43#include "extent_map.h"
44#include "heartbeat.h"
45#include "inode.h"
46#include "journal.h"
47#include "localalloc.h"
48#include "slot_map.h"
49#include "super.h"
50#include "sysfile.h"
51#include "uptodate.h"
52#include "quota.h"
53#include "file.h"
54#include "namei.h"
55
56#include "buffer_head_io.h"
57#include "ocfs2_trace.h"
58
/*
 * NOTE(review): trans_inc_lock is not referenced in the visible part of this
 * file; presumably it serialises transaction-id increments - confirm.
 */
DEFINE_SPINLOCK(trans_inc_lock);

/* NOTE(review): the unit is not visible here; presumably milliseconds - confirm. */
#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000

/* Forward declarations of file-local (static) helpers. */
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_recover_node(struct ocfs2_super *osb,
 int node_num, int slot_num);
static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
 int dirty, int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
 int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 int slot,
 enum ocfs2_orphan_reco_type orphan_reco_type);
static int ocfs2_commit_thread(void *arg);
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 int slot_num,
 struct ocfs2_dinode *la_dinode,
 struct ocfs2_dinode *tl_dinode,
 struct ocfs2_quota_recovery *qrec,
 enum ocfs2_orphan_reco_type orphan_reco_type);
83
/* Call __ocfs2_wait_on_mount() with the quota argument set to 0. */
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
	const int quota = 0;

	return __ocfs2_wait_on_mount(osb, quota);
}
88
/* Call __ocfs2_wait_on_mount() with the quota argument set to 1. */
static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
{
	const int quota = 1;

	return __ocfs2_wait_on_mount(osb, quota);
}
93
94
95
96
97
98
/* Value stored in ocfs2_replay_map.rm_state. */
enum ocfs2_replay_state {
	REPLAY_UNNEEDED = 0,	/* state a freshly computed map starts in */
	REPLAY_NEEDED,		/* flagged slots are waiting to be queued */
	REPLAY_DONE		/* flagged slots have been queued */
};

/*
 * Per-slot replay flags.  The structure is allocated with rm_slots trailing
 * bytes, one per slot; a non-zero byte marks a slot to be queued.
 */
struct ocfs2_replay_map {
	unsigned int rm_slots;			/* number of entries in rm_replay_slots */
	enum ocfs2_replay_state rm_state;
	/* C99 flexible array member instead of a zero-length array. */
	unsigned char rm_replay_slots[];
};
110
111static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
112{
113 if (!osb->replay_map)
114 return;
115
116
117 if (osb->replay_map->rm_state == REPLAY_DONE)
118 return;
119
120 osb->replay_map->rm_state = state;
121}
122
123int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
124{
125 struct ocfs2_replay_map *replay_map;
126 int i, node_num;
127
128
129 if (osb->replay_map)
130 return 0;
131
132 replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
133 (osb->max_slots * sizeof(char)), GFP_KERNEL);
134
135 if (!replay_map) {
136 mlog_errno(-ENOMEM);
137 return -ENOMEM;
138 }
139
140 spin_lock(&osb->osb_lock);
141
142 replay_map->rm_slots = osb->max_slots;
143 replay_map->rm_state = REPLAY_UNNEEDED;
144
145
146 for (i = 0; i < replay_map->rm_slots; i++) {
147 if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
148 replay_map->rm_replay_slots[i] = 1;
149 }
150
151 osb->replay_map = replay_map;
152 spin_unlock(&osb->osb_lock);
153 return 0;
154}
155
156static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
157 enum ocfs2_orphan_reco_type orphan_reco_type)
158{
159 struct ocfs2_replay_map *replay_map = osb->replay_map;
160 int i;
161
162 if (!replay_map)
163 return;
164
165 if (replay_map->rm_state != REPLAY_NEEDED)
166 return;
167
168 for (i = 0; i < replay_map->rm_slots; i++)
169 if (replay_map->rm_replay_slots[i])
170 ocfs2_queue_recovery_completion(osb->journal, i, NULL,
171 NULL, NULL,
172 orphan_reco_type);
173 replay_map->rm_state = REPLAY_DONE;
174}
175
176static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
177{
178 struct ocfs2_replay_map *replay_map = osb->replay_map;
179
180 if (!osb->replay_map)
181 return;
182
183 kfree(replay_map);
184 osb->replay_map = NULL;
185}
186
187int ocfs2_recovery_init(struct ocfs2_super *osb)
188{
189 struct ocfs2_recovery_map *rm;
190
191 mutex_init(&osb->recovery_lock);
192 osb->disable_recovery = 0;
193 osb->recovery_thread_task = NULL;
194 init_waitqueue_head(&osb->recovery_event);
195
196 rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
197 osb->max_slots * sizeof(unsigned int),
198 GFP_KERNEL);
199 if (!rm) {
200 mlog_errno(-ENOMEM);
201 return -ENOMEM;
202 }
203
204 rm->rm_entries = (unsigned int *)((char *)rm +
205 sizeof(struct ocfs2_recovery_map));
206 osb->recovery_map = rm;
207
208 return 0;
209}
210
211
212
213
214static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
215{
216 mb();
217 return osb->recovery_thread_task != NULL;
218}
219
220void ocfs2_recovery_exit(struct ocfs2_super *osb)
221{
222 struct ocfs2_recovery_map *rm;
223
224
225
226 mutex_lock(&osb->recovery_lock);
227 osb->disable_recovery = 1;
228 mutex_unlock(&osb->recovery_lock);
229 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
230
231
232
233
234 flush_workqueue(osb->ocfs2_wq);
235
236
237
238
239
240 rm = osb->recovery_map;
241
242
243 kfree(rm);
244}
245
246static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
247 unsigned int node_num)
248{
249 int i;
250 struct ocfs2_recovery_map *rm = osb->recovery_map;
251
252 assert_spin_locked(&osb->osb_lock);
253
254 for (i = 0; i < rm->rm_used; i++) {
255 if (rm->rm_entries[i] == node_num)
256 return 1;
257 }
258
259 return 0;
260}
261
262
263static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
264 unsigned int node_num)
265{
266 struct ocfs2_recovery_map *rm = osb->recovery_map;
267
268 spin_lock(&osb->osb_lock);
269 if (__ocfs2_recovery_map_test(osb, node_num)) {
270 spin_unlock(&osb->osb_lock);
271 return 1;
272 }
273
274
275 BUG_ON(rm->rm_used >= osb->max_slots);
276
277 rm->rm_entries[rm->rm_used] = node_num;
278 rm->rm_used++;
279 spin_unlock(&osb->osb_lock);
280
281 return 0;
282}
283
284static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
285 unsigned int node_num)
286{
287 int i;
288 struct ocfs2_recovery_map *rm = osb->recovery_map;
289
290 spin_lock(&osb->osb_lock);
291
292 for (i = 0; i < rm->rm_used; i++) {
293 if (rm->rm_entries[i] == node_num)
294 break;
295 }
296
297 if (i < rm->rm_used) {
298
299 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
300 (rm->rm_used - i - 1) * sizeof(unsigned int));
301 rm->rm_used--;
302 }
303
304 spin_unlock(&osb->osb_lock);
305}
306
307static int ocfs2_commit_cache(struct ocfs2_super *osb)
308{
309 int status = 0;
310 unsigned int flushed;
311 struct ocfs2_journal *journal = NULL;
312
313 journal = osb->journal;
314
315
316 down_write(&journal->j_trans_barrier);
317
318 flushed = atomic_read(&journal->j_num_trans);
319 trace_ocfs2_commit_cache_begin(flushed);
320 if (flushed == 0) {
321 up_write(&journal->j_trans_barrier);
322 goto finally;
323 }
324
325 jbd2_journal_lock_updates(journal->j_journal);
326 status = jbd2_journal_flush(journal->j_journal);
327 jbd2_journal_unlock_updates(journal->j_journal);
328 if (status < 0) {
329 up_write(&journal->j_trans_barrier);
330 mlog_errno(status);
331 goto finally;
332 }
333
334 ocfs2_inc_trans_id(journal);
335
336 flushed = atomic_read(&journal->j_num_trans);
337 atomic_set(&journal->j_num_trans, 0);
338 up_write(&journal->j_trans_barrier);
339
340 trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
341
342 ocfs2_wake_downconvert_thread(osb);
343 wake_up(&journal->j_checkpointed);
344finally:
345 return status;
346}
347
348handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
349{
350 journal_t *journal = osb->journal->j_journal;
351 handle_t *handle;
352
353 BUG_ON(!osb || !osb->journal->j_journal);
354
355 if (ocfs2_is_hard_readonly(osb))
356 return ERR_PTR(-EROFS);
357
358 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
359 BUG_ON(max_buffs <= 0);
360
361
362 if (journal_current_handle())
363 return jbd2_journal_start(journal, max_buffs);
364
365 sb_start_intwrite(osb->sb);
366
367 down_read(&osb->journal->j_trans_barrier);
368
369 handle = jbd2_journal_start(journal, max_buffs);
370 if (IS_ERR(handle)) {
371 up_read(&osb->journal->j_trans_barrier);
372 sb_end_intwrite(osb->sb);
373
374 mlog_errno(PTR_ERR(handle));
375
376 if (is_journal_aborted(journal)) {
377 ocfs2_abort(osb->sb, "Detected aborted journal\n");
378 handle = ERR_PTR(-EROFS);
379 }
380 } else {
381 if (!ocfs2_mount_local(osb))
382 atomic_inc(&(osb->journal->j_num_trans));
383 }
384
385 return handle;
386}
387
388int ocfs2_commit_trans(struct ocfs2_super *osb,
389 handle_t *handle)
390{
391 int ret, nested;
392 struct ocfs2_journal *journal = osb->journal;
393
394 BUG_ON(!handle);
395
396 nested = handle->h_ref > 1;
397 ret = jbd2_journal_stop(handle);
398 if (ret < 0)
399 mlog_errno(ret);
400
401 if (!nested) {
402 up_read(&journal->j_trans_barrier);
403 sb_end_intwrite(osb->sb);
404 }
405
406 return ret;
407}
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426int ocfs2_extend_trans(handle_t *handle, int nblocks)
427{
428 int status, old_nblocks;
429
430 BUG_ON(!handle);
431 BUG_ON(nblocks < 0);
432
433 if (!nblocks)
434 return 0;
435
436 old_nblocks = handle->h_buffer_credits;
437
438 trace_ocfs2_extend_trans(old_nblocks, nblocks);
439
440#ifdef CONFIG_OCFS2_DEBUG_FS
441 status = 1;
442#else
443 status = jbd2_journal_extend(handle, nblocks);
444 if (status < 0) {
445 mlog_errno(status);
446 goto bail;
447 }
448#endif
449
450 if (status > 0) {
451 trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
452 status = jbd2_journal_restart(handle,
453 old_nblocks + nblocks);
454 if (status < 0) {
455 mlog_errno(status);
456 goto bail;
457 }
458 }
459
460 status = 0;
461bail:
462 return status;
463}
464
465
466
467
468
469
470
471int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
472{
473 int status, old_nblks;
474
475 BUG_ON(!handle);
476
477 old_nblks = handle->h_buffer_credits;
478 trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
479
480 if (old_nblks < thresh)
481 return 0;
482
483 status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA);
484 if (status < 0) {
485 mlog_errno(status);
486 goto bail;
487 }
488
489 if (status > 0) {
490 status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);
491 if (status < 0)
492 mlog_errno(status);
493 }
494
495bail:
496 return status;
497}
498
499
500struct ocfs2_triggers {
501 struct jbd2_buffer_trigger_type ot_triggers;
502 int ot_offset;
503};
504
505static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
506{
507 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
508}
509
510static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
511 struct buffer_head *bh,
512 void *data, size_t size)
513{
514 struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
515
516
517
518
519
520
521
522 ocfs2_block_check_compute(data, size, data + ot->ot_offset);
523}
524
525
526
527
528
529static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
530 struct buffer_head *bh,
531 void *data, size_t size)
532{
533 struct ocfs2_disk_dqtrailer *dqt =
534 ocfs2_block_dqtrailer(size, data);
535
536
537
538
539
540
541
542 ocfs2_block_check_compute(data, size, &dqt->dq_check);
543}
544
545
546
547
548
549static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
550 struct buffer_head *bh,
551 void *data, size_t size)
552{
553 struct ocfs2_dir_block_trailer *trailer =
554 ocfs2_dir_trailer_from_size(size, data);
555
556
557
558
559
560
561
562 ocfs2_block_check_compute(data, size, &trailer->db_check);
563}
564
565static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
566 struct buffer_head *bh)
567{
568 mlog(ML_ERROR,
569 "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
570 "bh->b_blocknr = %llu\n",
571 (unsigned long)bh,
572 (unsigned long long)bh->b_blocknr);
573
574 ocfs2_error(bh->b_bdev->bd_super,
575 "JBD2 has aborted our journal, ocfs2 cannot continue\n");
576}
577
578static struct ocfs2_triggers di_triggers = {
579 .ot_triggers = {
580 .t_frozen = ocfs2_frozen_trigger,
581 .t_abort = ocfs2_abort_trigger,
582 },
583 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
584};
585
586static struct ocfs2_triggers eb_triggers = {
587 .ot_triggers = {
588 .t_frozen = ocfs2_frozen_trigger,
589 .t_abort = ocfs2_abort_trigger,
590 },
591 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
592};
593
594static struct ocfs2_triggers rb_triggers = {
595 .ot_triggers = {
596 .t_frozen = ocfs2_frozen_trigger,
597 .t_abort = ocfs2_abort_trigger,
598 },
599 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
600};
601
602static struct ocfs2_triggers gd_triggers = {
603 .ot_triggers = {
604 .t_frozen = ocfs2_frozen_trigger,
605 .t_abort = ocfs2_abort_trigger,
606 },
607 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
608};
609
610static struct ocfs2_triggers db_triggers = {
611 .ot_triggers = {
612 .t_frozen = ocfs2_db_frozen_trigger,
613 .t_abort = ocfs2_abort_trigger,
614 },
615};
616
617static struct ocfs2_triggers xb_triggers = {
618 .ot_triggers = {
619 .t_frozen = ocfs2_frozen_trigger,
620 .t_abort = ocfs2_abort_trigger,
621 },
622 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
623};
624
625static struct ocfs2_triggers dq_triggers = {
626 .ot_triggers = {
627 .t_frozen = ocfs2_dq_frozen_trigger,
628 .t_abort = ocfs2_abort_trigger,
629 },
630};
631
632static struct ocfs2_triggers dr_triggers = {
633 .ot_triggers = {
634 .t_frozen = ocfs2_frozen_trigger,
635 .t_abort = ocfs2_abort_trigger,
636 },
637 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
638};
639
640static struct ocfs2_triggers dl_triggers = {
641 .ot_triggers = {
642 .t_frozen = ocfs2_frozen_trigger,
643 .t_abort = ocfs2_abort_trigger,
644 },
645 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
646};
647
648static int __ocfs2_journal_access(handle_t *handle,
649 struct ocfs2_caching_info *ci,
650 struct buffer_head *bh,
651 struct ocfs2_triggers *triggers,
652 int type)
653{
654 int status;
655 struct ocfs2_super *osb =
656 OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
657
658 BUG_ON(!ci || !ci->ci_ops);
659 BUG_ON(!handle);
660 BUG_ON(!bh);
661
662 trace_ocfs2_journal_access(
663 (unsigned long long)ocfs2_metadata_cache_owner(ci),
664 (unsigned long long)bh->b_blocknr, type, bh->b_size);
665
666
667 if (!buffer_uptodate(bh)) {
668 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
669 mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n",
670 (unsigned long long)bh->b_blocknr, bh->b_state);
671
672 lock_buffer(bh);
673
674
675
676
677
678
679
680
681
682
683 if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
684 unlock_buffer(bh);
685 return ocfs2_error(osb->sb, "A previous attempt to "
686 "write this buffer head failed\n");
687 }
688 unlock_buffer(bh);
689 }
690
691
692
693
694
695
696
697 ocfs2_set_ci_lock_trans(osb->journal, ci);
698
699 ocfs2_metadata_cache_io_lock(ci);
700 switch (type) {
701 case OCFS2_JOURNAL_ACCESS_CREATE:
702 case OCFS2_JOURNAL_ACCESS_WRITE:
703 status = jbd2_journal_get_write_access(handle, bh);
704 break;
705
706 case OCFS2_JOURNAL_ACCESS_UNDO:
707 status = jbd2_journal_get_undo_access(handle, bh);
708 break;
709
710 default:
711 status = -EINVAL;
712 mlog(ML_ERROR, "Unknown access type!\n");
713 }
714 if (!status && ocfs2_meta_ecc(osb) && triggers)
715 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
716 ocfs2_metadata_cache_io_unlock(ci);
717
718 if (status < 0)
719 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
720 status, type);
721
722 return status;
723}
724
725int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
726 struct buffer_head *bh, int type)
727{
728 return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
729}
730
731int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
732 struct buffer_head *bh, int type)
733{
734 return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
735}
736
737int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
738 struct buffer_head *bh, int type)
739{
740 return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
741 type);
742}
743
744int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
745 struct buffer_head *bh, int type)
746{
747 return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
748}
749
750int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
751 struct buffer_head *bh, int type)
752{
753 return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
754}
755
756int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
757 struct buffer_head *bh, int type)
758{
759 return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
760}
761
762int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
763 struct buffer_head *bh, int type)
764{
765 return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
766}
767
768int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
769 struct buffer_head *bh, int type)
770{
771 return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
772}
773
774int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
775 struct buffer_head *bh, int type)
776{
777 return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
778}
779
780int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
781 struct buffer_head *bh, int type)
782{
783 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
784}
785
786void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
787{
788 int status;
789
790 trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);
791
792 status = jbd2_journal_dirty_metadata(handle, bh);
793 if (status) {
794 mlog_errno(status);
795 if (!is_handle_aborted(handle)) {
796 journal_t *journal = handle->h_transaction->t_journal;
797 struct super_block *sb = bh->b_bdev->bd_super;
798
799 mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
800 "Aborting transaction and journal.\n");
801 handle->h_err = status;
802 jbd2_journal_abort_handle(handle);
803 jbd2_journal_abort(journal, status);
804 ocfs2_abort(sb, "Journal already aborted.\n");
805 }
806 }
807}
808
809#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
810
811void ocfs2_set_journal_params(struct ocfs2_super *osb)
812{
813 journal_t *journal = osb->journal->j_journal;
814 unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
815
816 if (osb->osb_commit_interval)
817 commit_interval = osb->osb_commit_interval;
818
819 write_lock(&journal->j_state_lock);
820 journal->j_commit_interval = commit_interval;
821 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
822 journal->j_flags |= JBD2_BARRIER;
823 else
824 journal->j_flags &= ~JBD2_BARRIER;
825 write_unlock(&journal->j_state_lock);
826}
827
828int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
829{
830 int status = -1;
831 struct inode *inode = NULL;
832 journal_t *j_journal = NULL;
833 struct ocfs2_dinode *di = NULL;
834 struct buffer_head *bh = NULL;
835 struct ocfs2_super *osb;
836 int inode_lock = 0;
837
838 BUG_ON(!journal);
839
840 osb = journal->j_osb;
841
842
843 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
844 osb->slot_num);
845 if (inode == NULL) {
846 status = -EACCES;
847 mlog_errno(status);
848 goto done;
849 }
850 if (is_bad_inode(inode)) {
851 mlog(ML_ERROR, "access error (bad inode)\n");
852 iput(inode);
853 inode = NULL;
854 status = -EACCES;
855 goto done;
856 }
857
858 SET_INODE_JOURNAL(inode);
859 OCFS2_I(inode)->ip_open_count++;
860
861
862
863
864 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
865 if (status < 0) {
866 if (status != -ERESTARTSYS)
867 mlog(ML_ERROR, "Could not get lock on journal!\n");
868 goto done;
869 }
870
871 inode_lock = 1;
872 di = (struct ocfs2_dinode *)bh->b_data;
873
874 if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) {
875 mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
876 i_size_read(inode));
877 status = -EINVAL;
878 goto done;
879 }
880
881 trace_ocfs2_journal_init(i_size_read(inode),
882 (unsigned long long)inode->i_blocks,
883 OCFS2_I(inode)->ip_clusters);
884
885
886 j_journal = jbd2_journal_init_inode(inode);
887 if (j_journal == NULL) {
888 mlog(ML_ERROR, "Linux journal layer error\n");
889 status = -EINVAL;
890 goto done;
891 }
892
893 trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
894
895 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
896 OCFS2_JOURNAL_DIRTY_FL);
897
898 journal->j_journal = j_journal;
899 journal->j_inode = inode;
900 journal->j_bh = bh;
901
902 ocfs2_set_journal_params(osb);
903
904 journal->j_state = OCFS2_JOURNAL_LOADED;
905
906 status = 0;
907done:
908 if (status < 0) {
909 if (inode_lock)
910 ocfs2_inode_unlock(inode, 1);
911 brelse(bh);
912 if (inode) {
913 OCFS2_I(inode)->ip_open_count--;
914 iput(inode);
915 }
916 }
917
918 return status;
919}
920
921static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
922{
923 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
924}
925
926static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
927{
928 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
929}
930
931static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
932 int dirty, int replayed)
933{
934 int status;
935 unsigned int flags;
936 struct ocfs2_journal *journal = osb->journal;
937 struct buffer_head *bh = journal->j_bh;
938 struct ocfs2_dinode *fe;
939
940 fe = (struct ocfs2_dinode *)bh->b_data;
941
942
943
944
945 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
946
947 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
948 if (dirty)
949 flags |= OCFS2_JOURNAL_DIRTY_FL;
950 else
951 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
952 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
953
954 if (replayed)
955 ocfs2_bump_recovery_generation(fe);
956
957 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
958 status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
959 if (status < 0)
960 mlog_errno(status);
961
962 return status;
963}
964
965
966
967
968
969void ocfs2_journal_shutdown(struct ocfs2_super *osb)
970{
971 struct ocfs2_journal *journal = NULL;
972 int status = 0;
973 struct inode *inode = NULL;
974 int num_running_trans = 0;
975
976 BUG_ON(!osb);
977
978 journal = osb->journal;
979 if (!journal)
980 goto done;
981
982 inode = journal->j_inode;
983
984 if (journal->j_state != OCFS2_JOURNAL_LOADED)
985 goto done;
986
987
988 if (!igrab(inode))
989 BUG();
990
991 num_running_trans = atomic_read(&(osb->journal->j_num_trans));
992 trace_ocfs2_journal_shutdown(num_running_trans);
993
994
995
996
997
998 journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;
999
1000
1001
1002
1003 if (osb->commit_task) {
1004
1005 trace_ocfs2_journal_shutdown_wait(osb->commit_task);
1006 kthread_stop(osb->commit_task);
1007 osb->commit_task = NULL;
1008 }
1009
1010 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
1011
1012 if (ocfs2_mount_local(osb)) {
1013 jbd2_journal_lock_updates(journal->j_journal);
1014 status = jbd2_journal_flush(journal->j_journal);
1015 jbd2_journal_unlock_updates(journal->j_journal);
1016 if (status < 0)
1017 mlog_errno(status);
1018 }
1019
1020 if (status == 0) {
1021
1022
1023
1024
1025 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
1026 if (status < 0)
1027 mlog_errno(status);
1028 }
1029
1030
1031 jbd2_journal_destroy(journal->j_journal);
1032 journal->j_journal = NULL;
1033
1034 OCFS2_I(inode)->ip_open_count--;
1035
1036
1037 ocfs2_inode_unlock(inode, 1);
1038
1039 brelse(journal->j_bh);
1040 journal->j_bh = NULL;
1041
1042 journal->j_state = OCFS2_JOURNAL_FREE;
1043
1044
1045done:
1046 iput(inode);
1047}
1048
1049static void ocfs2_clear_journal_error(struct super_block *sb,
1050 journal_t *journal,
1051 int slot)
1052{
1053 int olderr;
1054
1055 olderr = jbd2_journal_errno(journal);
1056 if (olderr) {
1057 mlog(ML_ERROR, "File system error %d recorded in "
1058 "journal %u.\n", olderr, slot);
1059 mlog(ML_ERROR, "File system on device %s needs checking.\n",
1060 sb->s_id);
1061
1062 jbd2_journal_ack_err(journal);
1063 jbd2_journal_clear_err(journal);
1064 }
1065}
1066
1067int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
1068{
1069 int status = 0;
1070 struct ocfs2_super *osb;
1071
1072 BUG_ON(!journal);
1073
1074 osb = journal->j_osb;
1075
1076 status = jbd2_journal_load(journal->j_journal);
1077 if (status < 0) {
1078 mlog(ML_ERROR, "Failed to load journal!\n");
1079 goto done;
1080 }
1081
1082 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
1083
1084 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
1085 if (status < 0) {
1086 mlog_errno(status);
1087 goto done;
1088 }
1089
1090
1091 if (!local) {
1092 osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
1093 "ocfs2cmt-%s", osb->uuid_str);
1094 if (IS_ERR(osb->commit_task)) {
1095 status = PTR_ERR(osb->commit_task);
1096 osb->commit_task = NULL;
1097 mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
1098 "error=%d", status);
1099 goto done;
1100 }
1101 } else
1102 osb->commit_task = NULL;
1103
1104done:
1105 return status;
1106}
1107
1108
1109
1110
1111int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1112{
1113 int status;
1114
1115 BUG_ON(!journal);
1116
1117 status = jbd2_journal_wipe(journal->j_journal, full);
1118 if (status < 0) {
1119 mlog_errno(status);
1120 goto bail;
1121 }
1122
1123 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
1124 if (status < 0)
1125 mlog_errno(status);
1126
1127bail:
1128 return status;
1129}
1130
1131static int ocfs2_recovery_completed(struct ocfs2_super *osb)
1132{
1133 int empty;
1134 struct ocfs2_recovery_map *rm = osb->recovery_map;
1135
1136 spin_lock(&osb->osb_lock);
1137 empty = (rm->rm_used == 0);
1138 spin_unlock(&osb->osb_lock);
1139
1140 return empty;
1141}
1142
/* Sleep on recovery_event until ocfs2_recovery_completed() returns non-zero. */
void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
{
 wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
}
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158static int ocfs2_force_read_journal(struct inode *inode)
1159{
1160 int status = 0;
1161 int i;
1162 u64 v_blkno, p_blkno, p_blocks, num_blocks;
1163 struct buffer_head *bh = NULL;
1164 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1165
1166 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
1167 v_blkno = 0;
1168 while (v_blkno < num_blocks) {
1169 status = ocfs2_extent_map_get_blocks(inode, v_blkno,
1170 &p_blkno, &p_blocks, NULL);
1171 if (status < 0) {
1172 mlog_errno(status);
1173 goto bail;
1174 }
1175
1176 for (i = 0; i < p_blocks; i++, p_blkno++) {
1177 bh = __find_get_block(osb->sb->s_bdev, p_blkno,
1178 osb->sb->s_blocksize);
1179
1180 if (!bh)
1181 continue;
1182
1183 brelse(bh);
1184 bh = NULL;
1185
1186
1187
1188 status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh);
1189 if (status < 0) {
1190 mlog_errno(status);
1191 goto bail;
1192 }
1193
1194 brelse(bh);
1195 bh = NULL;
1196 }
1197
1198 v_blkno += p_blocks;
1199 }
1200
1201bail:
1202 return status;
1203}
1204
/*
 * One unit of deferred recovery work.  Items are created by
 * ocfs2_queue_recovery_completion(), linked on journal->j_la_cleanups and
 * consumed (and freed) by ocfs2_complete_recovery().
 */
struct ocfs2_la_recovery_item {
 /* link on journal->j_la_cleanups */
 struct list_head lri_list;
 /* slot the work applies to */
 int lri_slot;
 /* may be NULL; kfree()d by ocfs2_complete_recovery() */
 struct ocfs2_dinode *lri_la_dinode;
 /* may be NULL; kfree()d by ocfs2_complete_recovery() */
 struct ocfs2_dinode *lri_tl_dinode;
 /* may be NULL; handed to ocfs2_finish_quota_recovery() */
 struct ocfs2_quota_recovery *lri_qrec;
 /* passed to ocfs2_recover_orphans() */
 enum ocfs2_orphan_reco_type lri_orphan_reco_type;
};
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224void ocfs2_complete_recovery(struct work_struct *work)
1225{
1226 int ret = 0;
1227 struct ocfs2_journal *journal =
1228 container_of(work, struct ocfs2_journal, j_recovery_work);
1229 struct ocfs2_super *osb = journal->j_osb;
1230 struct ocfs2_dinode *la_dinode, *tl_dinode;
1231 struct ocfs2_la_recovery_item *item, *n;
1232 struct ocfs2_quota_recovery *qrec;
1233 enum ocfs2_orphan_reco_type orphan_reco_type;
1234 LIST_HEAD(tmp_la_list);
1235
1236 trace_ocfs2_complete_recovery(
1237 (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
1238
1239 spin_lock(&journal->j_lock);
1240 list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
1241 spin_unlock(&journal->j_lock);
1242
1243 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
1244 list_del_init(&item->lri_list);
1245
1246 ocfs2_wait_on_quotas(osb);
1247
1248 la_dinode = item->lri_la_dinode;
1249 tl_dinode = item->lri_tl_dinode;
1250 qrec = item->lri_qrec;
1251 orphan_reco_type = item->lri_orphan_reco_type;
1252
1253 trace_ocfs2_complete_recovery_slot(item->lri_slot,
1254 la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
1255 tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
1256 qrec);
1257
1258 if (la_dinode) {
1259 ret = ocfs2_complete_local_alloc_recovery(osb,
1260 la_dinode);
1261 if (ret < 0)
1262 mlog_errno(ret);
1263
1264 kfree(la_dinode);
1265 }
1266
1267 if (tl_dinode) {
1268 ret = ocfs2_complete_truncate_log_recovery(osb,
1269 tl_dinode);
1270 if (ret < 0)
1271 mlog_errno(ret);
1272
1273 kfree(tl_dinode);
1274 }
1275
1276 ret = ocfs2_recover_orphans(osb, item->lri_slot,
1277 orphan_reco_type);
1278 if (ret < 0)
1279 mlog_errno(ret);
1280
1281 if (qrec) {
1282 ret = ocfs2_finish_quota_recovery(osb, qrec,
1283 item->lri_slot);
1284 if (ret < 0)
1285 mlog_errno(ret);
1286
1287 }
1288
1289 kfree(item);
1290 }
1291
1292 trace_ocfs2_complete_recovery_end(ret);
1293}
1294
1295
1296
1297
1298static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1299 int slot_num,
1300 struct ocfs2_dinode *la_dinode,
1301 struct ocfs2_dinode *tl_dinode,
1302 struct ocfs2_quota_recovery *qrec,
1303 enum ocfs2_orphan_reco_type orphan_reco_type)
1304{
1305 struct ocfs2_la_recovery_item *item;
1306
1307 item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);
1308 if (!item) {
1309
1310
1311
1312 kfree(la_dinode);
1313 kfree(tl_dinode);
1314
1315 if (qrec)
1316 ocfs2_free_quota_recovery(qrec);
1317
1318 mlog_errno(-ENOMEM);
1319 return;
1320 }
1321
1322 INIT_LIST_HEAD(&item->lri_list);
1323 item->lri_la_dinode = la_dinode;
1324 item->lri_slot = slot_num;
1325 item->lri_tl_dinode = tl_dinode;
1326 item->lri_qrec = qrec;
1327 item->lri_orphan_reco_type = orphan_reco_type;
1328
1329 spin_lock(&journal->j_lock);
1330 list_add_tail(&item->lri_list, &journal->j_la_cleanups);
1331 queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work);
1332 spin_unlock(&journal->j_lock);
1333}
1334
1335
1336
1337void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
1338{
1339 struct ocfs2_journal *journal = osb->journal;
1340
1341 if (ocfs2_is_hard_readonly(osb))
1342 return;
1343
1344
1345
1346 ocfs2_queue_recovery_completion(journal, osb->slot_num,
1347 osb->local_alloc_copy, NULL, NULL,
1348 ORPHAN_NEED_TRUNCATE);
1349 ocfs2_schedule_truncate_log_flush(osb, 0);
1350
1351 osb->local_alloc_copy = NULL;
1352
1353
1354 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1355 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1356 ocfs2_free_replay_slots(osb);
1357}
1358
1359void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
1360{
1361 if (osb->quota_rec) {
1362 ocfs2_queue_recovery_completion(osb->journal,
1363 osb->slot_num,
1364 NULL,
1365 NULL,
1366 osb->quota_rec,
1367 ORPHAN_NEED_TRUNCATE);
1368 osb->quota_rec = NULL;
1369 }
1370}
1371
1372static int __ocfs2_recovery_thread(void *arg)
1373{
1374 int status, node_num, slot_num;
1375 struct ocfs2_super *osb = arg;
1376 struct ocfs2_recovery_map *rm = osb->recovery_map;
1377 int *rm_quota = NULL;
1378 int rm_quota_used = 0, i;
1379 struct ocfs2_quota_recovery *qrec;
1380
1381 status = ocfs2_wait_on_mount(osb);
1382 if (status < 0) {
1383 goto bail;
1384 }
1385
1386 rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
1387 if (!rm_quota) {
1388 status = -ENOMEM;
1389 goto bail;
1390 }
1391restart:
1392 status = ocfs2_super_lock(osb, 1);
1393 if (status < 0) {
1394 mlog_errno(status);
1395 goto bail;
1396 }
1397
1398 status = ocfs2_compute_replay_slots(osb);
1399 if (status < 0)
1400 mlog_errno(status);
1401
1402
1403 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1404 NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);
1405
1406 spin_lock(&osb->osb_lock);
1407 while (rm->rm_used) {
1408
1409
1410 node_num = rm->rm_entries[0];
1411 spin_unlock(&osb->osb_lock);
1412 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1413 trace_ocfs2_recovery_thread_node(node_num, slot_num);
1414 if (slot_num == -ENOENT) {
1415 status = 0;
1416 goto skip_recovery;
1417 }
1418
1419
1420
1421
1422
1423
1424
1425 for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
1426 if (i == rm_quota_used)
1427 rm_quota[rm_quota_used++] = slot_num;
1428
1429 status = ocfs2_recover_node(osb, node_num, slot_num);
1430skip_recovery:
1431 if (!status) {
1432 ocfs2_recovery_map_clear(osb, node_num);
1433 } else {
1434 mlog(ML_ERROR,
1435 "Error %d recovering node %d on device (%u,%u)!\n",
1436 status, node_num,
1437 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1438 mlog(ML_ERROR, "Volume requires unmount.\n");
1439 }
1440
1441 spin_lock(&osb->osb_lock);
1442 }
1443 spin_unlock(&osb->osb_lock);
1444 trace_ocfs2_recovery_thread_end(status);
1445
1446
1447 status = ocfs2_check_journals_nolocks(osb);
1448 status = (status == -EROFS) ? 0 : status;
1449 if (status < 0)
1450 mlog_errno(status);
1451
1452
1453
1454
1455 for (i = 0; i < rm_quota_used; i++) {
1456 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
1457 if (IS_ERR(qrec)) {
1458 status = PTR_ERR(qrec);
1459 mlog_errno(status);
1460 continue;
1461 }
1462 ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
1463 NULL, NULL, qrec,
1464 ORPHAN_NEED_TRUNCATE);
1465 }
1466
1467 ocfs2_super_unlock(osb, 1);
1468
1469
1470 ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
1471
1472bail:
1473 mutex_lock(&osb->recovery_lock);
1474 if (!status && !ocfs2_recovery_completed(osb)) {
1475 mutex_unlock(&osb->recovery_lock);
1476 goto restart;
1477 }
1478
1479 ocfs2_free_replay_slots(osb);
1480 osb->recovery_thread_task = NULL;
1481 mb();
1482 wake_up(&osb->recovery_event);
1483
1484 mutex_unlock(&osb->recovery_lock);
1485
1486 kfree(rm_quota);
1487
1488
1489
1490
1491 complete_and_exit(NULL, status);
1492}
1493
1494void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1495{
1496 mutex_lock(&osb->recovery_lock);
1497
1498 trace_ocfs2_recovery_thread(node_num, osb->node_num,
1499 osb->disable_recovery, osb->recovery_thread_task,
1500 osb->disable_recovery ?
1501 -1 : ocfs2_recovery_map_set(osb, node_num));
1502
1503 if (osb->disable_recovery)
1504 goto out;
1505
1506 if (osb->recovery_thread_task)
1507 goto out;
1508
1509 osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb,
1510 "ocfs2rec-%s", osb->uuid_str);
1511 if (IS_ERR(osb->recovery_thread_task)) {
1512 mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
1513 osb->recovery_thread_task = NULL;
1514 }
1515
1516out:
1517 mutex_unlock(&osb->recovery_lock);
1518 wake_up(&osb->recovery_event);
1519}
1520
1521static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1522 int slot_num,
1523 struct buffer_head **bh,
1524 struct inode **ret_inode)
1525{
1526 int status = -EACCES;
1527 struct inode *inode = NULL;
1528
1529 BUG_ON(slot_num >= osb->max_slots);
1530
1531 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1532 slot_num);
1533 if (!inode || is_bad_inode(inode)) {
1534 mlog_errno(status);
1535 goto bail;
1536 }
1537 SET_INODE_JOURNAL(inode);
1538
1539 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
1540 if (status < 0) {
1541 mlog_errno(status);
1542 goto bail;
1543 }
1544
1545 status = 0;
1546
1547bail:
1548 if (inode) {
1549 if (status || !ret_inode)
1550 iput(inode);
1551 else
1552 *ret_inode = inode;
1553 }
1554 return status;
1555}
1556
1557
1558
1559static int ocfs2_replay_journal(struct ocfs2_super *osb,
1560 int node_num,
1561 int slot_num)
1562{
1563 int status;
1564 int got_lock = 0;
1565 unsigned int flags;
1566 struct inode *inode = NULL;
1567 struct ocfs2_dinode *fe;
1568 journal_t *journal = NULL;
1569 struct buffer_head *bh = NULL;
1570 u32 slot_reco_gen;
1571
1572 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1573 if (status) {
1574 mlog_errno(status);
1575 goto done;
1576 }
1577
1578 fe = (struct ocfs2_dinode *)bh->b_data;
1579 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1580 brelse(bh);
1581 bh = NULL;
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1592 trace_ocfs2_replay_journal_recovered(slot_num,
1593 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1594 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1595 status = -EBUSY;
1596 goto done;
1597 }
1598
1599
1600
1601 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1602 if (status < 0) {
1603 trace_ocfs2_replay_journal_lock_err(status);
1604 if (status != -ERESTARTSYS)
1605 mlog(ML_ERROR, "Could not lock journal!\n");
1606 goto done;
1607 }
1608 got_lock = 1;
1609
1610 fe = (struct ocfs2_dinode *) bh->b_data;
1611
1612 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1613 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1614
1615 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1616 trace_ocfs2_replay_journal_skip(node_num);
1617
1618 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1619 goto done;
1620 }
1621
1622
1623 ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1624
1625 printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\
1626 "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
1627 MINOR(osb->sb->s_dev));
1628
1629 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
1630
1631 status = ocfs2_force_read_journal(inode);
1632 if (status < 0) {
1633 mlog_errno(status);
1634 goto done;
1635 }
1636
1637 journal = jbd2_journal_init_inode(inode);
1638 if (journal == NULL) {
1639 mlog(ML_ERROR, "Linux journal layer error\n");
1640 status = -EIO;
1641 goto done;
1642 }
1643
1644 status = jbd2_journal_load(journal);
1645 if (status < 0) {
1646 mlog_errno(status);
1647 if (!igrab(inode))
1648 BUG();
1649 jbd2_journal_destroy(journal);
1650 goto done;
1651 }
1652
1653 ocfs2_clear_journal_error(osb->sb, journal, slot_num);
1654
1655
1656 jbd2_journal_lock_updates(journal);
1657 status = jbd2_journal_flush(journal);
1658 jbd2_journal_unlock_updates(journal);
1659 if (status < 0)
1660 mlog_errno(status);
1661
1662
1663 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1664 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1665 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
1666
1667
1668 ocfs2_bump_recovery_generation(fe);
1669 osb->slot_recovery_generations[slot_num] =
1670 ocfs2_get_recovery_generation(fe);
1671
1672 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
1673 status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
1674 if (status < 0)
1675 mlog_errno(status);
1676
1677 if (!igrab(inode))
1678 BUG();
1679
1680 jbd2_journal_destroy(journal);
1681
1682 printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\
1683 "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev),
1684 MINOR(osb->sb->s_dev));
1685done:
1686
1687 if (got_lock)
1688 ocfs2_inode_unlock(inode, 1);
1689
1690 iput(inode);
1691 brelse(bh);
1692
1693 return status;
1694}
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708static int ocfs2_recover_node(struct ocfs2_super *osb,
1709 int node_num, int slot_num)
1710{
1711 int status = 0;
1712 struct ocfs2_dinode *la_copy = NULL;
1713 struct ocfs2_dinode *tl_copy = NULL;
1714
1715 trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
1716
1717
1718
1719 BUG_ON(osb->node_num == node_num);
1720
1721 status = ocfs2_replay_journal(osb, node_num, slot_num);
1722 if (status < 0) {
1723 if (status == -EBUSY) {
1724 trace_ocfs2_recover_node_skip(slot_num, node_num);
1725 status = 0;
1726 goto done;
1727 }
1728 mlog_errno(status);
1729 goto done;
1730 }
1731
1732
1733 status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy);
1734 if (status < 0) {
1735 mlog_errno(status);
1736 goto done;
1737 }
1738
1739
1740
1741
1742 status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy);
1743 if (status < 0)
1744 mlog_errno(status);
1745
1746
1747
1748 status = ocfs2_clear_slot(osb, slot_num);
1749 if (status < 0)
1750 mlog_errno(status);
1751
1752
1753 ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
1754 tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
1755
1756 status = 0;
1757done:
1758
1759 return status;
1760}
1761
1762
1763
1764
1765static int ocfs2_trylock_journal(struct ocfs2_super *osb,
1766 int slot_num)
1767{
1768 int status, flags;
1769 struct inode *inode = NULL;
1770
1771 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1772 slot_num);
1773 if (inode == NULL) {
1774 mlog(ML_ERROR, "access error\n");
1775 status = -EACCES;
1776 goto bail;
1777 }
1778 if (is_bad_inode(inode)) {
1779 mlog(ML_ERROR, "access error (bad inode)\n");
1780 iput(inode);
1781 inode = NULL;
1782 status = -EACCES;
1783 goto bail;
1784 }
1785 SET_INODE_JOURNAL(inode);
1786
1787 flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
1788 status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
1789 if (status < 0) {
1790 if (status != -EAGAIN)
1791 mlog_errno(status);
1792 goto bail;
1793 }
1794
1795 ocfs2_inode_unlock(inode, 1);
1796bail:
1797 iput(inode);
1798
1799 return status;
1800}
1801
1802
1803
1804int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1805{
1806 unsigned int node_num;
1807 int status, i;
1808 u32 gen;
1809 struct buffer_head *bh = NULL;
1810 struct ocfs2_dinode *di;
1811
1812
1813
1814
1815 for (i = 0; i < osb->max_slots; i++) {
1816
1817 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1818 if (status) {
1819 mlog_errno(status);
1820 goto bail;
1821 }
1822 di = (struct ocfs2_dinode *)bh->b_data;
1823 gen = ocfs2_get_recovery_generation(di);
1824 brelse(bh);
1825 bh = NULL;
1826
1827 spin_lock(&osb->osb_lock);
1828 osb->slot_recovery_generations[i] = gen;
1829
1830 trace_ocfs2_mark_dead_nodes(i,
1831 osb->slot_recovery_generations[i]);
1832
1833 if (i == osb->slot_num) {
1834 spin_unlock(&osb->osb_lock);
1835 continue;
1836 }
1837
1838 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
1839 if (status == -ENOENT) {
1840 spin_unlock(&osb->osb_lock);
1841 continue;
1842 }
1843
1844 if (__ocfs2_recovery_map_test(osb, node_num)) {
1845 spin_unlock(&osb->osb_lock);
1846 continue;
1847 }
1848 spin_unlock(&osb->osb_lock);
1849
1850
1851
1852
1853 status = ocfs2_trylock_journal(osb, i);
1854 if (!status) {
1855
1856
1857
1858 ocfs2_recovery_thread(osb, node_num);
1859 } else if ((status < 0) && (status != -EAGAIN)) {
1860 mlog_errno(status);
1861 goto bail;
1862 }
1863 }
1864
1865 status = 0;
1866bail:
1867 return status;
1868}
1869
1870
1871
1872
1873
1874
1875static inline unsigned long ocfs2_orphan_scan_timeout(void)
1876{
1877 unsigned long time;
1878
1879 get_random_bytes(&time, sizeof(time));
1880 time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
1881 return msecs_to_jiffies(time);
1882}
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1912{
1913 struct ocfs2_orphan_scan *os;
1914 int status, i;
1915 u32 seqno = 0;
1916
1917 os = &osb->osb_orphan_scan;
1918
1919 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1920 goto out;
1921
1922 trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
1923 atomic_read(&os->os_state));
1924
1925 status = ocfs2_orphan_scan_lock(osb, &seqno);
1926 if (status < 0) {
1927 if (status != -EAGAIN)
1928 mlog_errno(status);
1929 goto out;
1930 }
1931
1932
1933 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1934 goto unlock;
1935
1936 if (os->os_seqno != seqno) {
1937 os->os_seqno = seqno;
1938 goto unlock;
1939 }
1940
1941 for (i = 0; i < osb->max_slots; i++)
1942 ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
1943 NULL, ORPHAN_NO_NEED_TRUNCATE);
1944
1945
1946
1947
1948 seqno++;
1949 os->os_count++;
1950 os->os_scantime = ktime_get_seconds();
1951unlock:
1952 ocfs2_orphan_scan_unlock(osb, seqno);
1953out:
1954 trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
1955 atomic_read(&os->os_state));
1956 return;
1957}
1958
1959
1960static void ocfs2_orphan_scan_work(struct work_struct *work)
1961{
1962 struct ocfs2_orphan_scan *os;
1963 struct ocfs2_super *osb;
1964
1965 os = container_of(work, struct ocfs2_orphan_scan,
1966 os_orphan_scan_work.work);
1967 osb = os->os_osb;
1968
1969 mutex_lock(&os->os_lock);
1970 ocfs2_queue_orphan_scan(osb);
1971 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1972 queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
1973 ocfs2_orphan_scan_timeout());
1974 mutex_unlock(&os->os_lock);
1975}
1976
1977void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1978{
1979 struct ocfs2_orphan_scan *os;
1980
1981 os = &osb->osb_orphan_scan;
1982 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
1983 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1984 mutex_lock(&os->os_lock);
1985 cancel_delayed_work(&os->os_orphan_scan_work);
1986 mutex_unlock(&os->os_lock);
1987 }
1988}
1989
1990void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1991{
1992 struct ocfs2_orphan_scan *os;
1993
1994 os = &osb->osb_orphan_scan;
1995 os->os_osb = osb;
1996 os->os_count = 0;
1997 os->os_seqno = 0;
1998 mutex_init(&os->os_lock);
1999 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
2000}
2001
2002void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
2003{
2004 struct ocfs2_orphan_scan *os;
2005
2006 os = &osb->osb_orphan_scan;
2007 os->os_scantime = ktime_get_seconds();
2008 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
2009 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
2010 else {
2011 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
2012 queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
2013 ocfs2_orphan_scan_timeout());
2014 }
2015}
2016
2017struct ocfs2_orphan_filldir_priv {
2018 struct dir_context ctx;
2019 struct inode *head;
2020 struct ocfs2_super *osb;
2021 enum ocfs2_orphan_reco_type orphan_reco_type;
2022};
2023
2024static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
2025 int name_len, loff_t pos, u64 ino,
2026 unsigned type)
2027{
2028 struct ocfs2_orphan_filldir_priv *p =
2029 container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx);
2030 struct inode *iter;
2031
2032 if (name_len == 1 && !strncmp(".", name, 1))
2033 return 0;
2034 if (name_len == 2 && !strncmp("..", name, 2))
2035 return 0;
2036
2037
2038 if ((p->orphan_reco_type == ORPHAN_NO_NEED_TRUNCATE) &&
2039 (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2040 OCFS2_DIO_ORPHAN_PREFIX_LEN)))
2041 return 0;
2042
2043
2044 iter = ocfs2_iget(p->osb, ino,
2045 OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
2046 if (IS_ERR(iter))
2047 return 0;
2048
2049 if (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX,
2050 OCFS2_DIO_ORPHAN_PREFIX_LEN))
2051 OCFS2_I(iter)->ip_flags |= OCFS2_INODE_DIO_ORPHAN_ENTRY;
2052
2053
2054
2055 if (OCFS2_I(iter)->ip_next_orphan) {
2056 iput(iter);
2057 return 0;
2058 }
2059
2060 trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
2061
2062
2063 OCFS2_I(iter)->ip_next_orphan = p->head;
2064 p->head = iter;
2065
2066 return 0;
2067}
2068
2069static int ocfs2_queue_orphans(struct ocfs2_super *osb,
2070 int slot,
2071 struct inode **head,
2072 enum ocfs2_orphan_reco_type orphan_reco_type)
2073{
2074 int status;
2075 struct inode *orphan_dir_inode = NULL;
2076 struct ocfs2_orphan_filldir_priv priv = {
2077 .ctx.actor = ocfs2_orphan_filldir,
2078 .osb = osb,
2079 .head = *head,
2080 .orphan_reco_type = orphan_reco_type
2081 };
2082
2083 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2084 ORPHAN_DIR_SYSTEM_INODE,
2085 slot);
2086 if (!orphan_dir_inode) {
2087 status = -ENOENT;
2088 mlog_errno(status);
2089 return status;
2090 }
2091
2092 inode_lock(orphan_dir_inode);
2093 status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
2094 if (status < 0) {
2095 mlog_errno(status);
2096 goto out;
2097 }
2098
2099 status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx);
2100 if (status) {
2101 mlog_errno(status);
2102 goto out_cluster;
2103 }
2104
2105 *head = priv.head;
2106
2107out_cluster:
2108 ocfs2_inode_unlock(orphan_dir_inode, 0);
2109out:
2110 inode_unlock(orphan_dir_inode);
2111 iput(orphan_dir_inode);
2112 return status;
2113}
2114
2115static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb,
2116 int slot)
2117{
2118 int ret;
2119
2120 spin_lock(&osb->osb_lock);
2121 ret = !osb->osb_orphan_wipes[slot];
2122 spin_unlock(&osb->osb_lock);
2123 return ret;
2124}
2125
2126static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb,
2127 int slot)
2128{
2129 spin_lock(&osb->osb_lock);
2130
2131
2132 ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
2133 while (osb->osb_orphan_wipes[slot]) {
2134
2135
2136
2137 spin_unlock(&osb->osb_lock);
2138 wait_event_interruptible(osb->osb_wipe_event,
2139 ocfs2_orphan_recovery_can_continue(osb, slot));
2140 spin_lock(&osb->osb_lock);
2141 }
2142 spin_unlock(&osb->osb_lock);
2143}
2144
2145static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
2146 int slot)
2147{
2148 ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot);
2149}
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2170 int slot,
2171 enum ocfs2_orphan_reco_type orphan_reco_type)
2172{
2173 int ret = 0;
2174 struct inode *inode = NULL;
2175 struct inode *iter;
2176 struct ocfs2_inode_info *oi;
2177 struct buffer_head *di_bh = NULL;
2178 struct ocfs2_dinode *di = NULL;
2179
2180 trace_ocfs2_recover_orphans(slot);
2181
2182 ocfs2_mark_recovering_orphan_dir(osb, slot);
2183 ret = ocfs2_queue_orphans(osb, slot, &inode, orphan_reco_type);
2184 ocfs2_clear_recovering_orphan_dir(osb, slot);
2185
2186
2187
2188 if (ret)
2189 mlog_errno(ret);
2190
2191 while (inode) {
2192 oi = OCFS2_I(inode);
2193 trace_ocfs2_recover_orphans_iput(
2194 (unsigned long long)oi->ip_blkno);
2195
2196 iter = oi->ip_next_orphan;
2197 oi->ip_next_orphan = NULL;
2198
2199 if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
2200 inode_lock(inode);
2201 ret = ocfs2_rw_lock(inode, 1);
2202 if (ret < 0) {
2203 mlog_errno(ret);
2204 goto unlock_mutex;
2205 }
2206
2207
2208
2209
2210 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2211 if (ret) {
2212 mlog_errno(ret);
2213 goto unlock_rw;
2214 }
2215
2216 di = (struct ocfs2_dinode *)di_bh->b_data;
2217
2218 if (di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)) {
2219 ret = ocfs2_truncate_file(inode, di_bh,
2220 i_size_read(inode));
2221 if (ret < 0) {
2222 if (ret != -ENOSPC)
2223 mlog_errno(ret);
2224 goto unlock_inode;
2225 }
2226
2227 ret = ocfs2_del_inode_from_orphan(osb, inode,
2228 di_bh, 0, 0);
2229 if (ret)
2230 mlog_errno(ret);
2231 }
2232unlock_inode:
2233 ocfs2_inode_unlock(inode, 1);
2234 brelse(di_bh);
2235 di_bh = NULL;
2236unlock_rw:
2237 ocfs2_rw_unlock(inode, 1);
2238unlock_mutex:
2239 inode_unlock(inode);
2240
2241
2242 oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
2243 } else {
2244 spin_lock(&oi->ip_lock);
2245
2246
2247 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2248 spin_unlock(&oi->ip_lock);
2249 }
2250
2251 iput(inode);
2252 inode = iter;
2253 }
2254
2255 return ret;
2256}
2257
2258static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
2259{
2260
2261
2262
2263 wait_event(osb->osb_mount_event,
2264 (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
2265 atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
2266 atomic_read(&osb->vol_state) == VOLUME_DISABLED);
2267
2268
2269
2270
2271 if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
2272 trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
2273 mlog(0, "mount error, exiting!\n");
2274 return -EBUSY;
2275 }
2276
2277 return 0;
2278}
2279
2280static int ocfs2_commit_thread(void *arg)
2281{
2282 int status;
2283 struct ocfs2_super *osb = arg;
2284 struct ocfs2_journal *journal = osb->journal;
2285
2286
2287
2288
2289
2290 while (!(kthread_should_stop() &&
2291 atomic_read(&journal->j_num_trans) == 0)) {
2292
2293 wait_event_interruptible(osb->checkpoint_event,
2294 atomic_read(&journal->j_num_trans)
2295 || kthread_should_stop());
2296
2297 status = ocfs2_commit_cache(osb);
2298 if (status < 0) {
2299 static unsigned long abort_warn_time;
2300
2301
2302 if (printk_timed_ratelimit(&abort_warn_time, 60*HZ))
2303 mlog(ML_ERROR, "status = %d, journal is "
2304 "already aborted.\n", status);
2305
2306
2307
2308
2309
2310 msleep_interruptible(1000);
2311 }
2312
2313 if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
2314 mlog(ML_KTHREAD,
2315 "commit_thread: %u transactions pending on "
2316 "shutdown\n",
2317 atomic_read(&journal->j_num_trans));
2318 }
2319 }
2320
2321 return 0;
2322}
2323
2324
2325
2326
2327
2328
2329int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
2330{
2331 int ret = 0;
2332 unsigned int slot;
2333 struct buffer_head *di_bh = NULL;
2334 struct ocfs2_dinode *di;
2335 int journal_dirty = 0;
2336
2337 for(slot = 0; slot < osb->max_slots; slot++) {
2338 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
2339 if (ret) {
2340 mlog_errno(ret);
2341 goto out;
2342 }
2343
2344 di = (struct ocfs2_dinode *) di_bh->b_data;
2345
2346 osb->slot_recovery_generations[slot] =
2347 ocfs2_get_recovery_generation(di);
2348
2349 if (le32_to_cpu(di->id1.journal1.ij_flags) &
2350 OCFS2_JOURNAL_DIRTY_FL)
2351 journal_dirty = 1;
2352
2353 brelse(di_bh);
2354 di_bh = NULL;
2355 }
2356
2357out:
2358 if (journal_dirty)
2359 ret = -EROFS;
2360 return ret;
2361}
2362