linux/fs/gfs2/recovery.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#include <linux/module.h>
  11#include <linux/slab.h>
  12#include <linux/spinlock.h>
  13#include <linux/completion.h>
  14#include <linux/buffer_head.h>
  15#include <linux/gfs2_ondisk.h>
  16#include <linux/crc32.h>
  17
  18#include "gfs2.h"
  19#include "incore.h"
  20#include "bmap.h"
  21#include "glock.h"
  22#include "glops.h"
  23#include "lops.h"
  24#include "meta_io.h"
  25#include "recovery.h"
  26#include "super.h"
  27#include "util.h"
  28#include "dir.h"
  29
  /* Workqueue onto which gfs2_recover_journal() queues a journal's jd_work. */
  30struct workqueue_struct *gfs_recovery_wq;
  31
  32int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
  33                           struct buffer_head **bh)
  34{
  35        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
  36        struct gfs2_glock *gl = ip->i_gl;
  37        int new = 0;
  38        u64 dblock;
  39        u32 extlen;
  40        int error;
  41
  42        error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
  43        if (error)
  44                return error;
  45        if (!dblock) {
  46                gfs2_consist_inode(ip);
  47                return -EIO;
  48        }
  49
  50        *bh = gfs2_meta_ra(gl, dblock, extlen);
  51
  52        return error;
  53}
  54
  55int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
  56{
  57        struct list_head *head = &jd->jd_revoke_list;
  58        struct gfs2_revoke_replay *rr;
  59        int found = 0;
  60
  61        list_for_each_entry(rr, head, rr_list) {
  62                if (rr->rr_blkno == blkno) {
  63                        found = 1;
  64                        break;
  65                }
  66        }
  67
  68        if (found) {
  69                rr->rr_where = where;
  70                return 0;
  71        }
  72
  73        rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
  74        if (!rr)
  75                return -ENOMEM;
  76
  77        rr->rr_blkno = blkno;
  78        rr->rr_where = where;
  79        list_add(&rr->rr_list, head);
  80
  81        return 1;
  82}
  83
  84int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
  85{
  86        struct gfs2_revoke_replay *rr;
  87        int wrap, a, b, revoke;
  88        int found = 0;
  89
  90        list_for_each_entry(rr, &jd->jd_revoke_list, rr_list) {
  91                if (rr->rr_blkno == blkno) {
  92                        found = 1;
  93                        break;
  94                }
  95        }
  96
  97        if (!found)
  98                return 0;
  99
 100        wrap = (rr->rr_where < jd->jd_replay_tail);
 101        a = (jd->jd_replay_tail < where);
 102        b = (where < rr->rr_where);
 103        revoke = (wrap) ? (a || b) : (a && b);
 104
 105        return revoke;
 106}
 107
 108void gfs2_revoke_clean(struct gfs2_jdesc *jd)
 109{
 110        struct list_head *head = &jd->jd_revoke_list;
 111        struct gfs2_revoke_replay *rr;
 112
 113        while (!list_empty(head)) {
 114                rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
 115                list_del(&rr->rr_list);
 116                kfree(rr);
 117        }
 118}
 119
 120static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
 121{
 122        const struct gfs2_log_header *str = buf;
 123
 124        if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
 125            str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
 126                return 1;
 127
 128        lh->lh_sequence = be64_to_cpu(str->lh_sequence);
 129        lh->lh_flags = be32_to_cpu(str->lh_flags);
 130        lh->lh_tail = be32_to_cpu(str->lh_tail);
 131        lh->lh_blkno = be32_to_cpu(str->lh_blkno);
 132        lh->lh_hash = be32_to_cpu(str->lh_hash);
 133        return 0;
 134}
 135
 136/**
 137 * get_log_header - read the log header for a given segment
 138 * @jd: the journal
 139 * @blk: the block to look at
 140 * @lh: the log header to return
 141 *
 142 * Read the log header for a given segement in a given journal.  Do a few
 143 * sanity checks on it.
 144 *
 145 * Returns: 0 on success,
 146 *          1 if the header was invalid or incomplete,
 147 *          errno on error
 148 */
 149
 150static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 151                          struct gfs2_log_header_host *head)
 152{
 153        struct buffer_head *bh;
 154        struct gfs2_log_header_host uninitialized_var(lh);
 155        const u32 nothing = 0;
 156        u32 hash;
 157        int error;
 158
 159        error = gfs2_replay_read_block(jd, blk, &bh);
 160        if (error)
 161                return error;
 162
 163        hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
 164                                             sizeof(u32));
 165        hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
 166        hash ^= (u32)~0;
 167        error = gfs2_log_header_in(&lh, bh->b_data);
 168        brelse(bh);
 169
 170        if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
 171                return 1;
 172
 173        *head = lh;
 174
 175        return 0;
 176}
 177
 178/**
 179 * find_good_lh - find a good log header
 180 * @jd: the journal
 181 * @blk: the segment to start searching from
 182 * @lh: the log header to fill in
 183 * @forward: if true search forward in the log, else search backward
 184 *
 185 * Call get_log_header() to get a log header for a segment, but if the
 186 * segment is bad, either scan forward or backward until we find a good one.
 187 *
 188 * Returns: errno
 189 */
 190
 191static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
 192                        struct gfs2_log_header_host *head)
 193{
 194        unsigned int orig_blk = *blk;
 195        int error;
 196
 197        for (;;) {
 198                error = get_log_header(jd, *blk, head);
 199                if (error <= 0)
 200                        return error;
 201
 202                if (++*blk == jd->jd_blocks)
 203                        *blk = 0;
 204
 205                if (*blk == orig_blk) {
 206                        gfs2_consist_inode(GFS2_I(jd->jd_inode));
 207                        return -EIO;
 208                }
 209        }
 210}
 211
 212/**
 213 * jhead_scan - make sure we've found the head of the log
 214 * @jd: the journal
 215 * @head: this is filled in with the log descriptor of the head
 216 *
 217 * At this point, seg and lh should be either the head of the log or just
 218 * before.  Scan forward until we find the head.
 219 *
 220 * Returns: errno
 221 */
 222
 223static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 224{
 225        unsigned int blk = head->lh_blkno;
 226        struct gfs2_log_header_host lh;
 227        int error;
 228
 229        for (;;) {
 230                if (++blk == jd->jd_blocks)
 231                        blk = 0;
 232
 233                error = get_log_header(jd, blk, &lh);
 234                if (error < 0)
 235                        return error;
 236                if (error == 1)
 237                        continue;
 238
 239                if (lh.lh_sequence == head->lh_sequence) {
 240                        gfs2_consist_inode(GFS2_I(jd->jd_inode));
 241                        return -EIO;
 242                }
 243                if (lh.lh_sequence < head->lh_sequence)
 244                        break;
 245
 246                *head = lh;
 247        }
 248
 249        return 0;
 250}
 251
 252/**
 253 * gfs2_find_jhead - find the head of a log
 254 * @jd: the journal
 255 * @head: the log descriptor for the head of the log is returned here
 256 *
 257 * Do a binary search of a journal and find the valid log entry with the
 258 * highest sequence number.  (i.e. the log head)
 259 *
 260 * Returns: errno
 261 */
 262
 263int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 264{
 265        struct gfs2_log_header_host lh_1, lh_m;
 266        u32 blk_1, blk_2, blk_m;
 267        int error;
 268
 269        blk_1 = 0;
 270        blk_2 = jd->jd_blocks - 1;
 271
 272        for (;;) {
 273                blk_m = (blk_1 + blk_2) / 2;
 274
 275                error = find_good_lh(jd, &blk_1, &lh_1);
 276                if (error)
 277                        return error;
 278
 279                error = find_good_lh(jd, &blk_m, &lh_m);
 280                if (error)
 281                        return error;
 282
 283                if (blk_1 == blk_m || blk_m == blk_2)
 284                        break;
 285
 286                if (lh_1.lh_sequence <= lh_m.lh_sequence)
 287                        blk_1 = blk_m;
 288                else
 289                        blk_2 = blk_m;
 290        }
 291
 292        error = jhead_scan(jd, &lh_1);
 293        if (error)
 294                return error;
 295
 296        *head = lh_1;
 297
 298        return error;
 299}
 300
 301/**
 302 * foreach_descriptor - go through the active part of the log
 303 * @jd: the journal
 304 * @start: the first log header in the active region
 305 * @end: the last log header (don't process the contents of this entry))
 306 *
 307 * Call a given function once for every log descriptor in the active
 308 * portion of the log.
 309 *
 310 * Returns: errno
 311 */
 312
 313static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
 314                              unsigned int end, int pass)
 315{
 316        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 317        struct buffer_head *bh;
 318        struct gfs2_log_descriptor *ld;
 319        int error = 0;
 320        u32 length;
 321        __be64 *ptr;
 322        unsigned int offset = sizeof(struct gfs2_log_descriptor);
 323        offset += sizeof(__be64) - 1;
 324        offset &= ~(sizeof(__be64) - 1);
 325
 326        while (start != end) {
 327                error = gfs2_replay_read_block(jd, start, &bh);
 328                if (error)
 329                        return error;
 330                if (gfs2_meta_check(sdp, bh)) {
 331                        brelse(bh);
 332                        return -EIO;
 333                }
 334                ld = (struct gfs2_log_descriptor *)bh->b_data;
 335                length = be32_to_cpu(ld->ld_length);
 336
 337                if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
 338                        struct gfs2_log_header_host lh;
 339                        error = get_log_header(jd, start, &lh);
 340                        if (!error) {
 341                                gfs2_replay_incr_blk(jd, &start);
 342                                brelse(bh);
 343                                continue;
 344                        }
 345                        if (error == 1) {
 346                                gfs2_consist_inode(GFS2_I(jd->jd_inode));
 347                                error = -EIO;
 348                        }
 349                        brelse(bh);
 350                        return error;
 351                } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
 352                        brelse(bh);
 353                        return -EIO;
 354                }
 355                ptr = (__be64 *)(bh->b_data + offset);
 356                error = lops_scan_elements(jd, start, ld, ptr, pass);
 357                if (error) {
 358                        brelse(bh);
 359                        return error;
 360                }
 361
 362                while (length--)
 363                        gfs2_replay_incr_blk(jd, &start);
 364
 365                brelse(bh);
 366        }
 367
 368        return 0;
 369}
 370
 371/**
 372 * clean_journal - mark a dirty journal as being clean
 373 * @sdp: the filesystem
 374 * @jd: the journal
 375 * @gl: the journal's glock
 376 * @head: the head journal to start from
 377 *
 378 * Returns: errno
 379 */
 380
 381static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 382{
 383        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 384        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 385        unsigned int lblock;
 386        struct gfs2_log_header *lh;
 387        u32 hash;
 388        struct buffer_head *bh;
 389        int error;
 390        struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
 391
 392        lblock = head->lh_blkno;
 393        gfs2_replay_incr_blk(jd, &lblock);
 394        bh_map.b_size = 1 << ip->i_inode.i_blkbits;
 395        error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0);
 396        if (error)
 397                return error;
 398        if (!bh_map.b_blocknr) {
 399                gfs2_consist_inode(ip);
 400                return -EIO;
 401        }
 402
 403        bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
 404        lock_buffer(bh);
 405        memset(bh->b_data, 0, bh->b_size);
 406        set_buffer_uptodate(bh);
 407        clear_buffer_dirty(bh);
 408        unlock_buffer(bh);
 409
 410        lh = (struct gfs2_log_header *)bh->b_data;
 411        memset(lh, 0, sizeof(struct gfs2_log_header));
 412        lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 413        lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
 414        lh->lh_header.__pad0 = cpu_to_be64(0);
 415        lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
 416        lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
 417        lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
 418        lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
 419        lh->lh_blkno = cpu_to_be32(lblock);
 420        hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
 421        lh->lh_hash = cpu_to_be32(hash);
 422
 423        set_buffer_dirty(bh);
 424        if (sync_dirty_buffer(bh))
 425                gfs2_io_error_bh(sdp, bh);
 426        brelse(bh);
 427
 428        return error;
 429}
 430
 431
 432static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
 433                               unsigned int message)
 434{
 435        char env_jid[20];
 436        char env_status[20];
 437        char *envp[] = { env_jid, env_status, NULL };
 438        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 439
 440        ls->ls_recover_jid_done = jid;
 441        ls->ls_recover_jid_status = message;
 442        sprintf(env_jid, "JID=%u", jid);
 443        sprintf(env_status, "RECOVERY=%s",
 444                message == LM_RD_SUCCESS ? "Done" : "Failed");
 445        kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 446
 447        if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
 448                sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
 449}
 450
 451void gfs2_recover_func(struct work_struct *work)
 452{
 453        struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
 454        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 455        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 456        struct gfs2_log_header_host head;
 457        struct gfs2_holder j_gh, ji_gh, thaw_gh;
 458        unsigned long t;
 459        int ro = 0;
 460        unsigned int pass;
 461        int error;
 462        int jlocked = 0;
 463
 464        if (sdp->sd_args.ar_spectator ||
 465            (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) {
 466                fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
 467                        jd->jd_jid);
 468                jlocked = 1;
 469                /* Acquire the journal lock so we can do recovery */
 470
 471                error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
 472                                          LM_ST_EXCLUSIVE,
 473                                          LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
 474                                          &j_gh);
 475                switch (error) {
 476                case 0:
 477                        break;
 478
 479                case GLR_TRYFAILED:
 480                        fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
 481                        error = 0;
 482
 483                default:
 484                        goto fail;
 485                };
 486
 487                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
 488                                           LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
 489                if (error)
 490                        goto fail_gunlock_j;
 491        } else {
 492                fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
 493        }
 494
 495        fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
 496
 497        error = gfs2_jdesc_check(jd);
 498        if (error)
 499                goto fail_gunlock_ji;
 500
 501        error = gfs2_find_jhead(jd, &head);
 502        if (error)
 503                goto fail_gunlock_ji;
 504
 505        if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
 506                fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
 507                        jd->jd_jid);
 508
 509                t = jiffies;
 510
 511                /* Acquire a shared hold on the freeze lock */
 512
 513                error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
 514                                           LM_FLAG_NOEXP | LM_FLAG_PRIORITY,
 515                                           &thaw_gh);
 516                if (error)
 517                        goto fail_gunlock_ji;
 518
 519                if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
 520                        ro = 1;
 521                } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
 522                        if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
 523                                ro = 1;
 524                } else {
 525                        if (sb_rdonly(sdp->sd_vfs)) {
 526                                /* check if device itself is read-only */
 527                                ro = bdev_read_only(sdp->sd_vfs->s_bdev);
 528                                if (!ro) {
 529                                        fs_info(sdp, "recovery required on "
 530                                                "read-only filesystem.\n");
 531                                        fs_info(sdp, "write access will be "
 532                                                "enabled during recovery.\n");
 533                                }
 534                        }
 535                }
 536
 537                if (ro) {
 538                        fs_warn(sdp, "jid=%u: Can't replay: read-only block "
 539                                "device\n", jd->jd_jid);
 540                        error = -EROFS;
 541                        goto fail_gunlock_thaw;
 542                }
 543
 544                fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
 545
 546                for (pass = 0; pass < 2; pass++) {
 547                        lops_before_scan(jd, &head, pass);
 548                        error = foreach_descriptor(jd, head.lh_tail,
 549                                                   head.lh_blkno, pass);
 550                        lops_after_scan(jd, error, pass);
 551                        if (error)
 552                                goto fail_gunlock_thaw;
 553                }
 554
 555                error = clean_journal(jd, &head);
 556                if (error)
 557                        goto fail_gunlock_thaw;
 558
 559                gfs2_glock_dq_uninit(&thaw_gh);
 560                t = DIV_ROUND_UP(jiffies - t, HZ);
 561                fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
 562                        jd->jd_jid, t);
 563        }
 564
 565        gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
 566
 567        if (jlocked) {
 568                gfs2_glock_dq_uninit(&ji_gh);
 569                gfs2_glock_dq_uninit(&j_gh);
 570        }
 571
 572        fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
 573        goto done;
 574
 575fail_gunlock_thaw:
 576        gfs2_glock_dq_uninit(&thaw_gh);
 577fail_gunlock_ji:
 578        if (jlocked) {
 579                gfs2_glock_dq_uninit(&ji_gh);
 580fail_gunlock_j:
 581                gfs2_glock_dq_uninit(&j_gh);
 582        }
 583
 584        fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
 585fail:
 586        jd->jd_recover_error = error;
 587        gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
 588done:
 589        clear_bit(JDF_RECOVERY, &jd->jd_flags);
 590        smp_mb__after_atomic();
 591        wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
 592}
 593
 594int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
 595{
 596        int rv;
 597
 598        if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
 599                return -EBUSY;
 600
 601        /* we have JDF_RECOVERY, queue should always succeed */
 602        rv = queue_work(gfs_recovery_wq, &jd->jd_work);
 603        BUG_ON(!rv);
 604
 605        if (wait)
 606                wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
 607                            TASK_UNINTERRUPTIBLE);
 608
 609        return wait ? jd->jd_recover_error : 0;
 610}
 611
 612