linux/fs/gfs2/recovery.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#include <linux/module.h>
  11#include <linux/slab.h>
  12#include <linux/spinlock.h>
  13#include <linux/completion.h>
  14#include <linux/buffer_head.h>
  15#include <linux/gfs2_ondisk.h>
  16#include <linux/crc32.h>
  17#include <linux/crc32c.h>
  18#include <linux/ktime.h>
  19
  20#include "gfs2.h"
  21#include "incore.h"
  22#include "bmap.h"
  23#include "glock.h"
  24#include "glops.h"
  25#include "log.h"
  26#include "lops.h"
  27#include "meta_io.h"
  28#include "recovery.h"
  29#include "super.h"
  30#include "util.h"
  31#include "dir.h"
  32
   /* Workqueue on which gfs2_recover_journal() queues a journal's jd_work. */
   33struct workqueue_struct *gfs_recovery_wq;
  34
  35int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
  36                           struct buffer_head **bh)
  37{
  38        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
  39        struct gfs2_glock *gl = ip->i_gl;
  40        int new = 0;
  41        u64 dblock;
  42        u32 extlen;
  43        int error;
  44
  45        error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
  46        if (error)
  47                return error;
  48        if (!dblock) {
  49                gfs2_consist_inode(ip);
  50                return -EIO;
  51        }
  52
  53        *bh = gfs2_meta_ra(gl, dblock, extlen);
  54
  55        return error;
  56}
  57
  58int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
  59{
  60        struct list_head *head = &jd->jd_revoke_list;
  61        struct gfs2_revoke_replay *rr;
  62        int found = 0;
  63
  64        list_for_each_entry(rr, head, rr_list) {
  65                if (rr->rr_blkno == blkno) {
  66                        found = 1;
  67                        break;
  68                }
  69        }
  70
  71        if (found) {
  72                rr->rr_where = where;
  73                return 0;
  74        }
  75
  76        rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
  77        if (!rr)
  78                return -ENOMEM;
  79
  80        rr->rr_blkno = blkno;
  81        rr->rr_where = where;
  82        list_add(&rr->rr_list, head);
  83
  84        return 1;
  85}
  86
  87int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
  88{
  89        struct gfs2_revoke_replay *rr;
  90        int wrap, a, b, revoke;
  91        int found = 0;
  92
  93        list_for_each_entry(rr, &jd->jd_revoke_list, rr_list) {
  94                if (rr->rr_blkno == blkno) {
  95                        found = 1;
  96                        break;
  97                }
  98        }
  99
 100        if (!found)
 101                return 0;
 102
 103        wrap = (rr->rr_where < jd->jd_replay_tail);
 104        a = (jd->jd_replay_tail < where);
 105        b = (where < rr->rr_where);
 106        revoke = (wrap) ? (a || b) : (a && b);
 107
 108        return revoke;
 109}
 110
 111void gfs2_revoke_clean(struct gfs2_jdesc *jd)
 112{
 113        struct list_head *head = &jd->jd_revoke_list;
 114        struct gfs2_revoke_replay *rr;
 115
 116        while (!list_empty(head)) {
 117                rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
 118                list_del(&rr->rr_list);
 119                kfree(rr);
 120        }
 121}
 122
 123/**
 124 * get_log_header - read the log header for a given segment
 125 * @jd: the journal
 126 * @blk: the block to look at
 127 * @lh: the log header to return
 128 *
 129 * Read the log header for a given segement in a given journal.  Do a few
 130 * sanity checks on it.
 131 *
 132 * Returns: 0 on success,
 133 *          1 if the header was invalid or incomplete,
 134 *          errno on error
 135 */
 136
 137static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 138                          struct gfs2_log_header_host *head)
 139{
 140        struct gfs2_log_header *lh;
 141        struct buffer_head *bh;
 142        u32 hash, crc;
 143        int error;
 144
 145        error = gfs2_replay_read_block(jd, blk, &bh);
 146        if (error)
 147                return error;
 148        lh = (void *)bh->b_data;
 149
 150        hash = crc32(~0, lh, LH_V1_SIZE - 4);
 151        hash = ~crc32_le_shift(hash, 4);  /* assume lh_hash is zero */
 152
 153        crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
 154                     bh->b_size - LH_V1_SIZE - 4);
 155
 156        error = lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
 157                lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
 158                be32_to_cpu(lh->lh_blkno) != blk ||
 159                be32_to_cpu(lh->lh_hash) != hash ||
 160                (lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc);
 161
 162        brelse(bh);
 163
 164        if (!error) {
 165                head->lh_sequence = be64_to_cpu(lh->lh_sequence);
 166                head->lh_flags = be32_to_cpu(lh->lh_flags);
 167                head->lh_tail = be32_to_cpu(lh->lh_tail);
 168                head->lh_blkno = be32_to_cpu(lh->lh_blkno);
 169        }
 170        return error;
 171}
 172
 173/**
 174 * find_good_lh - find a good log header
 175 * @jd: the journal
 176 * @blk: the segment to start searching from
 177 * @lh: the log header to fill in
 178 * @forward: if true search forward in the log, else search backward
 179 *
 180 * Call get_log_header() to get a log header for a segment, but if the
 181 * segment is bad, either scan forward or backward until we find a good one.
 182 *
 183 * Returns: errno
 184 */
 185
 186static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
 187                        struct gfs2_log_header_host *head)
 188{
 189        unsigned int orig_blk = *blk;
 190        int error;
 191
 192        for (;;) {
 193                error = get_log_header(jd, *blk, head);
 194                if (error <= 0)
 195                        return error;
 196
 197                if (++*blk == jd->jd_blocks)
 198                        *blk = 0;
 199
 200                if (*blk == orig_blk) {
 201                        gfs2_consist_inode(GFS2_I(jd->jd_inode));
 202                        return -EIO;
 203                }
 204        }
 205}
 206
 207/**
 208 * jhead_scan - make sure we've found the head of the log
 209 * @jd: the journal
 210 * @head: this is filled in with the log descriptor of the head
 211 *
 212 * At this point, seg and lh should be either the head of the log or just
 213 * before.  Scan forward until we find the head.
 214 *
 215 * Returns: errno
 216 */
 217
 218static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 219{
 220        unsigned int blk = head->lh_blkno;
 221        struct gfs2_log_header_host lh;
 222        int error;
 223
 224        for (;;) {
 225                if (++blk == jd->jd_blocks)
 226                        blk = 0;
 227
 228                error = get_log_header(jd, blk, &lh);
 229                if (error < 0)
 230                        return error;
 231                if (error == 1)
 232                        continue;
 233
 234                if (lh.lh_sequence == head->lh_sequence) {
 235                        gfs2_consist_inode(GFS2_I(jd->jd_inode));
 236                        return -EIO;
 237                }
 238                if (lh.lh_sequence < head->lh_sequence)
 239                        break;
 240
 241                *head = lh;
 242        }
 243
 244        return 0;
 245}
 246
 247/**
 248 * gfs2_find_jhead - find the head of a log
 249 * @jd: the journal
 250 * @head: the log descriptor for the head of the log is returned here
 251 *
 252 * Do a binary search of a journal and find the valid log entry with the
 253 * highest sequence number.  (i.e. the log head)
 254 *
 255 * Returns: errno
 256 */
 257
 258int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 259{
 260        struct gfs2_log_header_host lh_1, lh_m;
 261        u32 blk_1, blk_2, blk_m;
 262        int error;
 263
 264        blk_1 = 0;
 265        blk_2 = jd->jd_blocks - 1;
 266
 267        for (;;) {
 268                blk_m = (blk_1 + blk_2) / 2;
 269
 270                error = find_good_lh(jd, &blk_1, &lh_1);
 271                if (error)
 272                        return error;
 273
 274                error = find_good_lh(jd, &blk_m, &lh_m);
 275                if (error)
 276                        return error;
 277
 278                if (blk_1 == blk_m || blk_m == blk_2)
 279                        break;
 280
 281                if (lh_1.lh_sequence <= lh_m.lh_sequence)
 282                        blk_1 = blk_m;
 283                else
 284                        blk_2 = blk_m;
 285        }
 286
 287        error = jhead_scan(jd, &lh_1);
 288        if (error)
 289                return error;
 290
 291        *head = lh_1;
 292
 293        return error;
 294}
 295
 296/**
 297 * foreach_descriptor - go through the active part of the log
 298 * @jd: the journal
 299 * @start: the first log header in the active region
 300 * @end: the last log header (don't process the contents of this entry))
 301 *
 302 * Call a given function once for every log descriptor in the active
 303 * portion of the log.
 304 *
 305 * Returns: errno
 306 */
 307
 308static int foreach_descriptor(struct gfs2_jdesc *jd, u32 start,
 309                              unsigned int end, int pass)
 310{
 311        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 312        struct buffer_head *bh;
 313        struct gfs2_log_descriptor *ld;
 314        int error = 0;
 315        u32 length;
 316        __be64 *ptr;
 317        unsigned int offset = sizeof(struct gfs2_log_descriptor);
 318        offset += sizeof(__be64) - 1;
 319        offset &= ~(sizeof(__be64) - 1);
 320
 321        while (start != end) {
 322                error = gfs2_replay_read_block(jd, start, &bh);
 323                if (error)
 324                        return error;
 325                if (gfs2_meta_check(sdp, bh)) {
 326                        brelse(bh);
 327                        return -EIO;
 328                }
 329                ld = (struct gfs2_log_descriptor *)bh->b_data;
 330                length = be32_to_cpu(ld->ld_length);
 331
 332                if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
 333                        struct gfs2_log_header_host lh;
 334                        error = get_log_header(jd, start, &lh);
 335                        if (!error) {
 336                                gfs2_replay_incr_blk(jd, &start);
 337                                brelse(bh);
 338                                continue;
 339                        }
 340                        if (error == 1) {
 341                                gfs2_consist_inode(GFS2_I(jd->jd_inode));
 342                                error = -EIO;
 343                        }
 344                        brelse(bh);
 345                        return error;
 346                } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
 347                        brelse(bh);
 348                        return -EIO;
 349                }
 350                ptr = (__be64 *)(bh->b_data + offset);
 351                error = lops_scan_elements(jd, start, ld, ptr, pass);
 352                if (error) {
 353                        brelse(bh);
 354                        return error;
 355                }
 356
 357                while (length--)
 358                        gfs2_replay_incr_blk(jd, &start);
 359
 360                brelse(bh);
 361        }
 362
 363        return 0;
 364}
 365
 366/**
 367 * clean_journal - mark a dirty journal as being clean
 368 * @jd: the journal
 369 * @head: the head journal to start from
 370 *
 371 * Returns: errno
 372 */
 373
 374static void clean_journal(struct gfs2_jdesc *jd,
 375                          struct gfs2_log_header_host *head)
 376{
 377        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 378        u32 lblock = head->lh_blkno;
 379
 380        gfs2_replay_incr_blk(jd, &lblock);
 381        if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
 382                sdp->sd_log_flush_head = lblock;
 383        gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0, lblock,
 384                              GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
 385                              REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
 386}
 387
 388
 389static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
 390                               unsigned int message)
 391{
 392        char env_jid[20];
 393        char env_status[20];
 394        char *envp[] = { env_jid, env_status, NULL };
 395        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 396
 397        ls->ls_recover_jid_done = jid;
 398        ls->ls_recover_jid_status = message;
 399        sprintf(env_jid, "JID=%u", jid);
 400        sprintf(env_status, "RECOVERY=%s",
 401                message == LM_RD_SUCCESS ? "Done" : "Failed");
 402        kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 403
 404        if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
 405                sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
 406}
 407
 408void gfs2_recover_func(struct work_struct *work)
 409{
 410        struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
 411        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 412        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 413        struct gfs2_log_header_host head;
 414        struct gfs2_holder j_gh, ji_gh, thaw_gh;
 415        ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
 416        int ro = 0;
 417        unsigned int pass;
 418        int error = 0;
 419        int jlocked = 0;
 420
 421        t_start = ktime_get();
 422        if (sdp->sd_args.ar_spectator)
 423                goto fail;
 424        if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
 425                fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
 426                        jd->jd_jid);
 427                jlocked = 1;
 428                /* Acquire the journal lock so we can do recovery */
 429
 430                error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
 431                                          LM_ST_EXCLUSIVE,
 432                                          LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
 433                                          &j_gh);
 434                switch (error) {
 435                case 0:
 436                        break;
 437
 438                case GLR_TRYFAILED:
 439                        fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
 440                        error = 0;
 441
 442                default:
 443                        goto fail;
 444                };
 445
 446                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
 447                                           LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
 448                if (error)
 449                        goto fail_gunlock_j;
 450        } else {
 451                fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
 452        }
 453
 454        t_jlck = ktime_get();
 455        fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
 456
 457        error = gfs2_jdesc_check(jd);
 458        if (error)
 459                goto fail_gunlock_ji;
 460
 461        error = gfs2_find_jhead(jd, &head);
 462        if (error)
 463                goto fail_gunlock_ji;
 464        t_jhd = ktime_get();
 465
 466        if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
 467                fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
 468                        jd->jd_jid);
 469
 470                /* Acquire a shared hold on the freeze lock */
 471
 472                error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
 473                                           LM_FLAG_NOEXP | LM_FLAG_PRIORITY,
 474                                           &thaw_gh);
 475                if (error)
 476                        goto fail_gunlock_ji;
 477
 478                if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
 479                        ro = 1;
 480                } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
 481                        if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
 482                                ro = 1;
 483                } else {
 484                        if (sb_rdonly(sdp->sd_vfs)) {
 485                                /* check if device itself is read-only */
 486                                ro = bdev_read_only(sdp->sd_vfs->s_bdev);
 487                                if (!ro) {
 488                                        fs_info(sdp, "recovery required on "
 489                                                "read-only filesystem.\n");
 490                                        fs_info(sdp, "write access will be "
 491                                                "enabled during recovery.\n");
 492                                }
 493                        }
 494                }
 495
 496                if (ro) {
 497                        fs_warn(sdp, "jid=%u: Can't replay: read-only block "
 498                                "device\n", jd->jd_jid);
 499                        error = -EROFS;
 500                        goto fail_gunlock_thaw;
 501                }
 502
 503                t_tlck = ktime_get();
 504                fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
 505
 506                for (pass = 0; pass < 2; pass++) {
 507                        lops_before_scan(jd, &head, pass);
 508                        error = foreach_descriptor(jd, head.lh_tail,
 509                                                   head.lh_blkno, pass);
 510                        lops_after_scan(jd, error, pass);
 511                        if (error)
 512                                goto fail_gunlock_thaw;
 513                }
 514
 515                clean_journal(jd, &head);
 516
 517                gfs2_glock_dq_uninit(&thaw_gh);
 518                t_rep = ktime_get();
 519                fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
 520                        "jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
 521                        jd->jd_jid, ktime_ms_delta(t_rep, t_start),
 522                        ktime_ms_delta(t_jlck, t_start),
 523                        ktime_ms_delta(t_jhd, t_jlck),
 524                        ktime_ms_delta(t_tlck, t_jhd),
 525                        ktime_ms_delta(t_rep, t_tlck));
 526        }
 527
 528        gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
 529
 530        if (jlocked) {
 531                gfs2_glock_dq_uninit(&ji_gh);
 532                gfs2_glock_dq_uninit(&j_gh);
 533        }
 534
 535        fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
 536        goto done;
 537
 538fail_gunlock_thaw:
 539        gfs2_glock_dq_uninit(&thaw_gh);
 540fail_gunlock_ji:
 541        if (jlocked) {
 542                gfs2_glock_dq_uninit(&ji_gh);
 543fail_gunlock_j:
 544                gfs2_glock_dq_uninit(&j_gh);
 545        }
 546
 547        fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
 548fail:
 549        jd->jd_recover_error = error;
 550        gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
 551done:
 552        clear_bit(JDF_RECOVERY, &jd->jd_flags);
 553        smp_mb__after_atomic();
 554        wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
 555}
 556
 557int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
 558{
 559        int rv;
 560
 561        if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
 562                return -EBUSY;
 563
 564        /* we have JDF_RECOVERY, queue should always succeed */
 565        rv = queue_work(gfs_recovery_wq, &jd->jd_work);
 566        BUG_ON(!rv);
 567
 568        if (wait)
 569                wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
 570                            TASK_UNINTERRUPTIBLE);
 571
 572        return wait ? jd->jd_recover_error : 0;
 573}
 574
 575