linux/fs/gfs2/recovery.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#include <linux/module.h>
  11#include <linux/slab.h>
  12#include <linux/spinlock.h>
  13#include <linux/completion.h>
  14#include <linux/buffer_head.h>
  15#include <linux/gfs2_ondisk.h>
  16#include <linux/crc32.h>
  17#include <linux/slow-work.h>
  18
  19#include "gfs2.h"
  20#include "incore.h"
  21#include "bmap.h"
  22#include "glock.h"
  23#include "glops.h"
  24#include "lops.h"
  25#include "meta_io.h"
  26#include "recovery.h"
  27#include "super.h"
  28#include "util.h"
  29#include "dir.h"
  30
  31int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
  32                           struct buffer_head **bh)
  33{
  34        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
  35        struct gfs2_glock *gl = ip->i_gl;
  36        int new = 0;
  37        u64 dblock;
  38        u32 extlen;
  39        int error;
  40
  41        error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
  42        if (error)
  43                return error;
  44        if (!dblock) {
  45                gfs2_consist_inode(ip);
  46                return -EIO;
  47        }
  48
  49        *bh = gfs2_meta_ra(gl, dblock, extlen);
  50
  51        return error;
  52}
  53
  54int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
  55{
  56        struct list_head *head = &sdp->sd_revoke_list;
  57        struct gfs2_revoke_replay *rr;
  58        int found = 0;
  59
  60        list_for_each_entry(rr, head, rr_list) {
  61                if (rr->rr_blkno == blkno) {
  62                        found = 1;
  63                        break;
  64                }
  65        }
  66
  67        if (found) {
  68                rr->rr_where = where;
  69                return 0;
  70        }
  71
  72        rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
  73        if (!rr)
  74                return -ENOMEM;
  75
  76        rr->rr_blkno = blkno;
  77        rr->rr_where = where;
  78        list_add(&rr->rr_list, head);
  79
  80        return 1;
  81}
  82
  83int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
  84{
  85        struct gfs2_revoke_replay *rr;
  86        int wrap, a, b, revoke;
  87        int found = 0;
  88
  89        list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
  90                if (rr->rr_blkno == blkno) {
  91                        found = 1;
  92                        break;
  93                }
  94        }
  95
  96        if (!found)
  97                return 0;
  98
  99        wrap = (rr->rr_where < sdp->sd_replay_tail);
 100        a = (sdp->sd_replay_tail < where);
 101        b = (where < rr->rr_where);
 102        revoke = (wrap) ? (a || b) : (a && b);
 103
 104        return revoke;
 105}
 106
 107void gfs2_revoke_clean(struct gfs2_sbd *sdp)
 108{
 109        struct list_head *head = &sdp->sd_revoke_list;
 110        struct gfs2_revoke_replay *rr;
 111
 112        while (!list_empty(head)) {
 113                rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
 114                list_del(&rr->rr_list);
 115                kfree(rr);
 116        }
 117}
 118
 119static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
 120{
 121        const struct gfs2_log_header *str = buf;
 122
 123        if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
 124            str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
 125                return 1;
 126
 127        lh->lh_sequence = be64_to_cpu(str->lh_sequence);
 128        lh->lh_flags = be32_to_cpu(str->lh_flags);
 129        lh->lh_tail = be32_to_cpu(str->lh_tail);
 130        lh->lh_blkno = be32_to_cpu(str->lh_blkno);
 131        lh->lh_hash = be32_to_cpu(str->lh_hash);
 132        return 0;
 133}
 134
 135/**
 136 * get_log_header - read the log header for a given segment
 137 * @jd: the journal
 138 * @blk: the block to look at
 139 * @lh: the log header to return
 140 *
 141 * Read the log header for a given segement in a given journal.  Do a few
 142 * sanity checks on it.
 143 *
 144 * Returns: 0 on success,
 145 *          1 if the header was invalid or incomplete,
 146 *          errno on error
 147 */
 148
 149static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 150                          struct gfs2_log_header_host *head)
 151{
 152        struct buffer_head *bh;
 153        struct gfs2_log_header_host uninitialized_var(lh);
 154        const u32 nothing = 0;
 155        u32 hash;
 156        int error;
 157
 158        error = gfs2_replay_read_block(jd, blk, &bh);
 159        if (error)
 160                return error;
 161
 162        hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
 163                                             sizeof(u32));
 164        hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
 165        hash ^= (u32)~0;
 166        error = gfs2_log_header_in(&lh, bh->b_data);
 167        brelse(bh);
 168
 169        if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
 170                return 1;
 171
 172        *head = lh;
 173
 174        return 0;
 175}
 176
 177/**
 178 * find_good_lh - find a good log header
 179 * @jd: the journal
 180 * @blk: the segment to start searching from
 181 * @lh: the log header to fill in
 182 * @forward: if true search forward in the log, else search backward
 183 *
 184 * Call get_log_header() to get a log header for a segment, but if the
 185 * segment is bad, either scan forward or backward until we find a good one.
 186 *
 187 * Returns: errno
 188 */
 189
 190static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
 191                        struct gfs2_log_header_host *head)
 192{
 193        unsigned int orig_blk = *blk;
 194        int error;
 195
 196        for (;;) {
 197                error = get_log_header(jd, *blk, head);
 198                if (error <= 0)
 199                        return error;
 200
 201                if (++*blk == jd->jd_blocks)
 202                        *blk = 0;
 203
 204                if (*blk == orig_blk) {
 205                        gfs2_consist_inode(GFS2_I(jd->jd_inode));
 206                        return -EIO;
 207                }
 208        }
 209}
 210
 211/**
 212 * jhead_scan - make sure we've found the head of the log
 213 * @jd: the journal
 214 * @head: this is filled in with the log descriptor of the head
 215 *
 216 * At this point, seg and lh should be either the head of the log or just
 217 * before.  Scan forward until we find the head.
 218 *
 219 * Returns: errno
 220 */
 221
 222static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 223{
 224        unsigned int blk = head->lh_blkno;
 225        struct gfs2_log_header_host lh;
 226        int error;
 227
 228        for (;;) {
 229                if (++blk == jd->jd_blocks)
 230                        blk = 0;
 231
 232                error = get_log_header(jd, blk, &lh);
 233                if (error < 0)
 234                        return error;
 235                if (error == 1)
 236                        continue;
 237
 238                if (lh.lh_sequence == head->lh_sequence) {
 239                        gfs2_consist_inode(GFS2_I(jd->jd_inode));
 240                        return -EIO;
 241                }
 242                if (lh.lh_sequence < head->lh_sequence)
 243                        break;
 244
 245                *head = lh;
 246        }
 247
 248        return 0;
 249}
 250
 251/**
 252 * gfs2_find_jhead - find the head of a log
 253 * @jd: the journal
 254 * @head: the log descriptor for the head of the log is returned here
 255 *
 256 * Do a binary search of a journal and find the valid log entry with the
 257 * highest sequence number.  (i.e. the log head)
 258 *
 259 * Returns: errno
 260 */
 261
 262int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 263{
 264        struct gfs2_log_header_host lh_1, lh_m;
 265        u32 blk_1, blk_2, blk_m;
 266        int error;
 267
 268        blk_1 = 0;
 269        blk_2 = jd->jd_blocks - 1;
 270
 271        for (;;) {
 272                blk_m = (blk_1 + blk_2) / 2;
 273
 274                error = find_good_lh(jd, &blk_1, &lh_1);
 275                if (error)
 276                        return error;
 277
 278                error = find_good_lh(jd, &blk_m, &lh_m);
 279                if (error)
 280                        return error;
 281
 282                if (blk_1 == blk_m || blk_m == blk_2)
 283                        break;
 284
 285                if (lh_1.lh_sequence <= lh_m.lh_sequence)
 286                        blk_1 = blk_m;
 287                else
 288                        blk_2 = blk_m;
 289        }
 290
 291        error = jhead_scan(jd, &lh_1);
 292        if (error)
 293                return error;
 294
 295        *head = lh_1;
 296
 297        return error;
 298}
 299
 300/**
 301 * foreach_descriptor - go through the active part of the log
 302 * @jd: the journal
 303 * @start: the first log header in the active region
 304 * @end: the last log header (don't process the contents of this entry))
 305 *
 306 * Call a given function once for every log descriptor in the active
 307 * portion of the log.
 308 *
 309 * Returns: errno
 310 */
 311
 312static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
 313                              unsigned int end, int pass)
 314{
 315        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 316        struct buffer_head *bh;
 317        struct gfs2_log_descriptor *ld;
 318        int error = 0;
 319        u32 length;
 320        __be64 *ptr;
 321        unsigned int offset = sizeof(struct gfs2_log_descriptor);
 322        offset += sizeof(__be64) - 1;
 323        offset &= ~(sizeof(__be64) - 1);
 324
 325        while (start != end) {
 326                error = gfs2_replay_read_block(jd, start, &bh);
 327                if (error)
 328                        return error;
 329                if (gfs2_meta_check(sdp, bh)) {
 330                        brelse(bh);
 331                        return -EIO;
 332                }
 333                ld = (struct gfs2_log_descriptor *)bh->b_data;
 334                length = be32_to_cpu(ld->ld_length);
 335
 336                if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
 337                        struct gfs2_log_header_host lh;
 338                        error = get_log_header(jd, start, &lh);
 339                        if (!error) {
 340                                gfs2_replay_incr_blk(sdp, &start);
 341                                brelse(bh);
 342                                continue;
 343                        }
 344                        if (error == 1) {
 345                                gfs2_consist_inode(GFS2_I(jd->jd_inode));
 346                                error = -EIO;
 347                        }
 348                        brelse(bh);
 349                        return error;
 350                } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
 351                        brelse(bh);
 352                        return -EIO;
 353                }
 354                ptr = (__be64 *)(bh->b_data + offset);
 355                error = lops_scan_elements(jd, start, ld, ptr, pass);
 356                if (error) {
 357                        brelse(bh);
 358                        return error;
 359                }
 360
 361                while (length--)
 362                        gfs2_replay_incr_blk(sdp, &start);
 363
 364                brelse(bh);
 365        }
 366
 367        return 0;
 368}
 369
 370/**
 371 * clean_journal - mark a dirty journal as being clean
 372 * @sdp: the filesystem
 373 * @jd: the journal
 374 * @gl: the journal's glock
 375 * @head: the head journal to start from
 376 *
 377 * Returns: errno
 378 */
 379
 380static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 381{
 382        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 383        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 384        unsigned int lblock;
 385        struct gfs2_log_header *lh;
 386        u32 hash;
 387        struct buffer_head *bh;
 388        int error;
 389        struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
 390
 391        lblock = head->lh_blkno;
 392        gfs2_replay_incr_blk(sdp, &lblock);
 393        bh_map.b_size = 1 << ip->i_inode.i_blkbits;
 394        error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0);
 395        if (error)
 396                return error;
 397        if (!bh_map.b_blocknr) {
 398                gfs2_consist_inode(ip);
 399                return -EIO;
 400        }
 401
 402        bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
 403        lock_buffer(bh);
 404        memset(bh->b_data, 0, bh->b_size);
 405        set_buffer_uptodate(bh);
 406        clear_buffer_dirty(bh);
 407        unlock_buffer(bh);
 408
 409        lh = (struct gfs2_log_header *)bh->b_data;
 410        memset(lh, 0, sizeof(struct gfs2_log_header));
 411        lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 412        lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
 413        lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
 414        lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
 415        lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
 416        lh->lh_blkno = cpu_to_be32(lblock);
 417        hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
 418        lh->lh_hash = cpu_to_be32(hash);
 419
 420        set_buffer_dirty(bh);
 421        if (sync_dirty_buffer(bh))
 422                gfs2_io_error_bh(sdp, bh);
 423        brelse(bh);
 424
 425        return error;
 426}
 427
 428
 429static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
 430                               unsigned int message)
 431{
 432        char env_jid[20];
 433        char env_status[20];
 434        char *envp[] = { env_jid, env_status, NULL };
 435        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 436        ls->ls_recover_jid_done = jid;
 437        ls->ls_recover_jid_status = message;
 438        sprintf(env_jid, "JID=%d", jid);
 439        sprintf(env_status, "RECOVERY=%s",
 440                message == LM_RD_SUCCESS ? "Done" : "Failed");
 441        kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 442}
 443
 444static int gfs2_recover_get_ref(struct slow_work *work)
 445{
 446        struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
 447        if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
 448                return -EBUSY;
 449        return 0;
 450}
 451
 452static void gfs2_recover_put_ref(struct slow_work *work)
 453{
 454        struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
 455        clear_bit(JDF_RECOVERY, &jd->jd_flags);
 456        smp_mb__after_clear_bit();
 457        wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
 458}
 459
 460static void gfs2_recover_work(struct slow_work *work)
 461{
 462        struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
 463        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 464        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 465        struct gfs2_log_header_host head;
 466        struct gfs2_holder j_gh, ji_gh, t_gh;
 467        unsigned long t;
 468        int ro = 0;
 469        unsigned int pass;
 470        int error;
 471
 472        if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
 473                fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
 474                        jd->jd_jid);
 475
 476                /* Acquire the journal lock so we can do recovery */
 477
 478                error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
 479                                          LM_ST_EXCLUSIVE,
 480                                          LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
 481                                          &j_gh);
 482                switch (error) {
 483                case 0:
 484                        break;
 485
 486                case GLR_TRYFAILED:
 487                        fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
 488                        error = 0;
 489
 490                default:
 491                        goto fail;
 492                };
 493
 494                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
 495                                           LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
 496                if (error)
 497                        goto fail_gunlock_j;
 498        } else {
 499                fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
 500        }
 501
 502        fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
 503
 504        error = gfs2_jdesc_check(jd);
 505        if (error)
 506                goto fail_gunlock_ji;
 507
 508        error = gfs2_find_jhead(jd, &head);
 509        if (error)
 510                goto fail_gunlock_ji;
 511
 512        if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
 513                fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
 514                        jd->jd_jid);
 515
 516                t = jiffies;
 517
 518                /* Acquire a shared hold on the transaction lock */
 519
 520                error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
 521                                           LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
 522                                           GL_NOCACHE, &t_gh);
 523                if (error)
 524                        goto fail_gunlock_ji;
 525
 526                if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
 527                        if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
 528                                ro = 1;
 529                } else {
 530                        if (sdp->sd_vfs->s_flags & MS_RDONLY) {
 531                                /* check if device itself is read-only */
 532                                ro = bdev_read_only(sdp->sd_vfs->s_bdev);
 533                                if (!ro) {
 534                                        fs_info(sdp, "recovery required on "
 535                                                "read-only filesystem.\n");
 536                                        fs_info(sdp, "write access will be "
 537                                                "enabled during recovery.\n");
 538                                }
 539                        }
 540                }
 541
 542                if (ro) {
 543                        fs_warn(sdp, "jid=%u: Can't replay: read-only block "
 544                                "device\n", jd->jd_jid);
 545                        error = -EROFS;
 546                        goto fail_gunlock_tr;
 547                }
 548
 549                fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
 550
 551                for (pass = 0; pass < 2; pass++) {
 552                        lops_before_scan(jd, &head, pass);
 553                        error = foreach_descriptor(jd, head.lh_tail,
 554                                                   head.lh_blkno, pass);
 555                        lops_after_scan(jd, error, pass);
 556                        if (error)
 557                                goto fail_gunlock_tr;
 558                }
 559
 560                error = clean_journal(jd, &head);
 561                if (error)
 562                        goto fail_gunlock_tr;
 563
 564                gfs2_glock_dq_uninit(&t_gh);
 565                t = DIV_ROUND_UP(jiffies - t, HZ);
 566                fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
 567                        jd->jd_jid, t);
 568        }
 569
 570        if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
 571                gfs2_glock_dq_uninit(&ji_gh);
 572
 573        gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
 574
 575        if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
 576                gfs2_glock_dq_uninit(&j_gh);
 577
 578        fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
 579        return;
 580
 581fail_gunlock_tr:
 582        gfs2_glock_dq_uninit(&t_gh);
 583fail_gunlock_ji:
 584        if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
 585                gfs2_glock_dq_uninit(&ji_gh);
 586fail_gunlock_j:
 587                gfs2_glock_dq_uninit(&j_gh);
 588        }
 589
 590        fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
 591
 592fail:
 593        gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
 594}
 595
 596struct slow_work_ops gfs2_recover_ops = {
 597        .owner   = THIS_MODULE,
 598        .get_ref = gfs2_recover_get_ref,
 599        .put_ref = gfs2_recover_put_ref,
 600        .execute = gfs2_recover_work,
 601};
 602
 603
 604static int gfs2_recovery_wait(void *word)
 605{
 606        schedule();
 607        return 0;
 608}
 609
 610int gfs2_recover_journal(struct gfs2_jdesc *jd)
 611{
 612        int rv;
 613        rv = slow_work_enqueue(&jd->jd_work);
 614        if (rv)
 615                return rv;
 616        wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE);
 617        return 0;
 618}
 619
 620