linux/fs/gfs2/util.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   5 */
   6
   7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   8
   9#include <linux/spinlock.h>
  10#include <linux/completion.h>
  11#include <linux/buffer_head.h>
  12#include <linux/crc32.h>
  13#include <linux/gfs2_ondisk.h>
  14#include <linux/delay.h>
  15#include <linux/uaccess.h>
  16
  17#include "gfs2.h"
  18#include "incore.h"
  19#include "glock.h"
  20#include "glops.h"
  21#include "log.h"
  22#include "lops.h"
  23#include "recovery.h"
  24#include "rgrp.h"
  25#include "super.h"
  26#include "util.h"
  27
/*
 * Slab cache pointers and the page mempool pointer used by GFS2. Only the
 * pointers are defined here; none of them is initialised in the code shown
 * in this file.
 */
struct kmem_cache *gfs2_glock_cachep __read_mostly;
struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
struct kmem_cache *gfs2_qadata_cachep __read_mostly;
struct kmem_cache *gfs2_trans_cachep __read_mostly;
mempool_t *gfs2_page_pool __read_mostly;
  37
  38void gfs2_assert_i(struct gfs2_sbd *sdp)
  39{
  40        fs_emerg(sdp, "fatal assertion failed\n");
  41}
  42
  43/**
  44 * check_journal_clean - Make sure a journal is clean for a spectator mount
  45 * @sdp: The GFS2 superblock
  46 * @jd: The journal descriptor
  47 * @verbose: Show more prints in the log
  48 *
  49 * Returns: 0 if the journal is clean or locked, else an error
  50 */
  51int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
  52                        bool verbose)
  53{
  54        int error;
  55        struct gfs2_holder j_gh;
  56        struct gfs2_log_header_host head;
  57        struct gfs2_inode *ip;
  58
  59        ip = GFS2_I(jd->jd_inode);
  60        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
  61                                   GL_EXACT | GL_NOCACHE, &j_gh);
  62        if (error) {
  63                if (verbose)
  64                        fs_err(sdp, "Error %d locking journal for spectator "
  65                               "mount.\n", error);
  66                return -EPERM;
  67        }
  68        error = gfs2_jdesc_check(jd);
  69        if (error) {
  70                if (verbose)
  71                        fs_err(sdp, "Error checking journal for spectator "
  72                               "mount.\n");
  73                goto out_unlock;
  74        }
  75        error = gfs2_find_jhead(jd, &head, false);
  76        if (error) {
  77                if (verbose)
  78                        fs_err(sdp, "Error parsing journal for spectator "
  79                               "mount.\n");
  80                goto out_unlock;
  81        }
  82        if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
  83                error = -EPERM;
  84                if (verbose)
  85                        fs_err(sdp, "jid=%u: Journal is dirty, so the first "
  86                               "mounter must not be a spectator.\n",
  87                               jd->jd_jid);
  88        }
  89
  90out_unlock:
  91        gfs2_glock_dq_uninit(&j_gh);
  92        return error;
  93}
  94
  95/**
  96 * gfs2_freeze_lock - hold the freeze glock
  97 * @sdp: the superblock
  98 * @freeze_gh: pointer to the requested holder
  99 * @caller_flags: any additional flags needed by the caller
 100 */
 101int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
 102                     int caller_flags)
 103{
 104        int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
 105        int error;
 106
 107        error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
 108                                   freeze_gh);
 109        if (error && error != GLR_TRYFAILED)
 110                fs_err(sdp, "can't lock the freeze lock: %d\n", error);
 111        return error;
 112}
 113
 114void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
 115{
 116        if (gfs2_holder_initialized(freeze_gh))
 117                gfs2_glock_dq_uninit(freeze_gh);
 118}
 119
 120static void signal_our_withdraw(struct gfs2_sbd *sdp)
 121{
 122        struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
 123        struct inode *inode;
 124        struct gfs2_inode *ip;
 125        struct gfs2_glock *i_gl;
 126        u64 no_formal_ino;
 127        int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 128        int ret = 0;
 129        int tries;
 130
 131        if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
 132                return;
 133
 134        gfs2_ail_drain(sdp); /* frees all transactions */
 135        inode = sdp->sd_jdesc->jd_inode;
 136        ip = GFS2_I(inode);
 137        i_gl = ip->i_gl;
 138        no_formal_ino = ip->i_no_formal_ino;
 139
 140        /* Prevent any glock dq until withdraw recovery is complete */
 141        set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 142        /*
 143         * Don't tell dlm we're bailing until we have no more buffers in the
 144         * wind. If journal had an IO error, the log code should just purge
 145         * the outstanding buffers rather than submitting new IO. Making the
 146         * file system read-only will flush the journal, etc.
 147         *
 148         * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
 149         * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
 150         * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
 151         * therefore we need to clear SDF_JOURNAL_LIVE manually.
 152         */
 153        clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 154        if (!sb_rdonly(sdp->sd_vfs)) {
 155                struct gfs2_holder freeze_gh;
 156
 157                gfs2_holder_mark_uninitialized(&freeze_gh);
 158                if (sdp->sd_freeze_gl &&
 159                    !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
 160                        ret = gfs2_freeze_lock(sdp, &freeze_gh,
 161                                       log_write_allowed ? 0 : LM_FLAG_TRY);
 162                        if (ret == GLR_TRYFAILED)
 163                                ret = 0;
 164                }
 165                if (!ret)
 166                        gfs2_make_fs_ro(sdp);
 167                gfs2_freeze_unlock(&freeze_gh);
 168        }
 169
 170        if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
 171                if (!ret)
 172                        ret = -EIO;
 173                clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 174                goto skip_recovery;
 175        }
 176        /*
 177         * Drop the glock for our journal so another node can recover it.
 178         */
 179        if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
 180                gfs2_glock_dq_wait(&sdp->sd_journal_gh);
 181                gfs2_holder_uninit(&sdp->sd_journal_gh);
 182        }
 183        sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
 184        gfs2_glock_dq(&sdp->sd_jinode_gh);
 185        if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
 186                /* Make sure gfs2_unfreeze works if partially-frozen */
 187                flush_work(&sdp->sd_freeze_work);
 188                atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
 189                thaw_super(sdp->sd_vfs);
 190        } else {
 191                wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
 192                            TASK_UNINTERRUPTIBLE);
 193        }
 194
 195        /*
 196         * holder_uninit to force glock_put, to force dlm to let go
 197         */
 198        gfs2_holder_uninit(&sdp->sd_jinode_gh);
 199
 200        /*
 201         * Note: We need to be careful here:
 202         * Our iput of jd_inode will evict it. The evict will dequeue its
 203         * glock, but the glock dq will wait for the withdraw unless we have
 204         * exception code in glock_dq.
 205         */
 206        iput(inode);
 207        /*
 208         * Wait until the journal inode's glock is freed. This allows try locks
 209         * on other nodes to be successful, otherwise we remain the owner of
 210         * the glock as far as dlm is concerned.
 211         */
 212        if (i_gl->gl_ops->go_free) {
 213                set_bit(GLF_FREEING, &i_gl->gl_flags);
 214                wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
 215        }
 216
 217        /*
 218         * Dequeue the "live" glock, but keep a reference so it's never freed.
 219         */
 220        gfs2_glock_hold(live_gl);
 221        gfs2_glock_dq_wait(&sdp->sd_live_gh);
 222        /*
 223         * We enqueue the "live" glock in EX so that all other nodes
 224         * get a demote request and act on it. We don't really want the
 225         * lock in EX, so we send a "try" lock with 1CB to produce a callback.
 226         */
 227        fs_warn(sdp, "Requesting recovery of jid %d.\n",
 228                sdp->sd_lockstruct.ls_jid);
 229        gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP,
 230                           &sdp->sd_live_gh);
 231        msleep(GL_GLOCK_MAX_HOLD);
 232        /*
 233         * This will likely fail in a cluster, but succeed standalone:
 234         */
 235        ret = gfs2_glock_nq(&sdp->sd_live_gh);
 236
 237        /*
 238         * If we actually got the "live" lock in EX mode, there are no other
 239         * nodes available to replay our journal. So we try to replay it
 240         * ourselves. We hold the "live" glock to prevent other mounters
 241         * during recovery, then just dequeue it and reacquire it in our
 242         * normal SH mode. Just in case the problem that caused us to
 243         * withdraw prevents us from recovering our journal (e.g. io errors
 244         * and such) we still check if the journal is clean before proceeding
 245         * but we may wait forever until another mounter does the recovery.
 246         */
 247        if (ret == 0) {
 248                fs_warn(sdp, "No other mounters found. Trying to recover our "
 249                        "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
 250                if (gfs2_recover_journal(sdp->sd_jdesc, 1))
 251                        fs_warn(sdp, "Unable to recover our journal jid %d.\n",
 252                                sdp->sd_lockstruct.ls_jid);
 253                gfs2_glock_dq_wait(&sdp->sd_live_gh);
 254                gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
 255                                   &sdp->sd_live_gh);
 256                gfs2_glock_nq(&sdp->sd_live_gh);
 257        }
 258
 259        gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
 260        clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 261
 262        /*
 263         * At this point our journal is evicted, so we need to get a new inode
 264         * for it. Once done, we need to call gfs2_find_jhead which
 265         * calls gfs2_map_journal_extents to map it for us again.
 266         *
 267         * Note that we don't really want it to look up a FREE block. The
 268         * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
 269         * which would otherwise fail because it requires grabbing an rgrp
 270         * glock, which would fail with -EIO because we're withdrawing.
 271         */
 272        inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
 273                                  sdp->sd_jdesc->jd_no_addr, no_formal_ino,
 274                                  GFS2_BLKST_FREE);
 275        if (IS_ERR(inode)) {
 276                fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
 277                        sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
 278                goto skip_recovery;
 279        }
 280        sdp->sd_jdesc->jd_inode = inode;
 281        d_mark_dontcache(inode);
 282
 283        /*
 284         * Now wait until recovery is complete.
 285         */
 286        for (tries = 0; tries < 10; tries++) {
 287                ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
 288                if (!ret)
 289                        break;
 290                msleep(HZ);
 291                fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
 292                        sdp->sd_lockstruct.ls_jid);
 293        }
 294skip_recovery:
 295        if (!ret)
 296                fs_warn(sdp, "Journal recovery complete for jid %d.\n",
 297                        sdp->sd_lockstruct.ls_jid);
 298        else
 299                fs_warn(sdp, "Journal recovery skipped for jid %d until next "
 300                        "mount.\n", sdp->sd_lockstruct.ls_jid);
 301        fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
 302        sdp->sd_glock_dqs_held = 0;
 303        wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
 304}
 305
 306void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
 307{
 308        struct va_format vaf;
 309        va_list args;
 310
 311        if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
 312            test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
 313                return;
 314
 315        va_start(args, fmt);
 316        vaf.fmt = fmt;
 317        vaf.va = &args;
 318        fs_err(sdp, "%pV", &vaf);
 319        va_end(args);
 320}
 321
 322int gfs2_withdraw(struct gfs2_sbd *sdp)
 323{
 324        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 325        const struct lm_lockops *lm = ls->ls_ops;
 326
 327        if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
 328            test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
 329                if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
 330                        return -1;
 331
 332                wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
 333                            TASK_UNINTERRUPTIBLE);
 334                return -1;
 335        }
 336
 337        set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
 338
 339        if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
 340                fs_err(sdp, "about to withdraw this file system\n");
 341                BUG_ON(sdp->sd_args.ar_debug);
 342
 343                signal_our_withdraw(sdp);
 344
 345                kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
 346
 347                if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
 348                        wait_for_completion(&sdp->sd_wdack);
 349
 350                if (lm->lm_unmount) {
 351                        fs_err(sdp, "telling LM to unmount\n");
 352                        lm->lm_unmount(sdp);
 353                }
 354                set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
 355                fs_err(sdp, "File system withdrawn\n");
 356                dump_stack();
 357                clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
 358                smp_mb__after_atomic();
 359                wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
 360        }
 361
 362        if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 363                panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
 364
 365        return -1;
 366}
 367
 368/*
 369 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
 370 */
 371
 372void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
 373                            const char *function, char *file, unsigned int line,
 374                            bool delayed)
 375{
 376        if (gfs2_withdrawn(sdp))
 377                return;
 378
 379        fs_err(sdp,
 380               "fatal: assertion \"%s\" failed\n"
 381               "   function = %s, file = %s, line = %u\n",
 382               assertion, function, file, line);
 383
 384        /*
 385         * If errors=panic was specified on mount, it won't help to delay the
 386         * withdraw.
 387         */
 388        if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 389                delayed = false;
 390
 391        if (delayed)
 392                gfs2_withdraw_delayed(sdp);
 393        else
 394                gfs2_withdraw(sdp);
 395        dump_stack();
 396}
 397
 398/*
 399 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
 400 */
 401
 402void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
 403                        const char *function, char *file, unsigned int line)
 404{
 405        if (time_before(jiffies,
 406                        sdp->sd_last_warning +
 407                        gfs2_tune_get(sdp, gt_complain_secs) * HZ))
 408                return;
 409
 410        if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
 411                fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
 412                        assertion, function, file, line);
 413
 414        if (sdp->sd_args.ar_debug)
 415                BUG();
 416        else
 417                dump_stack();
 418
 419        if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 420                panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
 421                      "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
 422                      sdp->sd_fsname, assertion,
 423                      sdp->sd_fsname, function, file, line);
 424
 425        sdp->sd_last_warning = jiffies;
 426}
 427
 428/*
 429 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
 430 */
 431
 432void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
 433                    char *file, unsigned int line)
 434{
 435        gfs2_lm(sdp,
 436                "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
 437                function, file, line);
 438        gfs2_withdraw(sdp);
 439}
 440
 441/*
 442 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
 443 */
 444
 445void gfs2_consist_inode_i(struct gfs2_inode *ip,
 446                          const char *function, char *file, unsigned int line)
 447{
 448        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 449
 450        gfs2_lm(sdp,
 451                "fatal: filesystem consistency error\n"
 452                "  inode = %llu %llu\n"
 453                "  function = %s, file = %s, line = %u\n",
 454                (unsigned long long)ip->i_no_formal_ino,
 455                (unsigned long long)ip->i_no_addr,
 456                function, file, line);
 457        gfs2_withdraw(sdp);
 458}
 459
 460/*
 461 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
 462 */
 463
 464void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
 465                          const char *function, char *file, unsigned int line)
 466{
 467        struct gfs2_sbd *sdp = rgd->rd_sbd;
 468        char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
 469
 470        sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
 471        gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
 472        gfs2_lm(sdp,
 473                "fatal: filesystem consistency error\n"
 474                "  RG = %llu\n"
 475                "  function = %s, file = %s, line = %u\n",
 476                (unsigned long long)rgd->rd_addr,
 477                function, file, line);
 478        gfs2_withdraw(sdp);
 479}
 480
 481/*
 482 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
 483 * Returns: -1 if this call withdrew the machine,
 484 *          -2 if it was already withdrawn
 485 */
 486
 487int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 488                       const char *type, const char *function, char *file,
 489                       unsigned int line)
 490{
 491        int me;
 492
 493        gfs2_lm(sdp,
 494                "fatal: invalid metadata block\n"
 495                "  bh = %llu (%s)\n"
 496                "  function = %s, file = %s, line = %u\n",
 497                (unsigned long long)bh->b_blocknr, type,
 498                function, file, line);
 499        me = gfs2_withdraw(sdp);
 500        return (me) ? -1 : -2;
 501}
 502
 503/*
 504 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
 505 * Returns: -1 if this call withdrew the machine,
 506 *          -2 if it was already withdrawn
 507 */
 508
 509int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 510                           u16 type, u16 t, const char *function,
 511                           char *file, unsigned int line)
 512{
 513        int me;
 514
 515        gfs2_lm(sdp,
 516                "fatal: invalid metadata block\n"
 517                "  bh = %llu (type: exp=%u, found=%u)\n"
 518                "  function = %s, file = %s, line = %u\n",
 519                (unsigned long long)bh->b_blocknr, type, t,
 520                function, file, line);
 521        me = gfs2_withdraw(sdp);
 522        return (me) ? -1 : -2;
 523}
 524
 525/*
 526 * gfs2_io_error_i - Flag an I/O error and withdraw
 527 * Returns: -1 if this call withdrew the machine,
 528 *          0 if it was already withdrawn
 529 */
 530
 531int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
 532                    unsigned int line)
 533{
 534        gfs2_lm(sdp,
 535                "fatal: I/O error\n"
 536                "  function = %s, file = %s, line = %u\n",
 537                function, file, line);
 538        return gfs2_withdraw(sdp);
 539}
 540
 541/*
 542 * gfs2_io_error_bh_i - Flag a buffer I/O error
 543 * @withdraw: withdraw the filesystem
 544 */
 545
 546void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
 547                        const char *function, char *file, unsigned int line,
 548                        bool withdraw)
 549{
 550        if (gfs2_withdrawn(sdp))
 551                return;
 552
 553        fs_err(sdp, "fatal: I/O error\n"
 554               "  block = %llu\n"
 555               "  function = %s, file = %s, line = %u\n",
 556               (unsigned long long)bh->b_blocknr, function, file, line);
 557        if (withdraw)
 558                gfs2_withdraw(sdp);
 559}
 560
 561