linux/fs/ext4/mmp.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/fs.h>
   3#include <linux/random.h>
   4#include <linux/buffer_head.h>
   5#include <linux/utsname.h>
   6#include <linux/kthread.h>
   7
   8#include "ext4.h"
   9
  10/* Checksumming functions */
  11static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
  12{
  13        struct ext4_sb_info *sbi = EXT4_SB(sb);
  14        int offset = offsetof(struct mmp_struct, mmp_checksum);
  15        __u32 csum;
  16
  17        csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
  18
  19        return cpu_to_le32(csum);
  20}
  21
  22static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
  23{
  24        if (!ext4_has_metadata_csum(sb))
  25                return 1;
  26
  27        return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
  28}
  29
  30static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
  31{
  32        if (!ext4_has_metadata_csum(sb))
  33                return;
  34
  35        mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
  36}
  37
  38/*
  39 * Write the MMP block using REQ_SYNC to try to get the block on-disk
  40 * faster.
  41 */
  42static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
  43{
  44        struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
  45
  46        /*
  47         * We protect against freezing so that we don't create dirty buffers
  48         * on frozen filesystem.
  49         */
  50        sb_start_write(sb);
  51        ext4_mmp_csum_set(sb, mmp);
  52        mark_buffer_dirty(bh);
  53        lock_buffer(bh);
  54        bh->b_end_io = end_buffer_write_sync;
  55        get_bh(bh);
  56        submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh);
  57        wait_on_buffer(bh);
  58        sb_end_write(sb);
  59        if (unlikely(!buffer_uptodate(bh)))
  60                return 1;
  61
  62        return 0;
  63}
  64
  65/*
  66 * Read the MMP block. It _must_ be read from disk and hence we clear the
  67 * uptodate flag on the buffer.
  68 */
  69static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
  70                          ext4_fsblk_t mmp_block)
  71{
  72        struct mmp_struct *mmp;
  73        int ret;
  74
  75        if (*bh)
  76                clear_buffer_uptodate(*bh);
  77
  78        /* This would be sb_bread(sb, mmp_block), except we need to be sure
  79         * that the MD RAID device cache has been bypassed, and that the read
  80         * is not blocked in the elevator. */
  81        if (!*bh) {
  82                *bh = sb_getblk(sb, mmp_block);
  83                if (!*bh) {
  84                        ret = -ENOMEM;
  85                        goto warn_exit;
  86                }
  87        }
  88
  89        get_bh(*bh);
  90        lock_buffer(*bh);
  91        (*bh)->b_end_io = end_buffer_read_sync;
  92        submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, *bh);
  93        wait_on_buffer(*bh);
  94        if (!buffer_uptodate(*bh)) {
  95                ret = -EIO;
  96                goto warn_exit;
  97        }
  98        mmp = (struct mmp_struct *)((*bh)->b_data);
  99        if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
 100                ret = -EFSCORRUPTED;
 101                goto warn_exit;
 102        }
 103        if (!ext4_mmp_csum_verify(sb, mmp)) {
 104                ret = -EFSBADCRC;
 105                goto warn_exit;
 106        }
 107        return 0;
 108warn_exit:
 109        brelse(*bh);
 110        *bh = NULL;
 111        ext4_warning(sb, "Error %d while reading MMP block %llu",
 112                     ret, mmp_block);
 113        return ret;
 114}
 115
 116/*
 117 * Dump as much information as possible to help the admin.
 118 */
 119void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
 120                    const char *function, unsigned int line, const char *msg)
 121{
 122        __ext4_warning(sb, function, line, "%s", msg);
 123        __ext4_warning(sb, function, line,
 124                       "MMP failure info: last update time: %llu, last update "
 125                       "node: %s, last update device: %s",
 126                       (long long unsigned int) le64_to_cpu(mmp->mmp_time),
 127                       mmp->mmp_nodename, mmp->mmp_bdevname);
 128}
 129
/*
 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 *
 * Kernel-thread body.  @data is a struct mmpd_data carrying the super
 * block and the buffer head of the MMP block; the thread frees @data and
 * releases the buffer head when it exits.
 *
 * Each iteration bumps the sequence number, rewrites the MMP block and
 * sleeps for the remainder of the update interval.  If more than
 * mmp_check_interval seconds elapsed since the write, the block is re-read
 * from disk and compared against what this node wrote, to detect another
 * node writing to the same filesystem.
 *
 * The loop ends when the thread is asked to stop (a clean sequence value
 * is then written), or early when the MMP feature has been disabled, the
 * filesystem has become read-only, the re-read fails, or a foreign update
 * is detected.
 *
 * Returns the status of the last MMP read/write performed, or -EBUSY when
 * a multiple mount was detected.
 */
static int kmmpd(void *data)
{
        struct super_block *sb = ((struct mmpd_data *) data)->sb;
        struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        struct mmp_struct *mmp;
        ext4_fsblk_t mmp_block;
        u32 seq = 0;
        /* Number of failed MMP writes so far; used to rate-limit errors. */
        unsigned long failed_writes = 0;
        int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
        unsigned mmp_check_interval;
        /* jiffies value taken just before the most recent MMP write. */
        unsigned long last_update_time;
        unsigned long diff;
        int retval;

        mmp_block = le64_to_cpu(es->s_mmp_block);
        mmp = (struct mmp_struct *)(bh->b_data);
        mmp->mmp_time = cpu_to_le64(get_seconds());
        /*
         * Start with the higher mmp_check_interval and reduce it if
         * the MMP block is being updated on time.
         */
        mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
                                 EXT4_MMP_MIN_CHECK_INTERVAL);
        mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
        /* Record which device and which node own the block from now on. */
        bdevname(bh->b_bdev, mmp->mmp_bdevname);

        memcpy(mmp->mmp_nodename, init_utsname()->nodename,
               sizeof(mmp->mmp_nodename));

        while (!kthread_should_stop()) {
                /* Advance the sequence, wrapping back to 1 past the max. */
                if (++seq > EXT4_MMP_SEQ_MAX)
                        seq = 1;

                mmp->mmp_seq = cpu_to_le32(seq);
                mmp->mmp_time = cpu_to_le64(get_seconds());
                last_update_time = jiffies;

                retval = write_mmp_block(sb, bh);
                /*
                 * Don't spew too many error messages. Print one every
                 * (s_mmp_update_interval * 60) seconds.
                 */
                if (retval) {
                        if ((failed_writes % 60) == 0)
                                ext4_error(sb, "Error writing to MMP block");
                        failed_writes++;
                }

                /* Nothing left to protect once the feature flag is gone. */
                if (!(le32_to_cpu(es->s_feature_incompat) &
                    EXT4_FEATURE_INCOMPAT_MMP)) {
                        ext4_warning(sb, "kmmpd being stopped since MMP feature"
                                     " has been disabled.");
                        goto exit_thread;
                }

                if (sb_rdonly(sb)) {
                        ext4_warning(sb, "kmmpd being stopped since filesystem "
                                     "has been remounted as readonly.");
                        goto exit_thread;
                }

                /*
                 * Sleep for whatever is left of the update interval after
                 * accounting for the time the write itself took.
                 */
                diff = jiffies - last_update_time;
                if (diff < mmp_update_interval * HZ)
                        schedule_timeout_interruptible(mmp_update_interval *
                                                       HZ - diff);

                /*
                 * We need to make sure that more than mmp_check_interval
                 * seconds have not passed since writing. If that has happened
                 * we need to check if the MMP block is as we left it.
                 */
                diff = jiffies - last_update_time;
                if (diff > mmp_check_interval * HZ) {
                        struct buffer_head *bh_check = NULL;
                        struct mmp_struct *mmp_check;

                        retval = read_mmp_block(sb, &bh_check, mmp_block);
                        if (retval) {
                                ext4_error(sb, "error reading MMP data: %d",
                                           retval);
                                goto exit_thread;
                        }

                        /*
                         * A different sequence number or node name on disk
                         * means somebody else wrote the block in between.
                         */
                        mmp_check = (struct mmp_struct *)(bh_check->b_data);
                        if (mmp->mmp_seq != mmp_check->mmp_seq ||
                            memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
                                   sizeof(mmp->mmp_nodename))) {
                                dump_mmp_msg(sb, mmp_check,
                                             "Error while updating MMP info. "
                                             "The filesystem seems to have been"
                                             " multiply mounted.");
                                ext4_error(sb, "abort");
                                put_bh(bh_check);
                                retval = -EBUSY;
                                goto exit_thread;
                        }
                        put_bh(bh_check);
                }

                /*
                 * Adjust the mmp_check_interval depending on how much time
                 * it took for the MMP block to be written.
                 */
                mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
                                             EXT4_MMP_MAX_CHECK_INTERVAL),
                                         EXT4_MMP_MIN_CHECK_INTERVAL);
                mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
        }

        /*
         * Unmount seems to be clean.
         */
        mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
        mmp->mmp_time = cpu_to_le64(get_seconds());

        retval = write_mmp_block(sb, bh);

exit_thread:
        /*
         * Common teardown: forget the task pointer, free the argument
         * structure passed in by the creator and drop the MMP buffer.
         */
        EXT4_SB(sb)->s_mmp_tsk = NULL;
        kfree(data);
        brelse(bh);
        return retval;
}
 257
 258/*
 259 * Get a random new sequence number but make sure it is not greater than
 260 * EXT4_MMP_SEQ_MAX.
 261 */
 262static unsigned int mmp_new_seq(void)
 263{
 264        u32 new_seq;
 265
 266        do {
 267                new_seq = prandom_u32();
 268        } while (new_seq > EXT4_MMP_SEQ_MAX);
 269
 270        return new_seq;
 271}
 272
/*
 * Protect the filesystem from being mounted more than once.
 *
 * Reads the MMP block at @mmp_block and, unless it is marked clean, waits
 * and re-reads it to see whether another node is still updating it.  Then
 * a fresh random sequence number is written, the block is re-read after a
 * further wait to make sure nobody overwrote it, and the kmmpd thread is
 * started to keep the block updated.
 *
 * On success the MMP buffer head is handed over to kmmpd (via mmpd_data),
 * which releases it when the thread exits.  On failure the buffer head is
 * released here.
 *
 * Returns 0 on success and 1 on any failure; the detailed error code of
 * the failing step is only logged, not returned.
 */
int ext4_multi_mount_protect(struct super_block *sb,
                                    ext4_fsblk_t mmp_block)
{
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        struct buffer_head *bh = NULL;
        struct mmp_struct *mmp = NULL;
        struct mmpd_data *mmpd_data;
        u32 seq;
        unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
        /* Stays 0 (no waiting) when the block is found marked clean. */
        unsigned int wait_time = 0;
        int retval;

        /* The MMP block must lie inside the filesystem's block range. */
        if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
            mmp_block >= ext4_blocks_count(es)) {
                ext4_warning(sb, "Invalid MMP block in superblock");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;

        mmp = (struct mmp_struct *)(bh->b_data);

        if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
                mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;

        /*
         * If check_interval in MMP block is larger, use that instead of
         * update_interval from the superblock.
         */
        if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
                mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);

        seq = le32_to_cpu(mmp->mmp_seq);
        /* Marked clean: no need to watch for another node first. */
        if (seq == EXT4_MMP_SEQ_CLEAN)
                goto skip;

        if (seq == EXT4_MMP_SEQ_FSCK) {
                dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
                goto failed;
        }

        /*
         * Wait a bit more than one check interval, but never more than
         * 60 seconds beyond it.
         */
        wait_time = min(mmp_check_interval * 2 + 1,
                        mmp_check_interval + 60);

        /* Print MMP interval if more than 20 secs. */
        if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
                ext4_warning(sb, "MMP interval %u higher than expected, please"
                             " wait.\n", wait_time * 2);

        /* A non-zero return means the sleep was cut short. */
        if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
                ext4_warning(sb, "MMP startup interrupted, failing mount\n");
                goto failed;
        }

        /* If the sequence moved while we slept, another node is active. */
        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;
        mmp = (struct mmp_struct *)(bh->b_data);
        if (seq != le32_to_cpu(mmp->mmp_seq)) {
                dump_mmp_msg(sb, mmp,
                             "Device is already active on another node.");
                goto failed;
        }

skip:
        /*
         * write a new random sequence number.
         */
        seq = mmp_new_seq();
        mmp->mmp_seq = cpu_to_le32(seq);

        retval = write_mmp_block(sb, bh);
        if (retval)
                goto failed;

        /*
         * wait for MMP interval and check mmp_seq.
         */
        if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
                ext4_warning(sb, "MMP startup interrupted, failing mount");
                goto failed;
        }

        /* Our sequence must still be on disk, else someone else wrote it. */
        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;
        mmp = (struct mmp_struct *)(bh->b_data);
        if (seq != le32_to_cpu(mmp->mmp_seq)) {
                dump_mmp_msg(sb, mmp,
                             "Device is already active on another node.");
                goto failed;
        }

        mmpd_data = kmalloc(sizeof(*mmpd_data), GFP_KERNEL);
        if (!mmpd_data) {
                ext4_warning(sb, "not enough memory for mmpd_data");
                goto failed;
        }
        mmpd_data->sb = sb;
        mmpd_data->bh = bh;

        /*
         * Start a kernel thread to update the MMP block periodically.
         */
        EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
                                             bdevname(bh->b_bdev,
                                                      mmp->mmp_bdevname));
        if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
                /* Thread never ran, so mmpd_data is still ours to free. */
                EXT4_SB(sb)->s_mmp_tsk = NULL;
                kfree(mmpd_data);
                ext4_warning(sb, "Unable to create kmmpd thread for %s.",
                             sb->s_id);
                goto failed;
        }

        return 0;

failed:
        /* bh may be NULL here (e.g. after a failed read). */
        brelse(bh);
        return 1;
}
 399
 400
 401