linux/fs/ext4/mmp.c
#include <linux/fs.h>
#include <linux/random.h>
#include <linux/buffer_head.h>
#include <linux/utsname.h>
#include <linux/kthread.h>

#include "ext4.h"

/* Checksumming functions */
static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int offset = offsetof(struct mmp_struct, mmp_checksum);
        __u32 csum;

        csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);

        return cpu_to_le32(csum);
}

static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
{
        if (!ext4_has_metadata_csum(sb))
                return 1;

        return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
}

static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
{
        if (!ext4_has_metadata_csum(sb))
                return;

        mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
}

/*
 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
 * faster.
 */
static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
        struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);

        /*
         * We protect against freezing so that we don't create dirty buffers
         * on a frozen filesystem.
         */
        sb_start_write(sb);
        ext4_mmp_csum_set(sb, mmp);
        mark_buffer_dirty(bh);
        lock_buffer(bh);
        bh->b_end_io = end_buffer_write_sync;
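        /* end_buffer_write_sync() drops a reference, so take one for the I/O */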
        get_bh(bh);
        submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
        wait_on_buffer(bh);
        sb_end_write(sb);
        if (unlikely(!buffer_uptodate(bh)))
                return 1;

        return 0;
}

/*
 * Read the MMP block. It _must_ be read from disk and hence we clear the
 * uptodate flag on the buffer.
 */
static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
                          ext4_fsblk_t mmp_block)
{
        struct mmp_struct *mmp;
        int ret;

        if (*bh)
                clear_buffer_uptodate(*bh);

        /* This would be sb_bread(sb, mmp_block), except we need to be sure
         * that the MD RAID device cache has been bypassed, and that the read
         * is not blocked in the elevator. */
        if (!*bh) {
                *bh = sb_getblk(sb, mmp_block);
                if (!*bh) {
                        ret = -ENOMEM;
                        goto warn_exit;
                }
        }

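        /* Extra reference for the I/O; end_buffer_read_sync() releases it. */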
        get_bh(*bh);
        lock_buffer(*bh);
        (*bh)->b_end_io = end_buffer_read_sync;
        submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
        wait_on_buffer(*bh);
        if (!buffer_uptodate(*bh)) {
                brelse(*bh);
                *bh = NULL;
                ret = -EIO;
                goto warn_exit;
        }

        mmp = (struct mmp_struct *)((*bh)->b_data);
        if (le32_to_cpu(mmp->mmp_magic) == EXT4_MMP_MAGIC &&
            ext4_mmp_csum_verify(sb, mmp))
                return 0;
        ret = -EINVAL;

warn_exit:
        ext4_warning(sb, "Error %d while reading MMP block %llu",
                     ret, mmp_block);
        return ret;
}

/*
 * Dump as much information as possible to help the admin.
 */
void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
                    const char *function, unsigned int line, const char *msg)
{
        __ext4_warning(sb, function, line, "%s", msg);
        __ext4_warning(sb, function, line,
                       "MMP failure info: last update time: %llu, last update "
                       "node: %s, last update device: %s\n",
                       (long long unsigned int) le64_to_cpu(mmp->mmp_time),
                       mmp->mmp_nodename, mmp->mmp_bdevname);
}

/*
 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 */
static int kmmpd(void *data)
{
        struct super_block *sb = ((struct mmpd_data *) data)->sb;
        struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        struct mmp_struct *mmp;
        ext4_fsblk_t mmp_block;
        u32 seq = 0;
        unsigned long failed_writes = 0;
        int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
        unsigned mmp_check_interval;
        unsigned long last_update_time;
        unsigned long diff;
        int retval;

        mmp_block = le64_to_cpu(es->s_mmp_block);
        mmp = (struct mmp_struct *)(bh->b_data);
        mmp->mmp_time = cpu_to_le64(get_seconds());
        /*
         * Start with the higher mmp_check_interval and reduce it if
         * the MMP block is being updated on time.
         */
        mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
                                 EXT4_MMP_MIN_CHECK_INTERVAL);
        mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
        bdevname(bh->b_bdev, mmp->mmp_bdevname);

        memcpy(mmp->mmp_nodename, init_utsname()->nodename,
               sizeof(mmp->mmp_nodename));

        while (!kthread_should_stop()) {
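                /*
                 * Bump the sequence number, wrapping back to 1 before the
                 * reserved CLEAN/FSCK values above EXT4_MMP_SEQ_MAX.
                 */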
                if (++seq > EXT4_MMP_SEQ_MAX)
                        seq = 1;

                mmp->mmp_seq = cpu_to_le32(seq);
                mmp->mmp_time = cpu_to_le64(get_seconds());
                last_update_time = jiffies;

                retval = write_mmp_block(sb, bh);
                /*
                 * Don't spew too many error messages. Print one every
                 * (s_mmp_update_interval * 60) seconds.
                 */
                if (retval) {
                        if ((failed_writes % 60) == 0)
                                ext4_error(sb, "Error writing to MMP block");
                        failed_writes++;
                }

                if (!(le32_to_cpu(es->s_feature_incompat) &
                    EXT4_FEATURE_INCOMPAT_MMP)) {
                        ext4_warning(sb, "kmmpd being stopped since MMP feature"
                                     " has been disabled.");
                        EXT4_SB(sb)->s_mmp_tsk = NULL;
                        goto failed;
                }

                if (sb->s_flags & MS_RDONLY) {
                        ext4_warning(sb, "kmmpd being stopped since filesystem "
                                     "has been remounted as readonly.");
                        EXT4_SB(sb)->s_mmp_tsk = NULL;
                        goto failed;
                }

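                /*
                 * Sleep out the rest of the update interval; kthread_stop()
                 * wakes us early when the thread is being stopped.
                 */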
                diff = jiffies - last_update_time;
                if (diff < mmp_update_interval * HZ)
                        schedule_timeout_interruptible(mmp_update_interval *
                                                       HZ - diff);

                /*
                 * We need to make sure that more than mmp_check_interval
                 * seconds have not passed since writing. If that has happened
                 * we need to check if the MMP block is as we left it.
                 */
                diff = jiffies - last_update_time;
                if (diff > mmp_check_interval * HZ) {
                        struct buffer_head *bh_check = NULL;
                        struct mmp_struct *mmp_check;

                        retval = read_mmp_block(sb, &bh_check, mmp_block);
                        if (retval) {
                                ext4_error(sb, "error reading MMP data: %d",
                                           retval);

                                EXT4_SB(sb)->s_mmp_tsk = NULL;
                                goto failed;
                        }

                        mmp_check = (struct mmp_struct *)(bh_check->b_data);
                        if (mmp->mmp_seq != mmp_check->mmp_seq ||
                            memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
                                   sizeof(mmp->mmp_nodename))) {
                                dump_mmp_msg(sb, mmp_check,
                                             "Error while updating MMP info. "
                                             "The filesystem seems to have been"
                                             " multiply mounted.");
                                ext4_error(sb, "abort");
                                goto failed;
                        }
                        put_bh(bh_check);
                }

                /*
                 * Adjust the mmp_check_interval depending on how much time
                 * it took for the MMP block to be written.
                 */
                mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
                                             EXT4_MMP_MAX_CHECK_INTERVAL),
                                         EXT4_MMP_MIN_CHECK_INTERVAL);
                mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
        }

        /*
         * Unmount seems to be clean.
         */
        mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
        mmp->mmp_time = cpu_to_le64(get_seconds());

        retval = write_mmp_block(sb, bh);

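/* Common exit: free the mmpd_data passed in and release the MMP buffer. */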
failed:
        kfree(data);
        brelse(bh);
        return retval;
}

/*
 * Get a random new sequence number but make sure it is not greater than
 * EXT4_MMP_SEQ_MAX.
 */
static unsigned int mmp_new_seq(void)
{
        u32 new_seq;

        do {
                new_seq = prandom_u32();
        } while (new_seq > EXT4_MMP_SEQ_MAX);

        return new_seq;
}

/*
 * Protect the filesystem from being mounted more than once.
 */
int ext4_multi_mount_protect(struct super_block *sb,
                                    ext4_fsblk_t mmp_block)
{
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        struct buffer_head *bh = NULL;
        struct mmp_struct *mmp = NULL;
        struct mmpd_data *mmpd_data;
        u32 seq;
        unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
        unsigned int wait_time = 0;
        int retval;

        if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
            mmp_block >= ext4_blocks_count(es)) {
                ext4_warning(sb, "Invalid MMP block in superblock");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;

        mmp = (struct mmp_struct *)(bh->b_data);

        if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
                mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;

        /*
         * If check_interval in MMP block is larger, use that instead of
         * update_interval from the superblock.
         */
        if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
                mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);

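        /*
         * A clean sequence means the last user unmounted cleanly, so the
         * startup wait below can be skipped.
         */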
        seq = le32_to_cpu(mmp->mmp_seq);
        if (seq == EXT4_MMP_SEQ_CLEAN)
                goto skip;

        if (seq == EXT4_MMP_SEQ_FSCK) {
                dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
                goto failed;
        }

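        /*
         * Wait long enough for an active holder to update the sequence at
         * least once: twice the check interval plus a second, capped at the
         * check interval plus 60 seconds.
         */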
        wait_time = min(mmp_check_interval * 2 + 1,
                        mmp_check_interval + 60);

        /* Print MMP interval if more than 20 secs. */
        if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
                ext4_warning(sb, "MMP interval %u higher than expected, please"
                             " wait.\n", wait_time * 2);

        if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
                ext4_warning(sb, "MMP startup interrupted, failing mount\n");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;
        mmp = (struct mmp_struct *)(bh->b_data);
        if (seq != le32_to_cpu(mmp->mmp_seq)) {
                dump_mmp_msg(sb, mmp,
                             "Device is already active on another node.");
                goto failed;
        }

skip:
        /*
         * write a new random sequence number.
         */
        seq = mmp_new_seq();
        mmp->mmp_seq = cpu_to_le32(seq);

        retval = write_mmp_block(sb, bh);
        if (retval)
                goto failed;

        /*
         * wait for MMP interval and check mmp_seq.
         */
        if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
                ext4_warning(sb, "MMP startup interrupted, failing mount\n");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;
        mmp = (struct mmp_struct *)(bh->b_data);
        if (seq != le32_to_cpu(mmp->mmp_seq)) {
                dump_mmp_msg(sb, mmp,
                             "Device is already active on another node.");
                goto failed;
        }

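        /*
         * Hand the superblock and the pinned MMP buffer over to kmmpd; the
         * thread frees the mmpd_data and releases the buffer when it exits.
         */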
        mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
        if (!mmpd_data) {
                ext4_warning(sb, "not enough memory for mmpd_data");
                goto failed;
        }
        mmpd_data->sb = sb;
        mmpd_data->bh = bh;

        /*
         * Start a kernel thread to update the MMP block periodically.
         */
        EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
                                             bdevname(bh->b_bdev,
                                                      mmp->mmp_bdevname));
        if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
                EXT4_SB(sb)->s_mmp_tsk = NULL;
                kfree(mmpd_data);
                ext4_warning(sb, "Unable to create kmmpd thread for %s.",
                             sb->s_id);
                goto failed;
        }

        return 0;

failed:
        brelse(bh);
        return 1;
}