linux/fs/ext4/mmp.c
#include <linux/fs.h>
#include <linux/random.h>
#include <linux/buffer_head.h>
#include <linux/utsname.h>
#include <linux/kthread.h>

#include "ext4.h"

/* Checksumming functions */
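/*
 * The MMP checksum covers the mmp_struct from its start up to (but not
 * including) the mmp_checksum field, seeded with the per-filesystem
 * checksum seed.  Setting and verifying are no-ops when the metadata_csum
 * feature is not enabled.
 */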
static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int offset = offsetof(struct mmp_struct, mmp_checksum);
        __u32 csum;

        csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);

        return cpu_to_le32(csum);
}

static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
{
        if (!ext4_has_metadata_csum(sb))
                return 1;

        return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
}

static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
{
        if (!ext4_has_metadata_csum(sb))
                return;

        mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
}

/*
 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
 * faster.
 */
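/* Returns 0 on success and 1 if the buffer was not written back cleanly. */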
static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
        struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);

        /*
         * We protect against freezing so that we don't create dirty buffers
         * on a frozen filesystem.
         */
        sb_start_write(sb);
        ext4_mmp_csum_set(sb, mmp);
        mark_buffer_dirty(bh);
        lock_buffer(bh);
        bh->b_end_io = end_buffer_write_sync;
        get_bh(bh);
        submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
        wait_on_buffer(bh);
        sb_end_write(sb);
        if (unlikely(!buffer_uptodate(bh)))
                return 1;

        return 0;
}

/*
 * Read the MMP block. It _must_ be read from disk and hence we clear the
 * uptodate flag on the buffer.
 */
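/*
 * Returns 0 on success, -ENOMEM if no buffer head could be obtained,
 * -EIO if the read failed (*bh is then set to NULL), and -EINVAL if the
 * block read back with a bad magic number or checksum.
 */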
static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
                          ext4_fsblk_t mmp_block)
{
        struct mmp_struct *mmp;

        if (*bh)
                clear_buffer_uptodate(*bh);

        /* This would be sb_bread(sb, mmp_block), except we need to be sure
         * that the MD RAID device cache has been bypassed, and that the read
         * is not blocked in the elevator. */
        if (!*bh)
                *bh = sb_getblk(sb, mmp_block);
        if (!*bh)
                return -ENOMEM;
        if (*bh) {
                get_bh(*bh);
                lock_buffer(*bh);
                (*bh)->b_end_io = end_buffer_read_sync;
                submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
                wait_on_buffer(*bh);
                if (!buffer_uptodate(*bh)) {
                        brelse(*bh);
                        *bh = NULL;
                }
        }
        if (unlikely(!*bh)) {
                ext4_warning(sb, "Error while reading MMP block %llu",
                             mmp_block);
                return -EIO;
        }

        mmp = (struct mmp_struct *)((*bh)->b_data);
        if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
            !ext4_mmp_csum_verify(sb, mmp))
                return -EINVAL;

        return 0;
}

/*
 * Dump as much information as possible to help the admin.
 */
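/*
 * Callers use the dump_mmp_msg() wrapper, which supplies the calling
 * function name and line number for these warnings.
 */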
void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
                    const char *function, unsigned int line, const char *msg)
{
        __ext4_warning(sb, function, line, "%s", msg);
        __ext4_warning(sb, function, line,
                       "MMP failure info: last update time: %llu, last update "
                       "node: %s, last update device: %s\n",
                       (long long unsigned int) le64_to_cpu(mmp->mmp_time),
                       mmp->mmp_nodename, mmp->mmp_bdevname);
}

/*
 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 */
static int kmmpd(void *data)
{
        struct super_block *sb = ((struct mmpd_data *) data)->sb;
        struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        struct mmp_struct *mmp;
        ext4_fsblk_t mmp_block;
        u32 seq = 0;
        unsigned long failed_writes = 0;
        int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
        unsigned mmp_check_interval;
        unsigned long last_update_time;
        unsigned long diff;
        int retval;

        mmp_block = le64_to_cpu(es->s_mmp_block);
        mmp = (struct mmp_struct *)(bh->b_data);
        mmp->mmp_time = cpu_to_le64(get_seconds());
        /*
         * Start with the higher mmp_check_interval and reduce it if
         * the MMP block is being updated on time.
         */
        mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
                                 EXT4_MMP_MIN_CHECK_INTERVAL);
        mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
        bdevname(bh->b_bdev, mmp->mmp_bdevname);

        memcpy(mmp->mmp_nodename, init_utsname()->nodename,
               sizeof(mmp->mmp_nodename));

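        /*
         * Main loop: bump mmp_seq, write the block out, then sleep for
         * s_mmp_update_interval seconds.  If the write ran long, re-read
         * the block to make sure no other node has modified it, and adapt
         * mmp_check_interval to how long the writes are actually taking.
         */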
        while (!kthread_should_stop()) {
                if (++seq > EXT4_MMP_SEQ_MAX)
                        seq = 1;

                mmp->mmp_seq = cpu_to_le32(seq);
                mmp->mmp_time = cpu_to_le64(get_seconds());
                last_update_time = jiffies;

                retval = write_mmp_block(sb, bh);
                /*
                 * Don't spew too many error messages. Print one every
                 * (s_mmp_update_interval * 60) seconds.
                 */
                if (retval) {
                        if ((failed_writes % 60) == 0)
                                ext4_error(sb, "Error writing to MMP block");
                        failed_writes++;
                }

                if (!(le32_to_cpu(es->s_feature_incompat) &
                    EXT4_FEATURE_INCOMPAT_MMP)) {
                        ext4_warning(sb, "kmmpd being stopped since MMP feature"
                                     " has been disabled.");
                        EXT4_SB(sb)->s_mmp_tsk = NULL;
                        goto failed;
                }

                if (sb->s_flags & MS_RDONLY) {
                        ext4_warning(sb, "kmmpd being stopped since filesystem "
                                     "has been remounted as readonly.");
                        EXT4_SB(sb)->s_mmp_tsk = NULL;
                        goto failed;
                }

                diff = jiffies - last_update_time;
                if (diff < mmp_update_interval * HZ)
                        schedule_timeout_interruptible(mmp_update_interval *
                                                       HZ - diff);

                /*
                 * We need to make sure that more than mmp_check_interval
                 * seconds have not passed since writing. If that has happened
                 * we need to check if the MMP block is as we left it.
                 */
                diff = jiffies - last_update_time;
                if (diff > mmp_check_interval * HZ) {
                        struct buffer_head *bh_check = NULL;
                        struct mmp_struct *mmp_check;

                        retval = read_mmp_block(sb, &bh_check, mmp_block);
                        if (retval) {
                                ext4_error(sb, "error reading MMP data: %d",
                                           retval);

                                EXT4_SB(sb)->s_mmp_tsk = NULL;
                                goto failed;
                        }

                        mmp_check = (struct mmp_struct *)(bh_check->b_data);
                        if (mmp->mmp_seq != mmp_check->mmp_seq ||
                            memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
                                   sizeof(mmp->mmp_nodename))) {
                                dump_mmp_msg(sb, mmp_check,
                                             "Error while updating MMP info. "
                                             "The filesystem seems to have been"
                                             " multiply mounted.");
                                ext4_error(sb, "abort");
                                goto failed;
                        }
                        put_bh(bh_check);
                }

                /*
                 * Adjust the mmp_check_interval depending on how much time
                 * it took for the MMP block to be written.
                 */
                mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
                                             EXT4_MMP_MAX_CHECK_INTERVAL),
                                         EXT4_MMP_MIN_CHECK_INTERVAL);
                mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
        }

        /*
         * Unmount seems to be clean.
         */
        mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
        mmp->mmp_time = cpu_to_le64(get_seconds());

        retval = write_mmp_block(sb, bh);

failed:
        kfree(data);
        brelse(bh);
        return retval;
}

/*
 * Get a random new sequence number but make sure it is not greater than
 * EXT4_MMP_SEQ_MAX.
 */
static unsigned int mmp_new_seq(void)
{
        u32 new_seq;

        do {
                new_seq = prandom_u32();
        } while (new_seq > EXT4_MMP_SEQ_MAX);

        return new_seq;
}

/*
 * Protect the filesystem from being mounted more than once.
 */
int ext4_multi_mount_protect(struct super_block *sb,
                             ext4_fsblk_t mmp_block)
{
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        struct buffer_head *bh = NULL;
        struct mmp_struct *mmp = NULL;
        struct mmpd_data *mmpd_data;
        u32 seq;
        unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
        unsigned int wait_time = 0;
        int retval;

        if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
            mmp_block >= ext4_blocks_count(es)) {
                ext4_warning(sb, "Invalid MMP block in superblock");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;

        mmp = (struct mmp_struct *)(bh->b_data);

        if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
                mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;

        /*
         * If check_interval in MMP block is larger, use that instead of
         * update_interval from the superblock.
         */
        if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
                mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);

        seq = le32_to_cpu(mmp->mmp_seq);
        if (seq == EXT4_MMP_SEQ_CLEAN)
                goto skip;

        if (seq == EXT4_MMP_SEQ_FSCK) {
                dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
                goto failed;
        }

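        /*
         * The sequence number is live, so another node may already have the
         * filesystem mounted.  Sleep for longer than the advertised check
         * interval and re-read the block: an active kmmpd is expected to
         * bump mmp_seq within mmp_check_interval seconds, so an unchanged
         * sequence means the device is not in use elsewhere.
         */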
        wait_time = min(mmp_check_interval * 2 + 1,
                        mmp_check_interval + 60);

        /* Print MMP interval if more than 20 secs. */
        if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
                ext4_warning(sb, "MMP interval %u higher than expected, please"
                             " wait.\n", wait_time * 2);

        if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
                ext4_warning(sb, "MMP startup interrupted, failing mount\n");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;
        mmp = (struct mmp_struct *)(bh->b_data);
        if (seq != le32_to_cpu(mmp->mmp_seq)) {
                dump_mmp_msg(sb, mmp,
                             "Device is already active on another node.");
                goto failed;
        }

skip:
        /*
         * write a new random sequence number.
         */
        seq = mmp_new_seq();
        mmp->mmp_seq = cpu_to_le32(seq);

        retval = write_mmp_block(sb, bh);
        if (retval)
                goto failed;

        /*
         * wait for MMP interval and check mmp_seq.
         */
        if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
                ext4_warning(sb, "MMP startup interrupted, failing mount\n");
                goto failed;
        }

        retval = read_mmp_block(sb, &bh, mmp_block);
        if (retval)
                goto failed;
        mmp = (struct mmp_struct *)(bh->b_data);
        if (seq != le32_to_cpu(mmp->mmp_seq)) {
                dump_mmp_msg(sb, mmp,
                             "Device is already active on another node.");
                goto failed;
        }

        mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
        if (!mmpd_data) {
                ext4_warning(sb, "not enough memory for mmpd_data");
                goto failed;
        }
        mmpd_data->sb = sb;
        mmpd_data->bh = bh;

        /*
         * Start a kernel thread to update the MMP block periodically.
         */
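        /*
         * kmmpd takes over both mmpd_data and the MMP buffer head: it frees
         * them when it exits, so they are only released here if the thread
         * could not be started.
         */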
        EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
                                             bdevname(bh->b_bdev,
                                                      mmp->mmp_bdevname));
        if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
                EXT4_SB(sb)->s_mmp_tsk = NULL;
                kfree(mmpd_data);
                ext4_warning(sb, "Unable to create kmmpd thread for %s.",
                             sb->s_id);
                goto failed;
        }

        return 0;

failed:
        brelse(bh);
        return 1;
}