linux/drivers/md/bitmap.c
<<
>>
Prefs
   1/*
   2 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
   3 *
   4 * bitmap_create  - sets up the bitmap structure
   5 * bitmap_destroy - destroys the bitmap structure
   6 *
   7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
   8 * - added disk storage for bitmap
   9 * - changes to allow various bitmap chunk sizes
  10 */
  11
  12/*
  13 * Still to do:
  14 *
  15 * flush after percent set rather than just time based. (maybe both).
  16 */
  17
  18#include <linux/blkdev.h>
  19#include <linux/module.h>
  20#include <linux/errno.h>
  21#include <linux/slab.h>
  22#include <linux/init.h>
  23#include <linux/timer.h>
  24#include <linux/sched.h>
  25#include <linux/list.h>
  26#include <linux/file.h>
  27#include <linux/mount.h>
  28#include <linux/buffer_head.h>
  29#include <linux/seq_file.h>
  30#include "md.h"
  31#include "bitmap.h"
  32
  33static inline char *bmname(struct bitmap *bitmap)
  34{
  35        return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
  36}
  37
  38/*
  39 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
  40 *
  41 * 1) check to see if this page is allocated, if it's not then try to alloc
  42 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
  43 *    page pointer directly as a counter
  44 *
  45 * if we find our page, we increment the page's refcount so that it stays
  46 * allocated while we're using it
  47 */
  48static int bitmap_checkpage(struct bitmap_counts *bitmap,
  49                            unsigned long page, int create)
  50__releases(bitmap->lock)
  51__acquires(bitmap->lock)
  52{
  53        unsigned char *mappage;
  54
  55        if (page >= bitmap->pages) {
  56                /* This can happen if bitmap_start_sync goes beyond
  57                 * End-of-device while looking for a whole page.
  58                 * It is harmless.
  59                 */
  60                return -EINVAL;
  61        }
  62
  63        if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
  64                return 0;
  65
  66        if (bitmap->bp[page].map) /* page is already allocated, just return */
  67                return 0;
  68
  69        if (!create)
  70                return -ENOENT;
  71
  72        /* this page has not been allocated yet */
  73
  74        spin_unlock_irq(&bitmap->lock);
  75        /* It is possible that this is being called inside a
  76         * prepare_to_wait/finish_wait loop from raid5c:make_request().
  77         * In general it is not permitted to sleep in that context as it
  78         * can cause the loop to spin freely.
  79         * That doesn't apply here as we can only reach this point
  80         * once with any loop.
  81         * When this function completes, either bp[page].map or
  82         * bp[page].hijacked.  In either case, this function will
  83         * abort before getting to this point again.  So there is
  84         * no risk of a free-spin, and so it is safe to assert
  85         * that sleeping here is allowed.
  86         */
  87        sched_annotate_sleep();
  88        mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
  89        spin_lock_irq(&bitmap->lock);
  90
  91        if (mappage == NULL) {
  92                pr_debug("md/bitmap: map page allocation failed, hijacking\n");
  93                /* failed - set the hijacked flag so that we can use the
  94                 * pointer as a counter */
  95                if (!bitmap->bp[page].map)
  96                        bitmap->bp[page].hijacked = 1;
  97        } else if (bitmap->bp[page].map ||
  98                   bitmap->bp[page].hijacked) {
  99                /* somebody beat us to getting the page */
 100                kfree(mappage);
 101                return 0;
 102        } else {
 103
 104                /* no page was in place and we have one, so install it */
 105
 106                bitmap->bp[page].map = mappage;
 107                bitmap->missing_pages--;
 108        }
 109        return 0;
 110}
 111
 112/* if page is completely empty, put it back on the free list, or dealloc it */
 113/* if page was hijacked, unmark the flag so it might get alloced next time */
 114/* Note: lock should be held when calling this */
 115static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
 116{
 117        char *ptr;
 118
 119        if (bitmap->bp[page].count) /* page is still busy */
 120                return;
 121
 122        /* page is no longer in use, it can be released */
 123
 124        if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
 125                bitmap->bp[page].hijacked = 0;
 126                bitmap->bp[page].map = NULL;
 127        } else {
 128                /* normal case, free the page */
 129                ptr = bitmap->bp[page].map;
 130                bitmap->bp[page].map = NULL;
 131                bitmap->missing_pages++;
 132                kfree(ptr);
 133        }
 134}
 135
 136/*
 137 * bitmap file handling - read and write the bitmap file and its superblock
 138 */
 139
 140/*
 141 * basic page I/O operations
 142 */
 143
 144/* IO operations when bitmap is stored near all superblocks */
 145static int read_sb_page(struct mddev *mddev, loff_t offset,
 146                        struct page *page,
 147                        unsigned long index, int size)
 148{
 149        /* choose a good rdev and read the page from there */
 150
 151        struct md_rdev *rdev;
 152        sector_t target;
 153
 154        rdev_for_each(rdev, mddev) {
 155                if (! test_bit(In_sync, &rdev->flags)
 156                    || test_bit(Faulty, &rdev->flags))
 157                        continue;
 158
 159                target = offset + index * (PAGE_SIZE/512);
 160
 161                if (sync_page_io(rdev, target,
 162                                 roundup(size, bdev_logical_block_size(rdev->bdev)),
 163                                 page, READ, true)) {
 164                        page->index = index;
 165                        return 0;
 166                }
 167        }
 168        return -EIO;
 169}
 170
 171static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
 172{
 173        /* Iterate the disks of an mddev, using rcu to protect access to the
 174         * linked list, and raising the refcount of devices we return to ensure
 175         * they don't disappear while in use.
 176         * As devices are only added or removed when raid_disk is < 0 and
 177         * nr_pending is 0 and In_sync is clear, the entries we return will
 178         * still be in the same position on the list when we re-enter
 179         * list_for_each_entry_continue_rcu.
 180         *
 181         * Note that if entered with 'rdev == NULL' to start at the
 182         * beginning, we temporarily assign 'rdev' to an address which
 183         * isn't really an rdev, but which can be used by
 184         * list_for_each_entry_continue_rcu() to find the first entry.
 185         */
 186        rcu_read_lock();
 187        if (rdev == NULL)
 188                /* start at the beginning */
 189                rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
 190        else {
 191                /* release the previous rdev and start from there. */
 192                rdev_dec_pending(rdev, mddev);
 193        }
 194        list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
 195                if (rdev->raid_disk >= 0 &&
 196                    !test_bit(Faulty, &rdev->flags)) {
 197                        /* this is a usable devices */
 198                        atomic_inc(&rdev->nr_pending);
 199                        rcu_read_unlock();
 200                        return rdev;
 201                }
 202        }
 203        rcu_read_unlock();
 204        return NULL;
 205}
 206
 207static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 208{
 209        struct md_rdev *rdev = NULL;
 210        struct block_device *bdev;
 211        struct mddev *mddev = bitmap->mddev;
 212        struct bitmap_storage *store = &bitmap->storage;
 213        int node_offset = 0;
 214
 215        if (mddev_is_clustered(bitmap->mddev))
 216                node_offset = bitmap->cluster_slot * store->file_pages;
 217
 218        while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
 219                int size = PAGE_SIZE;
 220                loff_t offset = mddev->bitmap_info.offset;
 221
 222                bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
 223
 224                if (page->index == store->file_pages-1) {
 225                        int last_page_size = store->bytes & (PAGE_SIZE-1);
 226                        if (last_page_size == 0)
 227                                last_page_size = PAGE_SIZE;
 228                        size = roundup(last_page_size,
 229                                       bdev_logical_block_size(bdev));
 230                }
 231                /* Just make sure we aren't corrupting data or
 232                 * metadata
 233                 */
 234                if (mddev->external) {
 235                        /* Bitmap could be anywhere. */
 236                        if (rdev->sb_start + offset + (page->index
 237                                                       * (PAGE_SIZE/512))
 238                            > rdev->data_offset
 239                            &&
 240                            rdev->sb_start + offset
 241                            < (rdev->data_offset + mddev->dev_sectors
 242                             + (PAGE_SIZE/512)))
 243                                goto bad_alignment;
 244                } else if (offset < 0) {
 245                        /* DATA  BITMAP METADATA  */
 246                        if (offset
 247                            + (long)(page->index * (PAGE_SIZE/512))
 248                            + size/512 > 0)
 249                                /* bitmap runs in to metadata */
 250                                goto bad_alignment;
 251                        if (rdev->data_offset + mddev->dev_sectors
 252                            > rdev->sb_start + offset)
 253                                /* data runs in to bitmap */
 254                                goto bad_alignment;
 255                } else if (rdev->sb_start < rdev->data_offset) {
 256                        /* METADATA BITMAP DATA */
 257                        if (rdev->sb_start
 258                            + offset
 259                            + page->index*(PAGE_SIZE/512) + size/512
 260                            > rdev->data_offset)
 261                                /* bitmap runs in to data */
 262                                goto bad_alignment;
 263                } else {
 264                        /* DATA METADATA BITMAP - no problems */
 265                }
 266                md_super_write(mddev, rdev,
 267                               rdev->sb_start + offset
 268                               + page->index * (PAGE_SIZE/512),
 269                               size,
 270                               page);
 271        }
 272
 273        if (wait)
 274                md_super_wait(mddev);
 275        return 0;
 276
 277 bad_alignment:
 278        return -EINVAL;
 279}
 280
 281static void bitmap_file_kick(struct bitmap *bitmap);
 282/*
 283 * write out a page to a file
 284 */
 285static void write_page(struct bitmap *bitmap, struct page *page, int wait)
 286{
 287        struct buffer_head *bh;
 288
 289        if (bitmap->storage.file == NULL) {
 290                switch (write_sb_page(bitmap, page, wait)) {
 291                case -EINVAL:
 292                        set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
 293                }
 294        } else {
 295
 296                bh = page_buffers(page);
 297
 298                while (bh && bh->b_blocknr) {
 299                        atomic_inc(&bitmap->pending_writes);
 300                        set_buffer_locked(bh);
 301                        set_buffer_mapped(bh);
 302                        submit_bh(WRITE | REQ_SYNC, bh);
 303                        bh = bh->b_this_page;
 304                }
 305
 306                if (wait)
 307                        wait_event(bitmap->write_wait,
 308                                   atomic_read(&bitmap->pending_writes)==0);
 309        }
 310        if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
 311                bitmap_file_kick(bitmap);
 312}
 313
 314static void end_bitmap_write(struct buffer_head *bh, int uptodate)
 315{
 316        struct bitmap *bitmap = bh->b_private;
 317
 318        if (!uptodate)
 319                set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
 320        if (atomic_dec_and_test(&bitmap->pending_writes))
 321                wake_up(&bitmap->write_wait);
 322}
 323
 324/* copied from buffer.c */
 325static void
 326__clear_page_buffers(struct page *page)
 327{
 328        ClearPagePrivate(page);
 329        set_page_private(page, 0);
 330        page_cache_release(page);
 331}
 332static void free_buffers(struct page *page)
 333{
 334        struct buffer_head *bh;
 335
 336        if (!PagePrivate(page))
 337                return;
 338
 339        bh = page_buffers(page);
 340        while (bh) {
 341                struct buffer_head *next = bh->b_this_page;
 342                free_buffer_head(bh);
 343                bh = next;
 344        }
 345        __clear_page_buffers(page);
 346        put_page(page);
 347}
 348
 349/* read a page from a file.
 350 * We both read the page, and attach buffers to the page to record the
 351 * address of each block (using bmap).  These addresses will be used
 352 * to write the block later, completely bypassing the filesystem.
 353 * This usage is similar to how swap files are handled, and allows us
 354 * to write to a file with no concerns of memory allocation failing.
 355 */
 356static int read_page(struct file *file, unsigned long index,
 357                     struct bitmap *bitmap,
 358                     unsigned long count,
 359                     struct page *page)
 360{
 361        int ret = 0;
 362        struct inode *inode = file_inode(file);
 363        struct buffer_head *bh;
 364        sector_t block;
 365
 366        pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
 367                 (unsigned long long)index << PAGE_SHIFT);
 368
 369        bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
 370        if (!bh) {
 371                ret = -ENOMEM;
 372                goto out;
 373        }
 374        attach_page_buffers(page, bh);
 375        block = index << (PAGE_SHIFT - inode->i_blkbits);
 376        while (bh) {
 377                if (count == 0)
 378                        bh->b_blocknr = 0;
 379                else {
 380                        bh->b_blocknr = bmap(inode, block);
 381                        if (bh->b_blocknr == 0) {
 382                                /* Cannot use this file! */
 383                                ret = -EINVAL;
 384                                goto out;
 385                        }
 386                        bh->b_bdev = inode->i_sb->s_bdev;
 387                        if (count < (1<<inode->i_blkbits))
 388                                count = 0;
 389                        else
 390                                count -= (1<<inode->i_blkbits);
 391
 392                        bh->b_end_io = end_bitmap_write;
 393                        bh->b_private = bitmap;
 394                        atomic_inc(&bitmap->pending_writes);
 395                        set_buffer_locked(bh);
 396                        set_buffer_mapped(bh);
 397                        submit_bh(READ, bh);
 398                }
 399                block++;
 400                bh = bh->b_this_page;
 401        }
 402        page->index = index;
 403
 404        wait_event(bitmap->write_wait,
 405                   atomic_read(&bitmap->pending_writes)==0);
 406        if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
 407                ret = -EIO;
 408out:
 409        if (ret)
 410                printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
 411                        (int)PAGE_SIZE,
 412                        (unsigned long long)index << PAGE_SHIFT,
 413                        ret);
 414        return ret;
 415}
 416
 417/*
 418 * bitmap file superblock operations
 419 */
 420
 421/* update the event counter and sync the superblock to disk */
 422void bitmap_update_sb(struct bitmap *bitmap)
 423{
 424        bitmap_super_t *sb;
 425
 426        if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
 427                return;
 428        if (bitmap->mddev->bitmap_info.external)
 429                return;
 430        if (!bitmap->storage.sb_page) /* no superblock */
 431                return;
 432        sb = kmap_atomic(bitmap->storage.sb_page);
 433        sb->events = cpu_to_le64(bitmap->mddev->events);
 434        if (bitmap->mddev->events < bitmap->events_cleared)
 435                /* rocking back to read-only */
 436                bitmap->events_cleared = bitmap->mddev->events;
 437        sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
 438        sb->state = cpu_to_le32(bitmap->flags);
 439        /* Just in case these have been changed via sysfs: */
 440        sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
 441        sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
 442        /* This might have been changed by a reshape */
 443        sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
 444        sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
 445        sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
 446        sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
 447                                           bitmap_info.space);
 448        kunmap_atomic(sb);
 449        write_page(bitmap, bitmap->storage.sb_page, 1);
 450}
 451
 452/* print out the bitmap file superblock */
 453void bitmap_print_sb(struct bitmap *bitmap)
 454{
 455        bitmap_super_t *sb;
 456
 457        if (!bitmap || !bitmap->storage.sb_page)
 458                return;
 459        sb = kmap_atomic(bitmap->storage.sb_page);
 460        printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
 461        printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
 462        printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
 463        printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
 464                                        *(__u32 *)(sb->uuid+0),
 465                                        *(__u32 *)(sb->uuid+4),
 466                                        *(__u32 *)(sb->uuid+8),
 467                                        *(__u32 *)(sb->uuid+12));
 468        printk(KERN_DEBUG "        events: %llu\n",
 469                        (unsigned long long) le64_to_cpu(sb->events));
 470        printk(KERN_DEBUG "events cleared: %llu\n",
 471                        (unsigned long long) le64_to_cpu(sb->events_cleared));
 472        printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
 473        printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
 474        printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
 475        printk(KERN_DEBUG "     sync size: %llu KB\n",
 476                        (unsigned long long)le64_to_cpu(sb->sync_size)/2);
 477        printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
 478        kunmap_atomic(sb);
 479}
 480
 481/*
 482 * bitmap_new_disk_sb
 483 * @bitmap
 484 *
 485 * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
 486 * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
 487 * This function verifies 'bitmap_info' and populates the on-disk bitmap
 488 * structure, which is to be written to disk.
 489 *
 490 * Returns: 0 on success, -Exxx on error
 491 */
 492static int bitmap_new_disk_sb(struct bitmap *bitmap)
 493{
 494        bitmap_super_t *sb;
 495        unsigned long chunksize, daemon_sleep, write_behind;
 496
 497        bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 498        if (bitmap->storage.sb_page == NULL)
 499                return -ENOMEM;
 500        bitmap->storage.sb_page->index = 0;
 501
 502        sb = kmap_atomic(bitmap->storage.sb_page);
 503
 504        sb->magic = cpu_to_le32(BITMAP_MAGIC);
 505        sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
 506
 507        chunksize = bitmap->mddev->bitmap_info.chunksize;
 508        BUG_ON(!chunksize);
 509        if (!is_power_of_2(chunksize)) {
 510                kunmap_atomic(sb);
 511                printk(KERN_ERR "bitmap chunksize not a power of 2\n");
 512                return -EINVAL;
 513        }
 514        sb->chunksize = cpu_to_le32(chunksize);
 515
 516        daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
 517        if (!daemon_sleep ||
 518            (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
 519                printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
 520                daemon_sleep = 5 * HZ;
 521        }
 522        sb->daemon_sleep = cpu_to_le32(daemon_sleep);
 523        bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
 524
 525        /*
 526         * FIXME: write_behind for RAID1.  If not specified, what
 527         * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
 528         */
 529        write_behind = bitmap->mddev->bitmap_info.max_write_behind;
 530        if (write_behind > COUNTER_MAX)
 531                write_behind = COUNTER_MAX / 2;
 532        sb->write_behind = cpu_to_le32(write_behind);
 533        bitmap->mddev->bitmap_info.max_write_behind = write_behind;
 534
 535        /* keep the array size field of the bitmap superblock up to date */
 536        sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
 537
 538        memcpy(sb->uuid, bitmap->mddev->uuid, 16);
 539
 540        set_bit(BITMAP_STALE, &bitmap->flags);
 541        sb->state = cpu_to_le32(bitmap->flags);
 542        bitmap->events_cleared = bitmap->mddev->events;
 543        sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
 544        bitmap->mddev->bitmap_info.nodes = 0;
 545
 546        kunmap_atomic(sb);
 547
 548        return 0;
 549}
 550
 551/* read the superblock from the bitmap file and initialize some bitmap fields */
 552static int bitmap_read_sb(struct bitmap *bitmap)
 553{
 554        char *reason = NULL;
 555        bitmap_super_t *sb;
 556        unsigned long chunksize, daemon_sleep, write_behind;
 557        unsigned long long events;
 558        int nodes = 0;
 559        unsigned long sectors_reserved = 0;
 560        int err = -EINVAL;
 561        struct page *sb_page;
 562        loff_t offset = bitmap->mddev->bitmap_info.offset;
 563
 564        if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
 565                chunksize = 128 * 1024 * 1024;
 566                daemon_sleep = 5 * HZ;
 567                write_behind = 0;
 568                set_bit(BITMAP_STALE, &bitmap->flags);
 569                err = 0;
 570                goto out_no_sb;
 571        }
 572        /* page 0 is the superblock, read it... */
 573        sb_page = alloc_page(GFP_KERNEL);
 574        if (!sb_page)
 575                return -ENOMEM;
 576        bitmap->storage.sb_page = sb_page;
 577
 578re_read:
 579        /* If cluster_slot is set, the cluster is setup */
 580        if (bitmap->cluster_slot >= 0) {
 581                sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
 582
 583                sector_div(bm_blocks,
 584                           bitmap->mddev->bitmap_info.chunksize >> 9);
 585                /* bits to bytes */
 586                bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
 587                /* to 4k blocks */
 588                bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
 589                offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
 590                pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
 591                        bitmap->cluster_slot, offset);
 592        }
 593
 594        if (bitmap->storage.file) {
 595                loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
 596                int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
 597
 598                err = read_page(bitmap->storage.file, 0,
 599                                bitmap, bytes, sb_page);
 600        } else {
 601                err = read_sb_page(bitmap->mddev,
 602                                   offset,
 603                                   sb_page,
 604                                   0, sizeof(bitmap_super_t));
 605        }
 606        if (err)
 607                return err;
 608
 609        err = -EINVAL;
 610        sb = kmap_atomic(sb_page);
 611
 612        chunksize = le32_to_cpu(sb->chunksize);
 613        daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
 614        write_behind = le32_to_cpu(sb->write_behind);
 615        sectors_reserved = le32_to_cpu(sb->sectors_reserved);
 616        /* XXX: This is a hack to ensure that we don't use clustering
 617         *  in case:
 618         *      - dm-raid is in use and
 619         *      - the nodes written in bitmap_sb is erroneous.
 620         */
 621        if (!bitmap->mddev->sync_super) {
 622                nodes = le32_to_cpu(sb->nodes);
 623                strlcpy(bitmap->mddev->bitmap_info.cluster_name,
 624                                sb->cluster_name, 64);
 625        }
 626
 627        /* verify that the bitmap-specific fields are valid */
 628        if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
 629                reason = "bad magic";
 630        else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
 631                 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
 632                reason = "unrecognized superblock version";
 633        else if (chunksize < 512)
 634                reason = "bitmap chunksize too small";
 635        else if (!is_power_of_2(chunksize))
 636                reason = "bitmap chunksize not a power of 2";
 637        else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
 638                reason = "daemon sleep period out of range";
 639        else if (write_behind > COUNTER_MAX)
 640                reason = "write-behind limit out of range (0 - 16383)";
 641        if (reason) {
 642                printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
 643                        bmname(bitmap), reason);
 644                goto out;
 645        }
 646
 647        /* keep the array size field of the bitmap superblock up to date */
 648        sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
 649
 650        if (bitmap->mddev->persistent) {
 651                /*
 652                 * We have a persistent array superblock, so compare the
 653                 * bitmap's UUID and event counter to the mddev's
 654                 */
 655                if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
 656                        printk(KERN_INFO
 657                               "%s: bitmap superblock UUID mismatch\n",
 658                               bmname(bitmap));
 659                        goto out;
 660                }
 661                events = le64_to_cpu(sb->events);
 662                if (!nodes && (events < bitmap->mddev->events)) {
 663                        printk(KERN_INFO
 664                               "%s: bitmap file is out of date (%llu < %llu) "
 665                               "-- forcing full recovery\n",
 666                               bmname(bitmap), events,
 667                               (unsigned long long) bitmap->mddev->events);
 668                        set_bit(BITMAP_STALE, &bitmap->flags);
 669                }
 670        }
 671
 672        /* assign fields using values from superblock */
 673        bitmap->flags |= le32_to_cpu(sb->state);
 674        if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
 675                set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
 676        bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
 677        strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
 678        err = 0;
 679
 680out:
 681        kunmap_atomic(sb);
 682        /* Assiging chunksize is required for "re_read" */
 683        bitmap->mddev->bitmap_info.chunksize = chunksize;
 684        if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
 685                err = md_setup_cluster(bitmap->mddev, nodes);
 686                if (err) {
 687                        pr_err("%s: Could not setup cluster service (%d)\n",
 688                                        bmname(bitmap), err);
 689                        goto out_no_sb;
 690                }
 691                bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
 692                goto re_read;
 693        }
 694
 695
 696out_no_sb:
 697        if (test_bit(BITMAP_STALE, &bitmap->flags))
 698                bitmap->events_cleared = bitmap->mddev->events;
 699        bitmap->mddev->bitmap_info.chunksize = chunksize;
 700        bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
 701        bitmap->mddev->bitmap_info.max_write_behind = write_behind;
 702        bitmap->mddev->bitmap_info.nodes = nodes;
 703        if (bitmap->mddev->bitmap_info.space == 0 ||
 704            bitmap->mddev->bitmap_info.space > sectors_reserved)
 705                bitmap->mddev->bitmap_info.space = sectors_reserved;
 706        if (err) {
 707                bitmap_print_sb(bitmap);
 708                if (bitmap->cluster_slot < 0)
 709                        md_cluster_stop(bitmap->mddev);
 710        }
 711        return err;
 712}
 713
 714/*
 715 * general bitmap file operations
 716 */
 717
 718/*
 719 * on-disk bitmap:
 720 *
 721 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
 722 * file a page at a time. There's a superblock at the start of the file.
 723 */
 724/* calculate the index of the page that contains this bit */
 725static inline unsigned long file_page_index(struct bitmap_storage *store,
 726                                            unsigned long chunk)
 727{
 728        if (store->sb_page)
 729                chunk += sizeof(bitmap_super_t) << 3;
 730        return chunk >> PAGE_BIT_SHIFT;
 731}
 732
 733/* calculate the (bit) offset of this bit within a page */
 734static inline unsigned long file_page_offset(struct bitmap_storage *store,
 735                                             unsigned long chunk)
 736{
 737        if (store->sb_page)
 738                chunk += sizeof(bitmap_super_t) << 3;
 739        return chunk & (PAGE_BITS - 1);
 740}
 741
 742/*
 743 * return a pointer to the page in the filemap that contains the given bit
 744 *
 745 */
 746static inline struct page *filemap_get_page(struct bitmap_storage *store,
 747                                            unsigned long chunk)
 748{
 749        if (file_page_index(store, chunk) >= store->file_pages)
 750                return NULL;
 751        return store->filemap[file_page_index(store, chunk)];
 752}
 753
 754static int bitmap_storage_alloc(struct bitmap_storage *store,
 755                                unsigned long chunks, int with_super,
 756                                int slot_number)
 757{
 758        int pnum, offset = 0;
 759        unsigned long num_pages;
 760        unsigned long bytes;
 761
 762        bytes = DIV_ROUND_UP(chunks, 8);
 763        if (with_super)
 764                bytes += sizeof(bitmap_super_t);
 765
 766        num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
 767        offset = slot_number * (num_pages - 1);
 768
 769        store->filemap = kmalloc(sizeof(struct page *)
 770                                 * num_pages, GFP_KERNEL);
 771        if (!store->filemap)
 772                return -ENOMEM;
 773
 774        if (with_super && !store->sb_page) {
 775                store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 776                if (store->sb_page == NULL)
 777                        return -ENOMEM;
 778        }
 779
 780        pnum = 0;
 781        if (store->sb_page) {
 782                store->filemap[0] = store->sb_page;
 783                pnum = 1;
 784                store->sb_page->index = offset;
 785        }
 786
 787        for ( ; pnum < num_pages; pnum++) {
 788                store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
 789                if (!store->filemap[pnum]) {
 790                        store->file_pages = pnum;
 791                        return -ENOMEM;
 792                }
 793                store->filemap[pnum]->index = pnum + offset;
 794        }
 795        store->file_pages = pnum;
 796
 797        /* We need 4 bits per page, rounded up to a multiple
 798         * of sizeof(unsigned long) */
 799        store->filemap_attr = kzalloc(
 800                roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
 801                GFP_KERNEL);
 802        if (!store->filemap_attr)
 803                return -ENOMEM;
 804
 805        store->bytes = bytes;
 806
 807        return 0;
 808}
 809
 810static void bitmap_file_unmap(struct bitmap_storage *store)
 811{
 812        struct page **map, *sb_page;
 813        int pages;
 814        struct file *file;
 815
 816        file = store->file;
 817        map = store->filemap;
 818        pages = store->file_pages;
 819        sb_page = store->sb_page;
 820
 821        while (pages--)
 822                if (map[pages] != sb_page) /* 0 is sb_page, release it below */
 823                        free_buffers(map[pages]);
 824        kfree(map);
 825        kfree(store->filemap_attr);
 826
 827        if (sb_page)
 828                free_buffers(sb_page);
 829
 830        if (file) {
 831                struct inode *inode = file_inode(file);
 832                invalidate_mapping_pages(inode->i_mapping, 0, -1);
 833                fput(file);
 834        }
 835}
 836
 837/*
 838 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
 839 * then it is no longer reliable, so we stop using it and we mark the file
 840 * as failed in the superblock
 841 */
 842static void bitmap_file_kick(struct bitmap *bitmap)
 843{
 844        char *path, *ptr = NULL;
 845
 846        if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
 847                bitmap_update_sb(bitmap);
 848
 849                if (bitmap->storage.file) {
 850                        path = kmalloc(PAGE_SIZE, GFP_KERNEL);
 851                        if (path)
 852                                ptr = file_path(bitmap->storage.file,
 853                                             path, PAGE_SIZE);
 854
 855                        printk(KERN_ALERT
 856                              "%s: kicking failed bitmap file %s from array!\n",
 857                              bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
 858
 859                        kfree(path);
 860                } else
 861                        printk(KERN_ALERT
 862                               "%s: disabling internal bitmap due to errors\n",
 863                               bmname(bitmap));
 864        }
 865}
 866
 867enum bitmap_page_attr {
 868        BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
 869        BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
 870                                    * i.e. counter is 1 or 2. */
 871        BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
 872};
 873
 874static inline void set_page_attr(struct bitmap *bitmap, int pnum,
 875                                 enum bitmap_page_attr attr)
 876{
 877        set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
 878}
 879
 880static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
 881                                   enum bitmap_page_attr attr)
 882{
 883        clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
 884}
 885
 886static inline int test_page_attr(struct bitmap *bitmap, int pnum,
 887                                 enum bitmap_page_attr attr)
 888{
 889        return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
 890}
 891
 892static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
 893                                           enum bitmap_page_attr attr)
 894{
 895        return test_and_clear_bit((pnum<<2) + attr,
 896                                  bitmap->storage.filemap_attr);
 897}
 898/*
 899 * bitmap_file_set_bit -- called before performing a write to the md device
 900 * to set (and eventually sync) a particular bit in the bitmap file
 901 *
 902 * we set the bit immediately, then we record the page number so that
 903 * when an unplug occurs, we can flush the dirty pages out to disk
 904 */
 905static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 906{
 907        unsigned long bit;
 908        struct page *page;
 909        void *kaddr;
 910        unsigned long chunk = block >> bitmap->counts.chunkshift;
 911
 912        page = filemap_get_page(&bitmap->storage, chunk);
 913        if (!page)
 914                return;
 915        bit = file_page_offset(&bitmap->storage, chunk);
 916
 917        /* set the bit */
 918        kaddr = kmap_atomic(page);
 919        if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
 920                set_bit(bit, kaddr);
 921        else
 922                set_bit_le(bit, kaddr);
 923        kunmap_atomic(kaddr);
 924        pr_debug("set file bit %lu page %lu\n", bit, page->index);
 925        /* record page number so it gets flushed to disk when unplug occurs */
 926        set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
 927}
 928
 929static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
 930{
 931        unsigned long bit;
 932        struct page *page;
 933        void *paddr;
 934        unsigned long chunk = block >> bitmap->counts.chunkshift;
 935
 936        page = filemap_get_page(&bitmap->storage, chunk);
 937        if (!page)
 938                return;
 939        bit = file_page_offset(&bitmap->storage, chunk);
 940        paddr = kmap_atomic(page);
 941        if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
 942                clear_bit(bit, paddr);
 943        else
 944                clear_bit_le(bit, paddr);
 945        kunmap_atomic(paddr);
 946        if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
 947                set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
 948                bitmap->allclean = 0;
 949        }
 950}
 951
 952static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
 953{
 954        unsigned long bit;
 955        struct page *page;
 956        void *paddr;
 957        unsigned long chunk = block >> bitmap->counts.chunkshift;
 958        int set = 0;
 959
 960        page = filemap_get_page(&bitmap->storage, chunk);
 961        if (!page)
 962                return -EINVAL;
 963        bit = file_page_offset(&bitmap->storage, chunk);
 964        paddr = kmap_atomic(page);
 965        if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
 966                set = test_bit(bit, paddr);
 967        else
 968                set = test_bit_le(bit, paddr);
 969        kunmap_atomic(paddr);
 970        return set;
 971}
 972
 973
 974/* this gets called when the md device is ready to unplug its underlying
 975 * (slave) device queues -- before we let any writes go down, we need to
 976 * sync the dirty pages of the bitmap file to disk */
 977void bitmap_unplug(struct bitmap *bitmap)
 978{
 979        unsigned long i;
 980        int dirty, need_write;
 981
 982        if (!bitmap || !bitmap->storage.filemap ||
 983            test_bit(BITMAP_STALE, &bitmap->flags))
 984                return;
 985
 986        /* look at each page to see if there are any set bits that need to be
 987         * flushed out to disk */
 988        for (i = 0; i < bitmap->storage.file_pages; i++) {
 989                if (!bitmap->storage.filemap)
 990                        return;
 991                dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
 992                need_write = test_and_clear_page_attr(bitmap, i,
 993                                                      BITMAP_PAGE_NEEDWRITE);
 994                if (dirty || need_write) {
 995                        clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
 996                        write_page(bitmap, bitmap->storage.filemap[i], 0);
 997                }
 998        }
 999        if (bitmap->storage.file)
1000                wait_event(bitmap->write_wait,
1001                           atomic_read(&bitmap->pending_writes)==0);
1002        else
1003                md_super_wait(bitmap->mddev);
1004
1005        if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1006                bitmap_file_kick(bitmap);
1007}
1008EXPORT_SYMBOL(bitmap_unplug);
1009
1010static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
1011/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
1012 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
1013 * memory mapping of the bitmap file
1014 * Special cases:
1015 *   if there's no bitmap file, or if the bitmap file had been
1016 *   previously kicked from the array, we mark all the bits as
1017 *   1's in order to cause a full resync.
1018 *
1019 * We ignore all bits for sectors that end earlier than 'start'.
1020 * This is used when reading an out-of-date bitmap...
1021 */
1022static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1023{
1024        unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
1025        struct page *page = NULL;
1026        unsigned long bit_cnt = 0;
1027        struct file *file;
1028        unsigned long offset;
1029        int outofdate;
1030        int ret = -ENOSPC;
1031        void *paddr;
1032        struct bitmap_storage *store = &bitmap->storage;
1033
1034        chunks = bitmap->counts.chunks;
1035        file = store->file;
1036
1037        if (!file && !bitmap->mddev->bitmap_info.offset) {
1038                /* No permanent bitmap - fill with '1s'. */
1039                store->filemap = NULL;
1040                store->file_pages = 0;
1041                for (i = 0; i < chunks ; i++) {
1042                        /* if the disk bit is set, set the memory bit */
1043                        int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
1044                                      >= start);
1045                        bitmap_set_memory_bits(bitmap,
1046                                               (sector_t)i << bitmap->counts.chunkshift,
1047                                               needed);
1048                }
1049                return 0;
1050        }
1051
1052        outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
1053        if (outofdate)
1054                printk(KERN_INFO "%s: bitmap file is out of date, doing full "
1055                        "recovery\n", bmname(bitmap));
1056
1057        if (file && i_size_read(file->f_mapping->host) < store->bytes) {
1058                printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
1059                       bmname(bitmap),
1060                       (unsigned long) i_size_read(file->f_mapping->host),
1061                       store->bytes);
1062                goto err;
1063        }
1064
1065        oldindex = ~0L;
1066        offset = 0;
1067        if (!bitmap->mddev->bitmap_info.external)
1068                offset = sizeof(bitmap_super_t);
1069
1070        if (mddev_is_clustered(bitmap->mddev))
1071                node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
1072
1073        for (i = 0; i < chunks; i++) {
1074                int b;
1075                index = file_page_index(&bitmap->storage, i);
1076                bit = file_page_offset(&bitmap->storage, i);
1077                if (index != oldindex) { /* this is a new page, read it in */
1078                        int count;
1079                        /* unmap the old page, we're done with it */
1080                        if (index == store->file_pages-1)
1081                                count = store->bytes - index * PAGE_SIZE;
1082                        else
1083                                count = PAGE_SIZE;
1084                        page = store->filemap[index];
1085                        if (file)
1086                                ret = read_page(file, index, bitmap,
1087                                                count, page);
1088                        else
1089                                ret = read_sb_page(
1090                                        bitmap->mddev,
1091                                        bitmap->mddev->bitmap_info.offset,
1092                                        page,
1093                                        index + node_offset, count);
1094
1095                        if (ret)
1096                                goto err;
1097
1098                        oldindex = index;
1099
1100                        if (outofdate) {
1101                                /*
1102                                 * if bitmap is out of date, dirty the
1103                                 * whole page and write it out
1104                                 */
1105                                paddr = kmap_atomic(page);
1106                                memset(paddr + offset, 0xff,
1107                                       PAGE_SIZE - offset);
1108                                kunmap_atomic(paddr);
1109                                write_page(bitmap, page, 1);
1110
1111                                ret = -EIO;
1112                                if (test_bit(BITMAP_WRITE_ERROR,
1113                                             &bitmap->flags))
1114                                        goto err;
1115                        }
1116                }
1117                paddr = kmap_atomic(page);
1118                if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1119                        b = test_bit(bit, paddr);
1120                else
1121                        b = test_bit_le(bit, paddr);
1122                kunmap_atomic(paddr);
1123                if (b) {
1124                        /* if the disk bit is set, set the memory bit */
1125                        int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
1126                                      >= start);
1127                        bitmap_set_memory_bits(bitmap,
1128                                               (sector_t)i << bitmap->counts.chunkshift,
1129                                               needed);
1130                        bit_cnt++;
1131                }
1132                offset = 0;
1133        }
1134
1135        printk(KERN_INFO "%s: bitmap initialized from disk: "
1136               "read %lu pages, set %lu of %lu bits\n",
1137               bmname(bitmap), store->file_pages,
1138               bit_cnt, chunks);
1139
1140        return 0;
1141
1142 err:
1143        printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
1144               bmname(bitmap), ret);
1145        return ret;
1146}
1147
1148void bitmap_write_all(struct bitmap *bitmap)
1149{
1150        /* We don't actually write all bitmap blocks here,
1151         * just flag them as needing to be written
1152         */
1153        int i;
1154
1155        if (!bitmap || !bitmap->storage.filemap)
1156                return;
1157        if (bitmap->storage.file)
1158                /* Only one copy, so nothing needed */
1159                return;
1160
1161        for (i = 0; i < bitmap->storage.file_pages; i++)
1162                set_page_attr(bitmap, i,
1163                              BITMAP_PAGE_NEEDWRITE);
1164        bitmap->allclean = 0;
1165}
1166
1167static void bitmap_count_page(struct bitmap_counts *bitmap,
1168                              sector_t offset, int inc)
1169{
1170        sector_t chunk = offset >> bitmap->chunkshift;
1171        unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1172        bitmap->bp[page].count += inc;
1173        bitmap_checkfree(bitmap, page);
1174}
1175
1176static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
1177{
1178        sector_t chunk = offset >> bitmap->chunkshift;
1179        unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1180        struct bitmap_page *bp = &bitmap->bp[page];
1181
1182        if (!bp->pending)
1183                bp->pending = 1;
1184}
1185
1186static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1187                                            sector_t offset, sector_t *blocks,
1188                                            int create);
1189
1190/*
1191 * bitmap daemon -- periodically wakes up to clean bits and flush pages
1192 *                      out to disk
1193 */
1194
1195void bitmap_daemon_work(struct mddev *mddev)
1196{
1197        struct bitmap *bitmap;
1198        unsigned long j;
1199        unsigned long nextpage;
1200        sector_t blocks;
1201        struct bitmap_counts *counts;
1202
1203        /* Use a mutex to guard daemon_work against
1204         * bitmap_destroy.
1205         */
1206        mutex_lock(&mddev->bitmap_info.mutex);
1207        bitmap = mddev->bitmap;
1208        if (bitmap == NULL) {
1209                mutex_unlock(&mddev->bitmap_info.mutex);
1210                return;
1211        }
1212        if (time_before(jiffies, bitmap->daemon_lastrun
1213                        + mddev->bitmap_info.daemon_sleep))
1214                goto done;
1215
1216        bitmap->daemon_lastrun = jiffies;
1217        if (bitmap->allclean) {
1218                mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1219                goto done;
1220        }
1221        bitmap->allclean = 1;
1222
1223        /* Any file-page which is PENDING now needs to be written.
1224         * So set NEEDWRITE now, then after we make any last-minute changes
1225         * we will write it.
1226         */
1227        for (j = 0; j < bitmap->storage.file_pages; j++)
1228                if (test_and_clear_page_attr(bitmap, j,
1229                                             BITMAP_PAGE_PENDING))
1230                        set_page_attr(bitmap, j,
1231                                      BITMAP_PAGE_NEEDWRITE);
1232
1233        if (bitmap->need_sync &&
1234            mddev->bitmap_info.external == 0) {
1235                /* Arrange for superblock update as well as
1236                 * other changes */
1237                bitmap_super_t *sb;
1238                bitmap->need_sync = 0;
1239                if (bitmap->storage.filemap) {
1240                        sb = kmap_atomic(bitmap->storage.sb_page);
1241                        sb->events_cleared =
1242                                cpu_to_le64(bitmap->events_cleared);
1243                        kunmap_atomic(sb);
1244                        set_page_attr(bitmap, 0,
1245                                      BITMAP_PAGE_NEEDWRITE);
1246                }
1247        }
1248        /* Now look at the bitmap counters and if any are '2' or '1',
1249         * decrement and handle accordingly.
1250         */
1251        counts = &bitmap->counts;
1252        spin_lock_irq(&counts->lock);
1253        nextpage = 0;
1254        for (j = 0; j < counts->chunks; j++) {
1255                bitmap_counter_t *bmc;
1256                sector_t  block = (sector_t)j << counts->chunkshift;
1257
1258                if (j == nextpage) {
1259                        nextpage += PAGE_COUNTER_RATIO;
1260                        if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
1261                                j |= PAGE_COUNTER_MASK;
1262                                continue;
1263                        }
1264                        counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
1265                }
1266                bmc = bitmap_get_counter(counts,
1267                                         block,
1268                                         &blocks, 0);
1269
1270                if (!bmc) {
1271                        j |= PAGE_COUNTER_MASK;
1272                        continue;
1273                }
1274                if (*bmc == 1 && !bitmap->need_sync) {
1275                        /* We can clear the bit */
1276                        *bmc = 0;
1277                        bitmap_count_page(counts, block, -1);
1278                        bitmap_file_clear_bit(bitmap, block);
1279                } else if (*bmc && *bmc <= 2) {
1280                        *bmc = 1;
1281                        bitmap_set_pending(counts, block);
1282                        bitmap->allclean = 0;
1283                }
1284        }
1285        spin_unlock_irq(&counts->lock);
1286
1287        /* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
1288         * DIRTY pages need to be written by bitmap_unplug so it can wait
1289         * for them.
1290         * If we find any DIRTY page we stop there and let bitmap_unplug
1291         * handle all the rest.  This is important in the case where
1292         * the first blocking holds the superblock and it has been updated.
1293         * We mustn't write any other blocks before the superblock.
1294         */
1295        for (j = 0;
1296             j < bitmap->storage.file_pages
1297                     && !test_bit(BITMAP_STALE, &bitmap->flags);
1298             j++) {
1299                if (test_page_attr(bitmap, j,
1300                                   BITMAP_PAGE_DIRTY))
1301                        /* bitmap_unplug will handle the rest */
1302                        break;
1303                if (test_and_clear_page_attr(bitmap, j,
1304                                             BITMAP_PAGE_NEEDWRITE)) {
1305                        write_page(bitmap, bitmap->storage.filemap[j], 0);
1306                }
1307        }
1308
1309 done:
1310        if (bitmap->allclean == 0)
1311                mddev->thread->timeout =
1312                        mddev->bitmap_info.daemon_sleep;
1313        mutex_unlock(&mddev->bitmap_info.mutex);
1314}
1315
1316static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1317                                            sector_t offset, sector_t *blocks,
1318                                            int create)
1319__releases(bitmap->lock)
1320__acquires(bitmap->lock)
1321{
1322        /* If 'create', we might release the lock and reclaim it.
1323         * The lock must have been taken with interrupts enabled.
1324         * If !create, we don't release the lock.
1325         */
1326        sector_t chunk = offset >> bitmap->chunkshift;
1327        unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1328        unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1329        sector_t csize;
1330        int err;
1331
1332        err = bitmap_checkpage(bitmap, page, create);
1333
1334        if (bitmap->bp[page].hijacked ||
1335            bitmap->bp[page].map == NULL)
1336                csize = ((sector_t)1) << (bitmap->chunkshift +
1337                                          PAGE_COUNTER_SHIFT - 1);
1338        else
1339                csize = ((sector_t)1) << bitmap->chunkshift;
1340        *blocks = csize - (offset & (csize - 1));
1341
1342        if (err < 0)
1343                return NULL;
1344
1345        /* now locked ... */
1346
1347        if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1348                /* should we use the first or second counter field
1349                 * of the hijacked pointer? */
1350                int hi = (pageoff > PAGE_COUNTER_MASK);
1351                return  &((bitmap_counter_t *)
1352                          &bitmap->bp[page].map)[hi];
1353        } else /* page is allocated */
1354                return (bitmap_counter_t *)
1355                        &(bitmap->bp[page].map[pageoff]);
1356}
1357
1358int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
1359{
1360        if (!bitmap)
1361                return 0;
1362
1363        if (behind) {
1364                int bw;
1365                atomic_inc(&bitmap->behind_writes);
1366                bw = atomic_read(&bitmap->behind_writes);
1367                if (bw > bitmap->behind_writes_used)
1368                        bitmap->behind_writes_used = bw;
1369
1370                pr_debug("inc write-behind count %d/%lu\n",
1371                         bw, bitmap->mddev->bitmap_info.max_write_behind);
1372        }
1373
1374        while (sectors) {
1375                sector_t blocks;
1376                bitmap_counter_t *bmc;
1377
1378                spin_lock_irq(&bitmap->counts.lock);
1379                bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
1380                if (!bmc) {
1381                        spin_unlock_irq(&bitmap->counts.lock);
1382                        return 0;
1383                }
1384
1385                if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
1386                        DEFINE_WAIT(__wait);
1387                        /* note that it is safe to do the prepare_to_wait
1388                         * after the test as long as we do it before dropping
1389                         * the spinlock.
1390                         */
1391                        prepare_to_wait(&bitmap->overflow_wait, &__wait,
1392                                        TASK_UNINTERRUPTIBLE);
1393                        spin_unlock_irq(&bitmap->counts.lock);
1394                        schedule();
1395                        finish_wait(&bitmap->overflow_wait, &__wait);
1396                        continue;
1397                }
1398
1399                switch (*bmc) {
1400                case 0:
1401                        bitmap_file_set_bit(bitmap, offset);
1402                        bitmap_count_page(&bitmap->counts, offset, 1);
1403                        /* fall through */
1404                case 1:
1405                        *bmc = 2;
1406                }
1407
1408                (*bmc)++;
1409
1410                spin_unlock_irq(&bitmap->counts.lock);
1411
1412                offset += blocks;
1413                if (sectors > blocks)
1414                        sectors -= blocks;
1415                else
1416                        sectors = 0;
1417        }
1418        return 0;
1419}
1420EXPORT_SYMBOL(bitmap_startwrite);
1421
1422void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1423                     int success, int behind)
1424{
1425        if (!bitmap)
1426                return;
1427        if (behind) {
1428                if (atomic_dec_and_test(&bitmap->behind_writes))
1429                        wake_up(&bitmap->behind_wait);
1430                pr_debug("dec write-behind count %d/%lu\n",
1431                         atomic_read(&bitmap->behind_writes),
1432                         bitmap->mddev->bitmap_info.max_write_behind);
1433        }
1434
1435        while (sectors) {
1436                sector_t blocks;
1437                unsigned long flags;
1438                bitmap_counter_t *bmc;
1439
1440                spin_lock_irqsave(&bitmap->counts.lock, flags);
1441                bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
1442                if (!bmc) {
1443                        spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1444                        return;
1445                }
1446
1447                if (success && !bitmap->mddev->degraded &&
1448                    bitmap->events_cleared < bitmap->mddev->events) {
1449                        bitmap->events_cleared = bitmap->mddev->events;
1450                        bitmap->need_sync = 1;
1451                        sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
1452                }
1453
1454                if (!success && !NEEDED(*bmc))
1455                        *bmc |= NEEDED_MASK;
1456
1457                if (COUNTER(*bmc) == COUNTER_MAX)
1458                        wake_up(&bitmap->overflow_wait);
1459
1460                (*bmc)--;
1461                if (*bmc <= 2) {
1462                        bitmap_set_pending(&bitmap->counts, offset);
1463                        bitmap->allclean = 0;
1464                }
1465                spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1466                offset += blocks;
1467                if (sectors > blocks)
1468                        sectors -= blocks;
1469                else
1470                        sectors = 0;
1471        }
1472}
1473EXPORT_SYMBOL(bitmap_endwrite);
1474
1475static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1476                               int degraded)
1477{
1478        bitmap_counter_t *bmc;
1479        int rv;
1480        if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1481                *blocks = 1024;
1482                return 1; /* always resync if no bitmap */
1483        }
1484        spin_lock_irq(&bitmap->counts.lock);
1485        bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1486        rv = 0;
1487        if (bmc) {
1488                /* locked */
1489                if (RESYNC(*bmc))
1490                        rv = 1;
1491                else if (NEEDED(*bmc)) {
1492                        rv = 1;
1493                        if (!degraded) { /* don't set/clear bits if degraded */
1494                                *bmc |= RESYNC_MASK;
1495                                *bmc &= ~NEEDED_MASK;
1496                        }
1497                }
1498        }
1499        spin_unlock_irq(&bitmap->counts.lock);
1500        return rv;
1501}
1502
1503int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1504                      int degraded)
1505{
1506        /* bitmap_start_sync must always report on multiples of whole
1507         * pages, otherwise resync (which is very PAGE_SIZE based) will
1508         * get confused.
1509         * So call __bitmap_start_sync repeatedly (if needed) until
1510         * At least PAGE_SIZE>>9 blocks are covered.
1511         * Return the 'or' of the result.
1512         */
1513        int rv = 0;
1514        sector_t blocks1;
1515
1516        *blocks = 0;
1517        while (*blocks < (PAGE_SIZE>>9)) {
1518                rv |= __bitmap_start_sync(bitmap, offset,
1519                                          &blocks1, degraded);
1520                offset += blocks1;
1521                *blocks += blocks1;
1522        }
1523        return rv;
1524}
1525EXPORT_SYMBOL(bitmap_start_sync);
1526
1527void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
1528{
1529        bitmap_counter_t *bmc;
1530        unsigned long flags;
1531
1532        if (bitmap == NULL) {
1533                *blocks = 1024;
1534                return;
1535        }
1536        spin_lock_irqsave(&bitmap->counts.lock, flags);
1537        bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1538        if (bmc == NULL)
1539                goto unlock;
1540        /* locked */
1541        if (RESYNC(*bmc)) {
1542                *bmc &= ~RESYNC_MASK;
1543
1544                if (!NEEDED(*bmc) && aborted)
1545                        *bmc |= NEEDED_MASK;
1546                else {
1547                        if (*bmc <= 2) {
1548                                bitmap_set_pending(&bitmap->counts, offset);
1549                                bitmap->allclean = 0;
1550                        }
1551                }
1552        }
1553 unlock:
1554        spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1555}
1556EXPORT_SYMBOL(bitmap_end_sync);
1557
1558void bitmap_close_sync(struct bitmap *bitmap)
1559{
1560        /* Sync has finished, and any bitmap chunks that weren't synced
1561         * properly have been aborted.  It remains to us to clear the
1562         * RESYNC bit wherever it is still on
1563         */
1564        sector_t sector = 0;
1565        sector_t blocks;
1566        if (!bitmap)
1567                return;
1568        while (sector < bitmap->mddev->resync_max_sectors) {
1569                bitmap_end_sync(bitmap, sector, &blocks, 0);
1570                sector += blocks;
1571        }
1572}
1573EXPORT_SYMBOL(bitmap_close_sync);
1574
1575void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1576{
1577        sector_t s = 0;
1578        sector_t blocks;
1579
1580        if (!bitmap)
1581                return;
1582        if (sector == 0) {
1583                bitmap->last_end_sync = jiffies;
1584                return;
1585        }
1586        if (time_before(jiffies, (bitmap->last_end_sync
1587                                  + bitmap->mddev->bitmap_info.daemon_sleep)))
1588                return;
1589        wait_event(bitmap->mddev->recovery_wait,
1590                   atomic_read(&bitmap->mddev->recovery_active) == 0);
1591
1592        bitmap->mddev->curr_resync_completed = sector;
1593        set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
1594        sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
1595        s = 0;
1596        while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1597                bitmap_end_sync(bitmap, s, &blocks, 0);
1598                s += blocks;
1599        }
1600        bitmap->last_end_sync = jiffies;
1601        sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
1602}
1603EXPORT_SYMBOL(bitmap_cond_end_sync);
1604
1605static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1606{
1607        /* For each chunk covered by any of these sectors, set the
1608         * counter to 2 and possibly set resync_needed.  They should all
1609         * be 0 at this point
1610         */
1611
1612        sector_t secs;
1613        bitmap_counter_t *bmc;
1614        spin_lock_irq(&bitmap->counts.lock);
1615        bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
1616        if (!bmc) {
1617                spin_unlock_irq(&bitmap->counts.lock);
1618                return;
1619        }
1620        if (!*bmc) {
1621                *bmc = 2;
1622                bitmap_count_page(&bitmap->counts, offset, 1);
1623                bitmap_set_pending(&bitmap->counts, offset);
1624                bitmap->allclean = 0;
1625        }
1626        if (needed)
1627                *bmc |= NEEDED_MASK;
1628        spin_unlock_irq(&bitmap->counts.lock);
1629}
1630
1631/* dirty the memory and file bits for bitmap chunks "s" to "e" */
1632void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
1633{
1634        unsigned long chunk;
1635
1636        for (chunk = s; chunk <= e; chunk++) {
1637                sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1638                bitmap_set_memory_bits(bitmap, sec, 1);
1639                bitmap_file_set_bit(bitmap, sec);
1640                if (sec < bitmap->mddev->recovery_cp)
1641                        /* We are asserting that the array is dirty,
1642                         * so move the recovery_cp address back so
1643                         * that it is obvious that it is dirty
1644                         */
1645                        bitmap->mddev->recovery_cp = sec;
1646        }
1647}
1648
1649/*
1650 * flush out any pending updates
1651 */
1652void bitmap_flush(struct mddev *mddev)
1653{
1654        struct bitmap *bitmap = mddev->bitmap;
1655        long sleep;
1656
1657        if (!bitmap) /* there was no bitmap */
1658                return;
1659
1660        /* run the daemon_work three time to ensure everything is flushed
1661         * that can be
1662         */
1663        sleep = mddev->bitmap_info.daemon_sleep * 2;
1664        bitmap->daemon_lastrun -= sleep;
1665        bitmap_daemon_work(mddev);
1666        bitmap->daemon_lastrun -= sleep;
1667        bitmap_daemon_work(mddev);
1668        bitmap->daemon_lastrun -= sleep;
1669        bitmap_daemon_work(mddev);
1670        bitmap_update_sb(bitmap);
1671}
1672
1673/*
1674 * free memory that was allocated
1675 */
1676static void bitmap_free(struct bitmap *bitmap)
1677{
1678        unsigned long k, pages;
1679        struct bitmap_page *bp;
1680
1681        if (!bitmap) /* there was no bitmap */
1682                return;
1683
1684        if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
1685                bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
1686                md_cluster_stop(bitmap->mddev);
1687
1688        /* Shouldn't be needed - but just in case.... */
1689        wait_event(bitmap->write_wait,
1690                   atomic_read(&bitmap->pending_writes) == 0);
1691
1692        /* release the bitmap file  */
1693        bitmap_file_unmap(&bitmap->storage);
1694
1695        bp = bitmap->counts.bp;
1696        pages = bitmap->counts.pages;
1697
1698        /* free all allocated memory */
1699
1700        if (bp) /* deallocate the page memory */
1701                for (k = 0; k < pages; k++)
1702                        if (bp[k].map && !bp[k].hijacked)
1703                                kfree(bp[k].map);
1704        kfree(bp);
1705        kfree(bitmap);
1706}
1707
1708void bitmap_destroy(struct mddev *mddev)
1709{
1710        struct bitmap *bitmap = mddev->bitmap;
1711
1712        if (!bitmap) /* there was no bitmap */
1713                return;
1714
1715        mutex_lock(&mddev->bitmap_info.mutex);
1716        spin_lock(&mddev->lock);
1717        mddev->bitmap = NULL; /* disconnect from the md device */
1718        spin_unlock(&mddev->lock);
1719        mutex_unlock(&mddev->bitmap_info.mutex);
1720        if (mddev->thread)
1721                mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1722
1723        if (bitmap->sysfs_can_clear)
1724                sysfs_put(bitmap->sysfs_can_clear);
1725
1726        bitmap_free(bitmap);
1727}
1728
1729/*
1730 * initialize the bitmap structure
1731 * if this returns an error, bitmap_destroy must be called to do clean up
1732 */
1733struct bitmap *bitmap_create(struct mddev *mddev, int slot)
1734{
1735        struct bitmap *bitmap;
1736        sector_t blocks = mddev->resync_max_sectors;
1737        struct file *file = mddev->bitmap_info.file;
1738        int err;
1739        struct kernfs_node *bm = NULL;
1740
1741        BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1742
1743        BUG_ON(file && mddev->bitmap_info.offset);
1744
1745        bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1746        if (!bitmap)
1747                return ERR_PTR(-ENOMEM);
1748
1749        spin_lock_init(&bitmap->counts.lock);
1750        atomic_set(&bitmap->pending_writes, 0);
1751        init_waitqueue_head(&bitmap->write_wait);
1752        init_waitqueue_head(&bitmap->overflow_wait);
1753        init_waitqueue_head(&bitmap->behind_wait);
1754
1755        bitmap->mddev = mddev;
1756        bitmap->cluster_slot = slot;
1757
1758        if (mddev->kobj.sd)
1759                bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1760        if (bm) {
1761                bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1762                sysfs_put(bm);
1763        } else
1764                bitmap->sysfs_can_clear = NULL;
1765
1766        bitmap->storage.file = file;
1767        if (file) {
1768                get_file(file);
1769                /* As future accesses to this file will use bmap,
1770                 * and bypass the page cache, we must sync the file
1771                 * first.
1772                 */
1773                vfs_fsync(file, 1);
1774        }
1775        /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1776        if (!mddev->bitmap_info.external) {
1777                /*
1778                 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
1779                 * instructing us to create a new on-disk bitmap instance.
1780                 */
1781                if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
1782                        err = bitmap_new_disk_sb(bitmap);
1783                else
1784                        err = bitmap_read_sb(bitmap);
1785        } else {
1786                err = 0;
1787                if (mddev->bitmap_info.chunksize == 0 ||
1788                    mddev->bitmap_info.daemon_sleep == 0)
1789                        /* chunksize and time_base need to be
1790                         * set first. */
1791                        err = -EINVAL;
1792        }
1793        if (err)
1794                goto error;
1795
1796        bitmap->daemon_lastrun = jiffies;
1797        err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
1798        if (err)
1799                goto error;
1800
1801        printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1802               bitmap->counts.pages, bmname(bitmap));
1803
1804        err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
1805        if (err)
1806                goto error;
1807
1808        return bitmap;
1809 error:
1810        bitmap_free(bitmap);
1811        return ERR_PTR(err);
1812}
1813
1814int bitmap_load(struct mddev *mddev)
1815{
1816        int err = 0;
1817        sector_t start = 0;
1818        sector_t sector = 0;
1819        struct bitmap *bitmap = mddev->bitmap;
1820
1821        if (!bitmap)
1822                goto out;
1823
1824        /* Clear out old bitmap info first:  Either there is none, or we
1825         * are resuming after someone else has possibly changed things,
1826         * so we should forget old cached info.
1827         * All chunks should be clean, but some might need_sync.
1828         */
1829        while (sector < mddev->resync_max_sectors) {
1830                sector_t blocks;
1831                bitmap_start_sync(bitmap, sector, &blocks, 0);
1832                sector += blocks;
1833        }
1834        bitmap_close_sync(bitmap);
1835
1836        if (mddev->degraded == 0
1837            || bitmap->events_cleared == mddev->events)
1838                /* no need to keep dirty bits to optimise a
1839                 * re-add of a missing device */
1840                start = mddev->recovery_cp;
1841
1842        mutex_lock(&mddev->bitmap_info.mutex);
1843        err = bitmap_init_from_disk(bitmap, start);
1844        mutex_unlock(&mddev->bitmap_info.mutex);
1845
1846        if (err)
1847                goto out;
1848        clear_bit(BITMAP_STALE, &bitmap->flags);
1849
1850        /* Kick recovery in case any bits were set */
1851        set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
1852
1853        mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1854        md_wakeup_thread(mddev->thread);
1855
1856        bitmap_update_sb(bitmap);
1857
1858        if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1859                err = -EIO;
1860out:
1861        return err;
1862}
1863EXPORT_SYMBOL_GPL(bitmap_load);
1864
1865/* Loads the bitmap associated with slot and copies the resync information
1866 * to our bitmap
1867 */
1868int bitmap_copy_from_slot(struct mddev *mddev, int slot,
1869                sector_t *low, sector_t *high, bool clear_bits)
1870{
1871        int rv = 0, i, j;
1872        sector_t block, lo = 0, hi = 0;
1873        struct bitmap_counts *counts;
1874        struct bitmap *bitmap = bitmap_create(mddev, slot);
1875
1876        if (IS_ERR(bitmap))
1877                return PTR_ERR(bitmap);
1878
1879        rv = bitmap_init_from_disk(bitmap, 0);
1880        if (rv)
1881                goto err;
1882
1883        counts = &bitmap->counts;
1884        for (j = 0; j < counts->chunks; j++) {
1885                block = (sector_t)j << counts->chunkshift;
1886                if (bitmap_file_test_bit(bitmap, block)) {
1887                        if (!lo)
1888                                lo = block;
1889                        hi = block;
1890                        bitmap_file_clear_bit(bitmap, block);
1891                        bitmap_set_memory_bits(mddev->bitmap, block, 1);
1892                        bitmap_file_set_bit(mddev->bitmap, block);
1893                }
1894        }
1895
1896        if (clear_bits) {
1897                bitmap_update_sb(bitmap);
1898                /* Setting this for the ev_page should be enough.
1899                 * And we do not require both write_all and PAGE_DIRT either
1900                 */
1901                for (i = 0; i < bitmap->storage.file_pages; i++)
1902                        set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1903                bitmap_write_all(bitmap);
1904                bitmap_unplug(bitmap);
1905        }
1906        *low = lo;
1907        *high = hi;
1908err:
1909        bitmap_free(bitmap);
1910        return rv;
1911}
1912EXPORT_SYMBOL_GPL(bitmap_copy_from_slot);
1913
1914
1915void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
1916{
1917        unsigned long chunk_kb;
1918        struct bitmap_counts *counts;
1919
1920        if (!bitmap)
1921                return;
1922
1923        counts = &bitmap->counts;
1924
1925        chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
1926        seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
1927                   "%lu%s chunk",
1928                   counts->pages - counts->missing_pages,
1929                   counts->pages,
1930                   (counts->pages - counts->missing_pages)
1931                   << (PAGE_SHIFT - 10),
1932                   chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
1933                   chunk_kb ? "KB" : "B");
1934        if (bitmap->storage.file) {
1935                seq_printf(seq, ", file: ");
1936                seq_file_path(seq, bitmap->storage.file, " \t\n");
1937        }
1938
1939        seq_printf(seq, "\n");
1940}
1941
1942int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
1943                  int chunksize, int init)
1944{
1945        /* If chunk_size is 0, choose an appropriate chunk size.
1946         * Then possibly allocate new storage space.
1947         * Then quiesce, copy bits, replace bitmap, and re-start
1948         *
1949         * This function is called both to set up the initial bitmap
1950         * and to resize the bitmap while the array is active.
1951         * If this happens as a result of the array being resized,
1952         * chunksize will be zero, and we need to choose a suitable
1953         * chunksize, otherwise we use what we are given.
1954         */
1955        struct bitmap_storage store;
1956        struct bitmap_counts old_counts;
1957        unsigned long chunks;
1958        sector_t block;
1959        sector_t old_blocks, new_blocks;
1960        int chunkshift;
1961        int ret = 0;
1962        long pages;
1963        struct bitmap_page *new_bp;
1964
1965        if (chunksize == 0) {
1966                /* If there is enough space, leave the chunk size unchanged,
1967                 * else increase by factor of two until there is enough space.
1968                 */
1969                long bytes;
1970                long space = bitmap->mddev->bitmap_info.space;
1971
1972                if (space == 0) {
1973                        /* We don't know how much space there is, so limit
1974                         * to current size - in sectors.
1975                         */
1976                        bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
1977                        if (!bitmap->mddev->bitmap_info.external)
1978                                bytes += sizeof(bitmap_super_t);
1979                        space = DIV_ROUND_UP(bytes, 512);
1980                        bitmap->mddev->bitmap_info.space = space;
1981                }
1982                chunkshift = bitmap->counts.chunkshift;
1983                chunkshift--;
1984                do {
1985                        /* 'chunkshift' is shift from block size to chunk size */
1986                        chunkshift++;
1987                        chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1988                        bytes = DIV_ROUND_UP(chunks, 8);
1989                        if (!bitmap->mddev->bitmap_info.external)
1990                                bytes += sizeof(bitmap_super_t);
1991                } while (bytes > (space << 9));
1992        } else
1993                chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
1994
1995        chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1996        memset(&store, 0, sizeof(store));
1997        if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
1998                ret = bitmap_storage_alloc(&store, chunks,
1999                                           !bitmap->mddev->bitmap_info.external,
2000                                           bitmap->cluster_slot);
2001        if (ret)
2002                goto err;
2003
2004        pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
2005
2006        new_bp = kzalloc(pages * sizeof(*new_bp), GFP_KERNEL);
2007        ret = -ENOMEM;
2008        if (!new_bp) {
2009                bitmap_file_unmap(&store);
2010                goto err;
2011        }
2012
2013        if (!init)
2014                bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
2015
2016        store.file = bitmap->storage.file;
2017        bitmap->storage.file = NULL;
2018
2019        if (store.sb_page && bitmap->storage.sb_page)
2020                memcpy(page_address(store.sb_page),
2021                       page_address(bitmap->storage.sb_page),
2022                       sizeof(bitmap_super_t));
2023        bitmap_file_unmap(&bitmap->storage);
2024        bitmap->storage = store;
2025
2026        old_counts = bitmap->counts;
2027        bitmap->counts.bp = new_bp;
2028        bitmap->counts.pages = pages;
2029        bitmap->counts.missing_pages = pages;
2030        bitmap->counts.chunkshift = chunkshift;
2031        bitmap->counts.chunks = chunks;
2032        bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
2033                                                     BITMAP_BLOCK_SHIFT);
2034
2035        blocks = min(old_counts.chunks << old_counts.chunkshift,
2036                     chunks << chunkshift);
2037
2038        spin_lock_irq(&bitmap->counts.lock);
2039        for (block = 0; block < blocks; ) {
2040                bitmap_counter_t *bmc_old, *bmc_new;
2041                int set;
2042
2043                bmc_old = bitmap_get_counter(&old_counts, block,
2044                                             &old_blocks, 0);
2045                set = bmc_old && NEEDED(*bmc_old);
2046
2047                if (set) {
2048                        bmc_new = bitmap_get_counter(&bitmap->counts, block,
2049                                                     &new_blocks, 1);
2050                        if (*bmc_new == 0) {
2051                                /* need to set on-disk bits too. */
2052                                sector_t end = block + new_blocks;
2053                                sector_t start = block >> chunkshift;
2054                                start <<= chunkshift;
2055                                while (start < end) {
2056                                        bitmap_file_set_bit(bitmap, block);
2057                                        start += 1 << chunkshift;
2058                                }
2059                                *bmc_new = 2;
2060                                bitmap_count_page(&bitmap->counts,
2061                                                  block, 1);
2062                                bitmap_set_pending(&bitmap->counts,
2063                                                   block);
2064                        }
2065                        *bmc_new |= NEEDED_MASK;
2066                        if (new_blocks < old_blocks)
2067                                old_blocks = new_blocks;
2068                }
2069                block += old_blocks;
2070        }
2071
2072        if (!init) {
2073                int i;
2074                while (block < (chunks << chunkshift)) {
2075                        bitmap_counter_t *bmc;
2076                        bmc = bitmap_get_counter(&bitmap->counts, block,
2077                                                 &new_blocks, 1);
2078                        if (bmc) {
2079                                /* new space.  It needs to be resynced, so
2080                                 * we set NEEDED_MASK.
2081                                 */
2082                                if (*bmc == 0) {
2083                                        *bmc = NEEDED_MASK | 2;
2084                                        bitmap_count_page(&bitmap->counts,
2085                                                          block, 1);
2086                                        bitmap_set_pending(&bitmap->counts,
2087                                                           block);
2088                                }
2089                        }
2090                        block += new_blocks;
2091                }
2092                for (i = 0; i < bitmap->storage.file_pages; i++)
2093                        set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
2094        }
2095        spin_unlock_irq(&bitmap->counts.lock);
2096
2097        if (!init) {
2098                bitmap_unplug(bitmap);
2099                bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
2100        }
2101        ret = 0;
2102err:
2103        return ret;
2104}
2105EXPORT_SYMBOL_GPL(bitmap_resize);
2106
2107static ssize_t
2108location_show(struct mddev *mddev, char *page)
2109{
2110        ssize_t len;
2111        if (mddev->bitmap_info.file)
2112                len = sprintf(page, "file");
2113        else if (mddev->bitmap_info.offset)
2114                len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
2115        else
2116                len = sprintf(page, "none");
2117        len += sprintf(page+len, "\n");
2118        return len;
2119}
2120
2121static ssize_t
2122location_store(struct mddev *mddev, const char *buf, size_t len)
2123{
2124
2125        if (mddev->pers) {
2126                if (!mddev->pers->quiesce)
2127                        return -EBUSY;
2128                if (mddev->recovery || mddev->sync_thread)
2129                        return -EBUSY;
2130        }
2131
2132        if (mddev->bitmap || mddev->bitmap_info.file ||
2133            mddev->bitmap_info.offset) {
2134                /* bitmap already configured.  Only option is to clear it */
2135                if (strncmp(buf, "none", 4) != 0)
2136                        return -EBUSY;
2137                if (mddev->pers) {
2138                        mddev->pers->quiesce(mddev, 1);
2139                        bitmap_destroy(mddev);
2140                        mddev->pers->quiesce(mddev, 0);
2141                }
2142                mddev->bitmap_info.offset = 0;
2143                if (mddev->bitmap_info.file) {
2144                        struct file *f = mddev->bitmap_info.file;
2145                        mddev->bitmap_info.file = NULL;
2146                        fput(f);
2147                }
2148        } else {
2149                /* No bitmap, OK to set a location */
2150                long long offset;
2151                if (strncmp(buf, "none", 4) == 0)
2152                        /* nothing to be done */;
2153                else if (strncmp(buf, "file:", 5) == 0) {
2154                        /* Not supported yet */
2155                        return -EINVAL;
2156                } else {
2157                        int rv;
2158                        if (buf[0] == '+')
2159                                rv = kstrtoll(buf+1, 10, &offset);
2160                        else
2161                                rv = kstrtoll(buf, 10, &offset);
2162                        if (rv)
2163                                return rv;
2164                        if (offset == 0)
2165                                return -EINVAL;
2166                        if (mddev->bitmap_info.external == 0 &&
2167                            mddev->major_version == 0 &&
2168                            offset != mddev->bitmap_info.default_offset)
2169                                return -EINVAL;
2170                        mddev->bitmap_info.offset = offset;
2171                        if (mddev->pers) {
2172                                struct bitmap *bitmap;
2173                                mddev->pers->quiesce(mddev, 1);
2174                                bitmap = bitmap_create(mddev, -1);
2175                                if (IS_ERR(bitmap))
2176                                        rv = PTR_ERR(bitmap);
2177                                else {
2178                                        mddev->bitmap = bitmap;
2179                                        rv = bitmap_load(mddev);
2180                                        if (rv) {
2181                                                bitmap_destroy(mddev);
2182                                                mddev->bitmap_info.offset = 0;
2183                                        }
2184                                }
2185                                mddev->pers->quiesce(mddev, 0);
2186                                if (rv)
2187                                        return rv;
2188                        }
2189                }
2190        }
2191        if (!mddev->external) {
2192                /* Ensure new bitmap info is stored in
2193                 * metadata promptly.
2194                 */
2195                set_bit(MD_CHANGE_DEVS, &mddev->flags);
2196                md_wakeup_thread(mddev->thread);
2197        }
2198        return len;
2199}
2200
2201static struct md_sysfs_entry bitmap_location =
2202__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
2203
2204/* 'bitmap/space' is the space available at 'location' for the
2205 * bitmap.  This allows the kernel to know when it is safe to
2206 * resize the bitmap to match a resized array.
2207 */
2208static ssize_t
2209space_show(struct mddev *mddev, char *page)
2210{
2211        return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2212}
2213
2214static ssize_t
2215space_store(struct mddev *mddev, const char *buf, size_t len)
2216{
2217        unsigned long sectors;
2218        int rv;
2219
2220        rv = kstrtoul(buf, 10, &sectors);
2221        if (rv)
2222                return rv;
2223
2224        if (sectors == 0)
2225                return -EINVAL;
2226
2227        if (mddev->bitmap &&
2228            sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
2229                return -EFBIG; /* Bitmap is too big for this small space */
2230
2231        /* could make sure it isn't too big, but that isn't really
2232         * needed - user-space should be careful.
2233         */
2234        mddev->bitmap_info.space = sectors;
2235        return len;
2236}
2237
2238static struct md_sysfs_entry bitmap_space =
2239__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2240
2241static ssize_t
2242timeout_show(struct mddev *mddev, char *page)
2243{
2244        ssize_t len;
2245        unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2246        unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2247
2248        len = sprintf(page, "%lu", secs);
2249        if (jifs)
2250                len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2251        len += sprintf(page+len, "\n");
2252        return len;
2253}
2254
2255static ssize_t
2256timeout_store(struct mddev *mddev, const char *buf, size_t len)
2257{
2258        /* timeout can be set at any time */
2259        unsigned long timeout;
2260        int rv = strict_strtoul_scaled(buf, &timeout, 4);
2261        if (rv)
2262                return rv;
2263
2264        /* just to make sure we don't overflow... */
2265        if (timeout >= LONG_MAX / HZ)
2266                return -EINVAL;
2267
2268        timeout = timeout * HZ / 10000;
2269
2270        if (timeout >= MAX_SCHEDULE_TIMEOUT)
2271                timeout = MAX_SCHEDULE_TIMEOUT-1;
2272        if (timeout < 1)
2273                timeout = 1;
2274        mddev->bitmap_info.daemon_sleep = timeout;
2275        if (mddev->thread) {
2276                /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
2277                 * the bitmap is all clean and we don't need to
2278                 * adjust the timeout right now
2279                 */
2280                if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
2281                        mddev->thread->timeout = timeout;
2282                        md_wakeup_thread(mddev->thread);
2283                }
2284        }
2285        return len;
2286}
2287
2288static struct md_sysfs_entry bitmap_timeout =
2289__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2290
2291static ssize_t
2292backlog_show(struct mddev *mddev, char *page)
2293{
2294        return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2295}
2296
2297static ssize_t
2298backlog_store(struct mddev *mddev, const char *buf, size_t len)
2299{
2300        unsigned long backlog;
2301        int rv = kstrtoul(buf, 10, &backlog);
2302        if (rv)
2303                return rv;
2304        if (backlog > COUNTER_MAX)
2305                return -EINVAL;
2306        mddev->bitmap_info.max_write_behind = backlog;
2307        return len;
2308}
2309
2310static struct md_sysfs_entry bitmap_backlog =
2311__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2312
2313static ssize_t
2314chunksize_show(struct mddev *mddev, char *page)
2315{
2316        return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2317}
2318
2319static ssize_t
2320chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2321{
2322        /* Can only be changed when no bitmap is active */
2323        int rv;
2324        unsigned long csize;
2325        if (mddev->bitmap)
2326                return -EBUSY;
2327        rv = kstrtoul(buf, 10, &csize);
2328        if (rv)
2329                return rv;
2330        if (csize < 512 ||
2331            !is_power_of_2(csize))
2332                return -EINVAL;
2333        mddev->bitmap_info.chunksize = csize;
2334        return len;
2335}
2336
2337static struct md_sysfs_entry bitmap_chunksize =
2338__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2339
2340static ssize_t metadata_show(struct mddev *mddev, char *page)
2341{
2342        if (mddev_is_clustered(mddev))
2343                return sprintf(page, "clustered\n");
2344        return sprintf(page, "%s\n", (mddev->bitmap_info.external
2345                                      ? "external" : "internal"));
2346}
2347
2348static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2349{
2350        if (mddev->bitmap ||
2351            mddev->bitmap_info.file ||
2352            mddev->bitmap_info.offset)
2353                return -EBUSY;
2354        if (strncmp(buf, "external", 8) == 0)
2355                mddev->bitmap_info.external = 1;
2356        else if ((strncmp(buf, "internal", 8) == 0) ||
2357                        (strncmp(buf, "clustered", 9) == 0))
2358                mddev->bitmap_info.external = 0;
2359        else
2360                return -EINVAL;
2361        return len;
2362}
2363
2364static struct md_sysfs_entry bitmap_metadata =
2365__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2366
2367static ssize_t can_clear_show(struct mddev *mddev, char *page)
2368{
2369        int len;
2370        spin_lock(&mddev->lock);
2371        if (mddev->bitmap)
2372                len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
2373                                             "false" : "true"));
2374        else
2375                len = sprintf(page, "\n");
2376        spin_unlock(&mddev->lock);
2377        return len;
2378}
2379
2380static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2381{
2382        if (mddev->bitmap == NULL)
2383                return -ENOENT;
2384        if (strncmp(buf, "false", 5) == 0)
2385                mddev->bitmap->need_sync = 1;
2386        else if (strncmp(buf, "true", 4) == 0) {
2387                if (mddev->degraded)
2388                        return -EBUSY;
2389                mddev->bitmap->need_sync = 0;
2390        } else
2391                return -EINVAL;
2392        return len;
2393}
2394
2395static struct md_sysfs_entry bitmap_can_clear =
2396__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2397
2398static ssize_t
2399behind_writes_used_show(struct mddev *mddev, char *page)
2400{
2401        ssize_t ret;
2402        spin_lock(&mddev->lock);
2403        if (mddev->bitmap == NULL)
2404                ret = sprintf(page, "0\n");
2405        else
2406                ret = sprintf(page, "%lu\n",
2407                              mddev->bitmap->behind_writes_used);
2408        spin_unlock(&mddev->lock);
2409        return ret;
2410}
2411
2412static ssize_t
2413behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2414{
2415        if (mddev->bitmap)
2416                mddev->bitmap->behind_writes_used = 0;
2417        return len;
2418}
2419
2420static struct md_sysfs_entry max_backlog_used =
2421__ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
2422       behind_writes_used_show, behind_writes_used_reset);
2423
2424static struct attribute *md_bitmap_attrs[] = {
2425        &bitmap_location.attr,
2426        &bitmap_space.attr,
2427        &bitmap_timeout.attr,
2428        &bitmap_backlog.attr,
2429        &bitmap_chunksize.attr,
2430        &bitmap_metadata.attr,
2431        &bitmap_can_clear.attr,
2432        &max_backlog_used.attr,
2433        NULL
2434};
2435struct attribute_group md_bitmap_group = {
2436        .name = "bitmap",
2437        .attrs = md_bitmap_attrs,
2438};
2439
2440