linux/drivers/md/raid0.c
<<
>>
Prefs
   1/*
   2   raid0.c : Multiple Devices driver for Linux
   3             Copyright (C) 1994-96 Marc ZYNGIER
   4             <zyngier@ufr-info-p7.ibp.fr> or
   5             <maz@gloups.fdn.fr>
   6             Copyright (C) 1999, 2000 Ingo Molnar, Red Hat
   7
   8
   9   RAID-0 management functions.
  10
  11   This program is free software; you can redistribute it and/or modify
  12   it under the terms of the GNU General Public License as published by
  13   the Free Software Foundation; either version 2, or (at your option)
  14   any later version.
  15   
  16   You should have received a copy of the GNU General Public License
  17   (for example /usr/src/linux/COPYING); if not, write to the Free
  18   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
  19*/
  20
  21#include <linux/blkdev.h>
  22#include <linux/seq_file.h>
  23#include <linux/slab.h>
  24#include "md.h"
  25#include "raid0.h"
  26#include "raid5.h"
  27
  28static int raid0_congested(void *data, int bits)
  29{
  30        mddev_t *mddev = data;
  31        raid0_conf_t *conf = mddev->private;
  32        mdk_rdev_t **devlist = conf->devlist;
  33        int raid_disks = conf->strip_zone[0].nb_dev;
  34        int i, ret = 0;
  35
  36        if (mddev_congested(mddev, bits))
  37                return 1;
  38
  39        for (i = 0; i < raid_disks && !ret ; i++) {
  40                struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
  41
  42                ret |= bdi_congested(&q->backing_dev_info, bits);
  43        }
  44        return ret;
  45}
  46
  47/*
  48 * inform the user of the raid configuration
  49*/
  50static void dump_zones(mddev_t *mddev)
  51{
  52        int j, k, h;
  53        sector_t zone_size = 0;
  54        sector_t zone_start = 0;
  55        char b[BDEVNAME_SIZE];
  56        raid0_conf_t *conf = mddev->private;
  57        int raid_disks = conf->strip_zone[0].nb_dev;
  58        printk(KERN_INFO "******* %s configuration *********\n",
  59                mdname(mddev));
  60        h = 0;
  61        for (j = 0; j < conf->nr_strip_zones; j++) {
  62                printk(KERN_INFO "zone%d=[", j);
  63                for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
  64                        printk(KERN_CONT "%s/",
  65                        bdevname(conf->devlist[j*raid_disks
  66                                                + k]->bdev, b));
  67                printk(KERN_CONT "]\n");
  68
  69                zone_size  = conf->strip_zone[j].zone_end - zone_start;
  70                printk(KERN_INFO "        zone offset=%llukb "
  71                                "device offset=%llukb size=%llukb\n",
  72                        (unsigned long long)zone_start>>1,
  73                        (unsigned long long)conf->strip_zone[j].dev_start>>1,
  74                        (unsigned long long)zone_size>>1);
  75                zone_start = conf->strip_zone[j].zone_end;
  76        }
  77        printk(KERN_INFO "**********************************\n\n");
  78}
  79
  80static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
  81{
  82        int i, c, err;
  83        sector_t curr_zone_end, sectors;
  84        mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
  85        struct strip_zone *zone;
  86        int cnt;
  87        char b[BDEVNAME_SIZE];
  88        raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
  89
  90        if (!conf)
  91                return -ENOMEM;
  92        list_for_each_entry(rdev1, &mddev->disks, same_set) {
  93                printk(KERN_INFO "md/raid0:%s: looking at %s\n",
  94                       mdname(mddev),
  95                       bdevname(rdev1->bdev, b));
  96                c = 0;
  97
  98                /* round size to chunk_size */
  99                sectors = rdev1->sectors;
 100                sector_div(sectors, mddev->chunk_sectors);
 101                rdev1->sectors = sectors * mddev->chunk_sectors;
 102
 103                list_for_each_entry(rdev2, &mddev->disks, same_set) {
 104                        printk(KERN_INFO "md/raid0:%s:   comparing %s(%llu)",
 105                               mdname(mddev),
 106                               bdevname(rdev1->bdev,b),
 107                               (unsigned long long)rdev1->sectors);
 108                        printk(KERN_CONT " with %s(%llu)\n",
 109                               bdevname(rdev2->bdev,b),
 110                               (unsigned long long)rdev2->sectors);
 111                        if (rdev2 == rdev1) {
 112                                printk(KERN_INFO "md/raid0:%s:   END\n",
 113                                       mdname(mddev));
 114                                break;
 115                        }
 116                        if (rdev2->sectors == rdev1->sectors) {
 117                                /*
 118                                 * Not unique, don't count it as a new
 119                                 * group
 120                                 */
 121                                printk(KERN_INFO "md/raid0:%s:   EQUAL\n",
 122                                       mdname(mddev));
 123                                c = 1;
 124                                break;
 125                        }
 126                        printk(KERN_INFO "md/raid0:%s:   NOT EQUAL\n",
 127                               mdname(mddev));
 128                }
 129                if (!c) {
 130                        printk(KERN_INFO "md/raid0:%s:   ==> UNIQUE\n",
 131                               mdname(mddev));
 132                        conf->nr_strip_zones++;
 133                        printk(KERN_INFO "md/raid0:%s: %d zones\n",
 134                               mdname(mddev), conf->nr_strip_zones);
 135                }
 136        }
 137        printk(KERN_INFO "md/raid0:%s: FINAL %d zones\n",
 138               mdname(mddev), conf->nr_strip_zones);
 139        err = -ENOMEM;
 140        conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
 141                                conf->nr_strip_zones, GFP_KERNEL);
 142        if (!conf->strip_zone)
 143                goto abort;
 144        conf->devlist = kzalloc(sizeof(mdk_rdev_t*)*
 145                                conf->nr_strip_zones*mddev->raid_disks,
 146                                GFP_KERNEL);
 147        if (!conf->devlist)
 148                goto abort;
 149
 150        /* The first zone must contain all devices, so here we check that
 151         * there is a proper alignment of slots to devices and find them all
 152         */
 153        zone = &conf->strip_zone[0];
 154        cnt = 0;
 155        smallest = NULL;
 156        dev = conf->devlist;
 157        err = -EINVAL;
 158        list_for_each_entry(rdev1, &mddev->disks, same_set) {
 159                int j = rdev1->raid_disk;
 160
 161                if (mddev->level == 10) {
 162                        /* taking over a raid10-n2 array */
 163                        j /= 2;
 164                        rdev1->new_raid_disk = j;
 165                }
 166
 167                if (mddev->level == 1) {
 168                        /* taiking over a raid1 array-
 169                         * we have only one active disk
 170                         */
 171                        j = 0;
 172                        rdev1->new_raid_disk = j;
 173                }
 174
 175                if (j < 0 || j >= mddev->raid_disks) {
 176                        printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
 177                               "aborting!\n", mdname(mddev), j);
 178                        goto abort;
 179                }
 180                if (dev[j]) {
 181                        printk(KERN_ERR "md/raid0:%s: multiple devices for %d - "
 182                               "aborting!\n", mdname(mddev), j);
 183                        goto abort;
 184                }
 185                dev[j] = rdev1;
 186
 187                disk_stack_limits(mddev->gendisk, rdev1->bdev,
 188                                  rdev1->data_offset << 9);
 189                /* as we don't honour merge_bvec_fn, we must never risk
 190                 * violating it, so limit ->max_segments to 1, lying within
 191                 * a single page.
 192                 */
 193
 194                if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) {
 195                        blk_queue_max_segments(mddev->queue, 1);
 196                        blk_queue_segment_boundary(mddev->queue,
 197                                                   PAGE_CACHE_SIZE - 1);
 198                }
 199                if (!smallest || (rdev1->sectors < smallest->sectors))
 200                        smallest = rdev1;
 201                cnt++;
 202        }
 203        if (cnt != mddev->raid_disks) {
 204                printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
 205                       "aborting!\n", mdname(mddev), cnt, mddev->raid_disks);
 206                goto abort;
 207        }
 208        zone->nb_dev = cnt;
 209        zone->zone_end = smallest->sectors * cnt;
 210
 211        curr_zone_end = zone->zone_end;
 212
 213        /* now do the other zones */
 214        for (i = 1; i < conf->nr_strip_zones; i++)
 215        {
 216                int j;
 217
 218                zone = conf->strip_zone + i;
 219                dev = conf->devlist + i * mddev->raid_disks;
 220
 221                printk(KERN_INFO "md/raid0:%s: zone %d\n",
 222                       mdname(mddev), i);
 223                zone->dev_start = smallest->sectors;
 224                smallest = NULL;
 225                c = 0;
 226
 227                for (j=0; j<cnt; j++) {
 228                        rdev = conf->devlist[j];
 229                        printk(KERN_INFO "md/raid0:%s: checking %s ...",
 230                               mdname(mddev),
 231                               bdevname(rdev->bdev, b));
 232                        if (rdev->sectors <= zone->dev_start) {
 233                                printk(KERN_CONT " nope.\n");
 234                                continue;
 235                        }
 236                        printk(KERN_CONT " contained as device %d\n", c);
 237                        dev[c] = rdev;
 238                        c++;
 239                        if (!smallest || rdev->sectors < smallest->sectors) {
 240                                smallest = rdev;
 241                                printk(KERN_INFO "md/raid0:%s:  (%llu) is smallest!.\n",
 242                                       mdname(mddev),
 243                                       (unsigned long long)rdev->sectors);
 244                        }
 245                }
 246
 247                zone->nb_dev = c;
 248                sectors = (smallest->sectors - zone->dev_start) * c;
 249                printk(KERN_INFO "md/raid0:%s: zone->nb_dev: %d, sectors: %llu\n",
 250                       mdname(mddev),
 251                       zone->nb_dev, (unsigned long long)sectors);
 252
 253                curr_zone_end += sectors;
 254                zone->zone_end = curr_zone_end;
 255
 256                printk(KERN_INFO "md/raid0:%s: current zone start: %llu\n",
 257                       mdname(mddev),
 258                       (unsigned long long)smallest->sectors);
 259        }
 260        mddev->queue->backing_dev_info.congested_fn = raid0_congested;
 261        mddev->queue->backing_dev_info.congested_data = mddev;
 262
 263        /*
 264         * now since we have the hard sector sizes, we can make sure
 265         * chunk size is a multiple of that sector size
 266         */
 267        if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
 268                printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n",
 269                       mdname(mddev),
 270                       mddev->chunk_sectors << 9);
 271                goto abort;
 272        }
 273
 274        blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
 275        blk_queue_io_opt(mddev->queue,
 276                         (mddev->chunk_sectors << 9) * mddev->raid_disks);
 277
 278        printk(KERN_INFO "md/raid0:%s: done.\n", mdname(mddev));
 279        *private_conf = conf;
 280
 281        return 0;
 282abort:
 283        kfree(conf->strip_zone);
 284        kfree(conf->devlist);
 285        kfree(conf);
 286        *private_conf = NULL;
 287        return err;
 288}
 289
 290/**
 291 *      raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
 292 *      @q: request queue
 293 *      @bvm: properties of new bio
 294 *      @biovec: the request that could be merged to it.
 295 *
 296 *      Return amount of bytes we can accept at this offset
 297 */
 298static int raid0_mergeable_bvec(struct request_queue *q,
 299                                struct bvec_merge_data *bvm,
 300                                struct bio_vec *biovec)
 301{
 302        mddev_t *mddev = q->queuedata;
 303        sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
 304        int max;
 305        unsigned int chunk_sectors = mddev->chunk_sectors;
 306        unsigned int bio_sectors = bvm->bi_size >> 9;
 307
 308        if (is_power_of_2(chunk_sectors))
 309                max =  (chunk_sectors - ((sector & (chunk_sectors-1))
 310                                                + bio_sectors)) << 9;
 311        else
 312                max =  (chunk_sectors - (sector_div(sector, chunk_sectors)
 313                                                + bio_sectors)) << 9;
 314        if (max < 0) max = 0; /* bio_add cannot handle a negative return */
 315        if (max <= biovec->bv_len && bio_sectors == 0)
 316                return biovec->bv_len;
 317        else 
 318                return max;
 319}
 320
 321static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 322{
 323        sector_t array_sectors = 0;
 324        mdk_rdev_t *rdev;
 325
 326        WARN_ONCE(sectors || raid_disks,
 327                  "%s does not support generic reshape\n", __func__);
 328
 329        list_for_each_entry(rdev, &mddev->disks, same_set)
 330                array_sectors += rdev->sectors;
 331
 332        return array_sectors;
 333}
 334
 335static int raid0_run(mddev_t *mddev)
 336{
 337        raid0_conf_t *conf;
 338        int ret;
 339
 340        if (mddev->chunk_sectors == 0) {
 341                printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n",
 342                       mdname(mddev));
 343                return -EINVAL;
 344        }
 345        if (md_check_no_bitmap(mddev))
 346                return -EINVAL;
 347        blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 348
 349        /* if private is not null, we are here after takeover */
 350        if (mddev->private == NULL) {
 351                ret = create_strip_zones(mddev, &conf);
 352                if (ret < 0)
 353                        return ret;
 354                mddev->private = conf;
 355        }
 356        conf = mddev->private;
 357
 358        /* calculate array device size */
 359        md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
 360
 361        printk(KERN_INFO "md/raid0:%s: md_size is %llu sectors.\n",
 362               mdname(mddev),
 363               (unsigned long long)mddev->array_sectors);
 364        /* calculate the max read-ahead size.
 365         * For read-ahead of large files to be effective, we need to
 366         * readahead at least twice a whole stripe. i.e. number of devices
 367         * multiplied by chunk size times 2.
 368         * If an individual device has an ra_pages greater than the
 369         * chunk size, then we will not drive that device as hard as it
 370         * wants.  We consider this a configuration error: a larger
 371         * chunksize should be used in that case.
 372         */
 373        {
 374                int stripe = mddev->raid_disks *
 375                        (mddev->chunk_sectors << 9) / PAGE_SIZE;
 376                if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
 377                        mddev->queue->backing_dev_info.ra_pages = 2* stripe;
 378        }
 379
 380        blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
 381        dump_zones(mddev);
 382        return md_integrity_register(mddev);
 383}
 384
 385static int raid0_stop(mddev_t *mddev)
 386{
 387        raid0_conf_t *conf = mddev->private;
 388
 389        blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 390        kfree(conf->strip_zone);
 391        kfree(conf->devlist);
 392        kfree(conf);
 393        mddev->private = NULL;
 394        return 0;
 395}
 396
 397/* Find the zone which holds a particular offset
 398 * Update *sectorp to be an offset in that zone
 399 */
 400static struct strip_zone *find_zone(struct raid0_private_data *conf,
 401                                    sector_t *sectorp)
 402{
 403        int i;
 404        struct strip_zone *z = conf->strip_zone;
 405        sector_t sector = *sectorp;
 406
 407        for (i = 0; i < conf->nr_strip_zones; i++)
 408                if (sector < z[i].zone_end) {
 409                        if (i)
 410                                *sectorp = sector - z[i-1].zone_end;
 411                        return z + i;
 412                }
 413        BUG();
 414}
 415
 416/*
 417 * remaps the bio to the target device. we separate two flows.
 418 * power 2 flow and a general flow for the sake of perfromance
 419*/
 420static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone,
 421                                sector_t sector, sector_t *sector_offset)
 422{
 423        unsigned int sect_in_chunk;
 424        sector_t chunk;
 425        raid0_conf_t *conf = mddev->private;
 426        int raid_disks = conf->strip_zone[0].nb_dev;
 427        unsigned int chunk_sects = mddev->chunk_sectors;
 428
 429        if (is_power_of_2(chunk_sects)) {
 430                int chunksect_bits = ffz(~chunk_sects);
 431                /* find the sector offset inside the chunk */
 432                sect_in_chunk  = sector & (chunk_sects - 1);
 433                sector >>= chunksect_bits;
 434                /* chunk in zone */
 435                chunk = *sector_offset;
 436                /* quotient is the chunk in real device*/
 437                sector_div(chunk, zone->nb_dev << chunksect_bits);
 438        } else{
 439                sect_in_chunk = sector_div(sector, chunk_sects);
 440                chunk = *sector_offset;
 441                sector_div(chunk, chunk_sects * zone->nb_dev);
 442        }
 443        /*
 444        *  position the bio over the real device
 445        *  real sector = chunk in device + starting of zone
 446        *       + the position in the chunk
 447        */
 448        *sector_offset = (chunk * chunk_sects) + sect_in_chunk;
 449        return conf->devlist[(zone - conf->strip_zone)*raid_disks
 450                             + sector_div(sector, zone->nb_dev)];
 451}
 452
 453/*
 454 * Is io distribute over 1 or more chunks ?
 455*/
 456static inline int is_io_in_chunk_boundary(mddev_t *mddev,
 457                        unsigned int chunk_sects, struct bio *bio)
 458{
 459        if (likely(is_power_of_2(chunk_sects))) {
 460                return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
 461                                        + (bio->bi_size >> 9));
 462        } else{
 463                sector_t sector = bio->bi_sector;
 464                return chunk_sects >= (sector_div(sector, chunk_sects)
 465                                                + (bio->bi_size >> 9));
 466        }
 467}
 468
 469static int raid0_make_request(mddev_t *mddev, struct bio *bio)
 470{
 471        unsigned int chunk_sects;
 472        sector_t sector_offset;
 473        struct strip_zone *zone;
 474        mdk_rdev_t *tmp_dev;
 475
 476        if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 477                md_flush_request(mddev, bio);
 478                return 0;
 479        }
 480
 481        chunk_sects = mddev->chunk_sectors;
 482        if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) {
 483                sector_t sector = bio->bi_sector;
 484                struct bio_pair *bp;
 485                /* Sanity check -- queue functions should prevent this happening */
 486                if (bio->bi_vcnt != 1 ||
 487                    bio->bi_idx != 0)
 488                        goto bad_map;
 489                /* This is a one page bio that upper layers
 490                 * refuse to split for us, so we need to split it.
 491                 */
 492                if (likely(is_power_of_2(chunk_sects)))
 493                        bp = bio_split(bio, chunk_sects - (sector &
 494                                                           (chunk_sects-1)));
 495                else
 496                        bp = bio_split(bio, chunk_sects -
 497                                       sector_div(sector, chunk_sects));
 498                if (raid0_make_request(mddev, &bp->bio1))
 499                        generic_make_request(&bp->bio1);
 500                if (raid0_make_request(mddev, &bp->bio2))
 501                        generic_make_request(&bp->bio2);
 502
 503                bio_pair_release(bp);
 504                return 0;
 505        }
 506
 507        sector_offset = bio->bi_sector;
 508        zone =  find_zone(mddev->private, &sector_offset);
 509        tmp_dev = map_sector(mddev, zone, bio->bi_sector,
 510                             &sector_offset);
 511        bio->bi_bdev = tmp_dev->bdev;
 512        bio->bi_sector = sector_offset + zone->dev_start +
 513                tmp_dev->data_offset;
 514        /*
 515         * Let the main block layer submit the IO and resolve recursion:
 516         */
 517        return 1;
 518
 519bad_map:
 520        printk("md/raid0:%s: make_request bug: can't convert block across chunks"
 521               " or bigger than %dk %llu %d\n",
 522               mdname(mddev), chunk_sects / 2,
 523               (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
 524
 525        bio_io_error(bio);
 526        return 0;
 527}
 528
 529static void raid0_status(struct seq_file *seq, mddev_t *mddev)
 530{
 531#undef MD_DEBUG
 532#ifdef MD_DEBUG
 533        int j, k, h;
 534        char b[BDEVNAME_SIZE];
 535        raid0_conf_t *conf = mddev->private;
 536        int raid_disks = conf->strip_zone[0].nb_dev;
 537
 538        sector_t zone_size;
 539        sector_t zone_start = 0;
 540        h = 0;
 541
 542        for (j = 0; j < conf->nr_strip_zones; j++) {
 543                seq_printf(seq, "      z%d", j);
 544                seq_printf(seq, "=[");
 545                for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
 546                        seq_printf(seq, "%s/", bdevname(
 547                                conf->devlist[j*raid_disks + k]
 548                                                ->bdev, b));
 549
 550                zone_size  = conf->strip_zone[j].zone_end - zone_start;
 551                seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n",
 552                        (unsigned long long)zone_start>>1,
 553                        (unsigned long long)conf->strip_zone[j].dev_start>>1,
 554                        (unsigned long long)zone_size>>1);
 555                zone_start = conf->strip_zone[j].zone_end;
 556        }
 557#endif
 558        seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2);
 559        return;
 560}
 561
 562static void *raid0_takeover_raid45(mddev_t *mddev)
 563{
 564        mdk_rdev_t *rdev;
 565        raid0_conf_t *priv_conf;
 566
 567        if (mddev->degraded != 1) {
 568                printk(KERN_ERR "md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
 569                       mdname(mddev),
 570                       mddev->degraded);
 571                return ERR_PTR(-EINVAL);
 572        }
 573
 574        list_for_each_entry(rdev, &mddev->disks, same_set) {
 575                /* check slot number for a disk */
 576                if (rdev->raid_disk == mddev->raid_disks-1) {
 577                        printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n",
 578                               mdname(mddev));
 579                        return ERR_PTR(-EINVAL);
 580                }
 581        }
 582
 583        /* Set new parameters */
 584        mddev->new_level = 0;
 585        mddev->new_layout = 0;
 586        mddev->new_chunk_sectors = mddev->chunk_sectors;
 587        mddev->raid_disks--;
 588        mddev->delta_disks = -1;
 589        /* make sure it will be not marked as dirty */
 590        mddev->recovery_cp = MaxSector;
 591
 592        create_strip_zones(mddev, &priv_conf);
 593        return priv_conf;
 594}
 595
 596static void *raid0_takeover_raid10(mddev_t *mddev)
 597{
 598        raid0_conf_t *priv_conf;
 599
 600        /* Check layout:
 601         *  - far_copies must be 1
 602         *  - near_copies must be 2
 603         *  - disks number must be even
 604         *  - all mirrors must be already degraded
 605         */
 606        if (mddev->layout != ((1 << 8) + 2)) {
 607                printk(KERN_ERR "md/raid0:%s:: Raid0 cannot takover layout: 0x%x\n",
 608                       mdname(mddev),
 609                       mddev->layout);
 610                return ERR_PTR(-EINVAL);
 611        }
 612        if (mddev->raid_disks & 1) {
 613                printk(KERN_ERR "md/raid0:%s: Raid0 cannot takover Raid10 with odd disk number.\n",
 614                       mdname(mddev));
 615                return ERR_PTR(-EINVAL);
 616        }
 617        if (mddev->degraded != (mddev->raid_disks>>1)) {
 618                printk(KERN_ERR "md/raid0:%s: All mirrors must be already degraded!\n",
 619                       mdname(mddev));
 620                return ERR_PTR(-EINVAL);
 621        }
 622
 623        /* Set new parameters */
 624        mddev->new_level = 0;
 625        mddev->new_layout = 0;
 626        mddev->new_chunk_sectors = mddev->chunk_sectors;
 627        mddev->delta_disks = - mddev->raid_disks / 2;
 628        mddev->raid_disks += mddev->delta_disks;
 629        mddev->degraded = 0;
 630        /* make sure it will be not marked as dirty */
 631        mddev->recovery_cp = MaxSector;
 632
 633        create_strip_zones(mddev, &priv_conf);
 634        return priv_conf;
 635}
 636
 637static void *raid0_takeover_raid1(mddev_t *mddev)
 638{
 639        raid0_conf_t *priv_conf;
 640
 641        /* Check layout:
 642         *  - (N - 1) mirror drives must be already faulty
 643         */
 644        if ((mddev->raid_disks - 1) != mddev->degraded) {
 645                printk(KERN_ERR "md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
 646                       mdname(mddev));
 647                return ERR_PTR(-EINVAL);
 648        }
 649
 650        /* Set new parameters */
 651        mddev->new_level = 0;
 652        mddev->new_layout = 0;
 653        mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
 654        mddev->delta_disks = 1 - mddev->raid_disks;
 655        mddev->raid_disks = 1;
 656        /* make sure it will be not marked as dirty */
 657        mddev->recovery_cp = MaxSector;
 658
 659        create_strip_zones(mddev, &priv_conf);
 660        return priv_conf;
 661}
 662
 663static void *raid0_takeover(mddev_t *mddev)
 664{
 665        /* raid0 can take over:
 666         *  raid4 - if all data disks are active.
 667         *  raid5 - providing it is Raid4 layout and one disk is faulty
 668         *  raid10 - assuming we have all necessary active disks
 669         *  raid1 - with (N -1) mirror drives faulty
 670         */
 671        if (mddev->level == 4)
 672                return raid0_takeover_raid45(mddev);
 673
 674        if (mddev->level == 5) {
 675                if (mddev->layout == ALGORITHM_PARITY_N)
 676                        return raid0_takeover_raid45(mddev);
 677
 678                printk(KERN_ERR "md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
 679                       mdname(mddev), ALGORITHM_PARITY_N);
 680        }
 681
 682        if (mddev->level == 10)
 683                return raid0_takeover_raid10(mddev);
 684
 685        if (mddev->level == 1)
 686                return raid0_takeover_raid1(mddev);
 687
 688        printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n",
 689                mddev->level);
 690
 691        return ERR_PTR(-EINVAL);
 692}
 693
 694static void raid0_quiesce(mddev_t *mddev, int state)
 695{
 696}
 697
 698static struct mdk_personality raid0_personality=
 699{
 700        .name           = "raid0",
 701        .level          = 0,
 702        .owner          = THIS_MODULE,
 703        .make_request   = raid0_make_request,
 704        .run            = raid0_run,
 705        .stop           = raid0_stop,
 706        .status         = raid0_status,
 707        .size           = raid0_size,
 708        .takeover       = raid0_takeover,
 709        .quiesce        = raid0_quiesce,
 710};
 711
 712static int __init raid0_init (void)
 713{
 714        return register_md_personality (&raid0_personality);
 715}
 716
 717static void raid0_exit (void)
 718{
 719        unregister_md_personality (&raid0_personality);
 720}
 721
 722module_init(raid0_init);
 723module_exit(raid0_exit);
 724MODULE_LICENSE("GPL");
 725MODULE_DESCRIPTION("RAID0 (striping) personality for MD");
 726MODULE_ALIAS("md-personality-2"); /* RAID0 */
 727MODULE_ALIAS("md-raid0");
 728MODULE_ALIAS("md-level-0");
 729