/*
 * md.c : Multiple Devices driver for Linux
 *
 * Core of the MD (software RAID) subsystem: array and member-device
 * management, superblock handling, the sysfs/ioctl interfaces and the
 * resync/recovery machinery.
 */

#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/buffer_head.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include "md.h"
#include "bitmap.h"

#define DEBUG 0
#define dprintk(x...) ((void)(DEBUG && printk(x)))

#ifndef MODULE
static void autostart_arrays(int part);
#endif

static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static void md_print_devices(void);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);

#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }

/*
 * Resync/reconstruction speed throttling: the per-array sync_speed_min/max
 * values (settable via sysfs) override these system-wide defaults, which
 * are exposed as /proc/sys/dev/raid/speed_limit_{min,max} in KB/sec.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(mddev_t *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(mddev_t *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}
96
97static struct ctl_table_header *raid_table_header;
98
99static ctl_table raid_table[] = {
100 {
101 .ctl_name = DEV_RAID_SPEED_LIMIT_MIN,
102 .procname = "speed_limit_min",
103 .data = &sysctl_speed_limit_min,
104 .maxlen = sizeof(int),
105 .mode = S_IRUGO|S_IWUSR,
106 .proc_handler = &proc_dointvec,
107 },
108 {
109 .ctl_name = DEV_RAID_SPEED_LIMIT_MAX,
110 .procname = "speed_limit_max",
111 .data = &sysctl_speed_limit_max,
112 .maxlen = sizeof(int),
113 .mode = S_IRUGO|S_IWUSR,
114 .proc_handler = &proc_dointvec,
115 },
116 { .ctl_name = 0 }
117};
118
119static ctl_table raid_dir_table[] = {
120 {
121 .ctl_name = DEV_RAID,
122 .procname = "raid",
123 .maxlen = 0,
124 .mode = S_IRUGO|S_IXUGO,
125 .child = raid_table,
126 },
127 { .ctl_name = 0 }
128};
129
130static ctl_table raid_root_table[] = {
131 {
132 .ctl_name = CTL_DEV,
133 .procname = "dev",
134 .maxlen = 0,
135 .mode = 0555,
136 .child = raid_dir_table,
137 },
138 { .ctl_name = 0 }
139};
140
141static const struct block_device_operations md_fops;
142
143static int start_readonly;
144
/*
 * A system-wide event counter, bumped whenever something interesting happens
 * to any array.  Readers polling /proc/mdstat (for example mdadm --monitor)
 * sleep on md_event_waiters and are woken on each change.
 */
155static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
156static atomic_t md_event_count;
157void md_new_event(mddev_t *mddev)
158{
159 atomic_inc(&md_event_count);
160 wake_up(&md_event_waiters);
161}
162EXPORT_SYMBOL_GPL(md_new_event);

/*
 * Variant of md_new_event() intended to be safe from interrupt or other
 * atomic context: it only bumps the counter and wakes waiters.
 */
167static void md_new_event_inintr(mddev_t *mddev)
168{
169 atomic_inc(&md_event_count);
170 wake_up(&md_event_waiters);
171}
172
173
174
175
176
177static LIST_HEAD(all_mddevs);
178static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * for_each_mddev() iterates over every known array.  It takes and drops
 * all_mddevs_lock around each step and holds a reference (mddev_get) on the
 * array currently being visited, so the caller may sleep inside the loop
 * body.
 */
188#define for_each_mddev(mddev,tmp) \
189 \
190 for (({ spin_lock(&all_mddevs_lock); \
191 tmp = all_mddevs.next; \
192 mddev = NULL;}); \
193 ({ if (tmp != &all_mddevs) \
194 mddev_get(list_entry(tmp, mddev_t, all_mddevs));\
195 spin_unlock(&all_mddevs_lock); \
196 if (mddev) mddev_put(mddev); \
197 mddev = list_entry(tmp, mddev_t, all_mddevs); \
198 tmp != &all_mddevs;}); \
199 ({ spin_lock(&all_mddevs_lock); \
200 tmp = tmp->next;}) \
201 )
202
/*
 * Generic make_request hook for all md personalities.  If the array is
 * suspended the bio is held back until mddev_resume(); otherwise the request
 * is accounted in ->active_io and handed to the personality.
 */
211static int md_make_request(struct request_queue *q, struct bio *bio)
212{
213 mddev_t *mddev = q->queuedata;
214 int rv;
215 if (mddev == NULL || mddev->pers == NULL) {
216 bio_io_error(bio);
217 return 0;
218 }
219 rcu_read_lock();
220 if (mddev->suspended) {
221 DEFINE_WAIT(__wait);
222 for (;;) {
223 prepare_to_wait(&mddev->sb_wait, &__wait,
224 TASK_UNINTERRUPTIBLE);
225 if (!mddev->suspended)
226 break;
227 rcu_read_unlock();
228 schedule();
229 rcu_read_lock();
230 }
231 finish_wait(&mddev->sb_wait, &__wait);
232 }
233 atomic_inc(&mddev->active_io);
234 rcu_read_unlock();
235 rv = mddev->pers->make_request(q, bio);
236 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
237 wake_up(&mddev->sb_wait);
238
239 return rv;
240}
241
242static void mddev_suspend(mddev_t *mddev)
243{
244 BUG_ON(mddev->suspended);
245 mddev->suspended = 1;
246 synchronize_rcu();
247 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
248 mddev->pers->quiesce(mddev, 1);
249 md_unregister_thread(mddev->thread);
250 mddev->thread = NULL;
	/*
	 * At this point no new I/O can enter the personality and all in-flight
	 * I/O has drained, so the caller may safely reconfigure or replace the
	 * personality.
	 */
256}
257
258static void mddev_resume(mddev_t *mddev)
259{
260 mddev->suspended = 0;
261 wake_up(&mddev->sb_wait);
262 mddev->pers->quiesce(mddev, 0);
263}
264
265int mddev_congested(mddev_t *mddev, int bits)
266{
267 return mddev->suspended;
268}
269EXPORT_SYMBOL(mddev_congested);
270
271
272static inline mddev_t *mddev_get(mddev_t *mddev)
273{
274 atomic_inc(&mddev->active);
275 return mddev;
276}
277
278static void mddev_delayed_delete(struct work_struct *ws);
279
280static void mddev_put(mddev_t *mddev)
281{
282 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
283 return;
284 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
285 !mddev->hold_active) {
286 list_del(&mddev->all_mddevs);
287 if (mddev->gendisk) {
			/*
			 * We are called with all_mddevs_lock held, so the
			 * gendisk must be torn down later from process
			 * context via a work item (mddev_delayed_delete).
			 */
294 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
295 schedule_work(&mddev->del_work);
296 } else
297 kfree(mddev);
298 }
299 spin_unlock(&all_mddevs_lock);
300}
301
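/*
 * Find the mddev for a given unit number, allocating and registering a new
 * one if none exists.  Passing unit == 0 picks an unused dynamic minor.
 */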
302static mddev_t * mddev_find(dev_t unit)
303{
304 mddev_t *mddev, *new = NULL;
305
306 retry:
307 spin_lock(&all_mddevs_lock);
308
309 if (unit) {
310 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
311 if (mddev->unit == unit) {
312 mddev_get(mddev);
313 spin_unlock(&all_mddevs_lock);
314 kfree(new);
315 return mddev;
316 }
317
318 if (new) {
319 list_add(&new->all_mddevs, &all_mddevs);
320 spin_unlock(&all_mddevs_lock);
321 new->hold_active = UNTIL_IOCTL;
322 return new;
323 }
324 } else if (new) {
325
326 static int next_minor = 512;
327 int start = next_minor;
328 int is_free = 0;
329 int dev = 0;
330 while (!is_free) {
331 dev = MKDEV(MD_MAJOR, next_minor);
332 next_minor++;
333 if (next_minor > MINORMASK)
334 next_minor = 0;
335 if (next_minor == start) {
336
337 spin_unlock(&all_mddevs_lock);
338 kfree(new);
339 return NULL;
340 }
341
342 is_free = 1;
343 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
344 if (mddev->unit == dev) {
345 is_free = 0;
346 break;
347 }
348 }
349 new->unit = dev;
350 new->md_minor = MINOR(dev);
351 new->hold_active = UNTIL_STOP;
352 list_add(&new->all_mddevs, &all_mddevs);
353 spin_unlock(&all_mddevs_lock);
354 return new;
355 }
356 spin_unlock(&all_mddevs_lock);
357
358 new = kzalloc(sizeof(*new), GFP_KERNEL);
359 if (!new)
360 return NULL;
361
362 new->unit = unit;
363 if (MAJOR(unit) == MD_MAJOR)
364 new->md_minor = MINOR(unit);
365 else
366 new->md_minor = MINOR(unit) >> MdpMinorShift;
367
368 mutex_init(&new->open_mutex);
369 mutex_init(&new->reconfig_mutex);
370 INIT_LIST_HEAD(&new->disks);
371 INIT_LIST_HEAD(&new->all_mddevs);
372 init_timer(&new->safemode_timer);
373 atomic_set(&new->active, 1);
374 atomic_set(&new->openers, 0);
375 atomic_set(&new->active_io, 0);
376 spin_lock_init(&new->write_lock);
377 init_waitqueue_head(&new->sb_wait);
378 init_waitqueue_head(&new->recovery_wait);
379 new->reshape_position = MaxSector;
380 new->resync_min = 0;
381 new->resync_max = MaxSector;
382 new->level = LEVEL_NONE;
383
384 goto retry;
385}
386
387static inline int mddev_lock(mddev_t * mddev)
388{
389 return mutex_lock_interruptible(&mddev->reconfig_mutex);
390}
391
392static inline int mddev_is_locked(mddev_t *mddev)
393{
394 return mutex_is_locked(&mddev->reconfig_mutex);
395}
396
397static inline int mddev_trylock(mddev_t * mddev)
398{
399 return mutex_trylock(&mddev->reconfig_mutex);
400}
401
402static inline void mddev_unlock(mddev_t * mddev)
403{
404 mutex_unlock(&mddev->reconfig_mutex);
405
406 md_wakeup_thread(mddev->thread);
407}
408
409static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
410{
411 mdk_rdev_t *rdev;
412
413 list_for_each_entry(rdev, &mddev->disks, same_set)
414 if (rdev->desc_nr == nr)
415 return rdev;
416
417 return NULL;
418}
419
420static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
421{
422 mdk_rdev_t *rdev;
423
424 list_for_each_entry(rdev, &mddev->disks, same_set)
425 if (rdev->bdev->bd_dev == dev)
426 return rdev;
427
428 return NULL;
429}
430
431static struct mdk_personality *find_pers(int level, char *clevel)
432{
433 struct mdk_personality *pers;
434 list_for_each_entry(pers, &pers_list, list) {
435 if (level != LEVEL_NONE && pers->level == level)
436 return pers;
437 if (strcmp(pers->name, clevel)==0)
438 return pers;
439 }
440 return NULL;
441}
442
443
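/*
 * Start sector of a 0.90 superblock on this device: the device size rounded
 * down into the reserved area at the end of the disk.
 */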
444static inline sector_t calc_dev_sboffset(struct block_device *bdev)
445{
446 sector_t num_sectors = bdev->bd_inode->i_size / 512;
447 return MD_NEW_SIZE_SECTORS(num_sectors);
448}
449
450static int alloc_disk_sb(mdk_rdev_t * rdev)
451{
452 if (rdev->sb_page)
453 MD_BUG();
454
455 rdev->sb_page = alloc_page(GFP_KERNEL);
456 if (!rdev->sb_page) {
457 printk(KERN_ALERT "md: out of memory.\n");
458 return -ENOMEM;
459 }
460
461 return 0;
462}
463
464static void free_disk_sb(mdk_rdev_t * rdev)
465{
466 if (rdev->sb_page) {
467 put_page(rdev->sb_page);
468 rdev->sb_loaded = 0;
469 rdev->sb_page = NULL;
470 rdev->sb_start = 0;
471 rdev->sectors = 0;
472 }
473}
474
475
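/*
 * Completion handler for superblock writes issued by md_super_write():
 * records write errors via md_error() and wakes md_super_wait() when the
 * last pending write finishes.
 */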
476static void super_written(struct bio *bio, int error)
477{
478 mdk_rdev_t *rdev = bio->bi_private;
479 mddev_t *mddev = rdev->mddev;
480
481 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
482 printk("md: super_written gets error=%d, uptodate=%d\n",
483 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
484 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
485 md_error(mddev, rdev);
486 }
487
488 if (atomic_dec_and_test(&mddev->pending_writes))
489 wake_up(&mddev->sb_wait);
490 bio_put(bio);
491}
492
493static void super_written_barrier(struct bio *bio, int error)
494{
495 struct bio *bio2 = bio->bi_private;
496 mdk_rdev_t *rdev = bio2->bi_private;
497 mddev_t *mddev = rdev->mddev;
498
499 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
500 error == -EOPNOTSUPP) {
501 unsigned long flags;
502
503 set_bit(BarriersNotsupp, &rdev->flags);
504 mddev->barriers_work = 0;
505 spin_lock_irqsave(&mddev->write_lock, flags);
506 bio2->bi_next = mddev->biolist;
507 mddev->biolist = bio2;
508 spin_unlock_irqrestore(&mddev->write_lock, flags);
509 wake_up(&mddev->sb_wait);
510 bio_put(bio);
511 } else {
512 bio_put(bio2);
513 bio->bi_private = rdev;
514 super_written(bio, error);
515 }
516}
517
518void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
519 sector_t sector, int size, struct page *page)
520{
	/*
	 * Write the first 'size' bytes of 'page' to 'sector' on rdev.
	 * mddev->pending_writes is incremented here and dropped in the
	 * completion handler, which wakes sb_wait when it reaches zero and
	 * calls md_error() on failure.  A barrier write that fails with
	 * -EOPNOTSUPP is queued on mddev->biolist so md_super_wait() can
	 * retry it without the barrier.
	 */
530 struct bio *bio = bio_alloc(GFP_NOIO, 1);
531 int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
532
533 bio->bi_bdev = rdev->bdev;
534 bio->bi_sector = sector;
535 bio_add_page(bio, page, size, 0);
536 bio->bi_private = rdev;
537 bio->bi_end_io = super_written;
538 bio->bi_rw = rw;
539
540 atomic_inc(&mddev->pending_writes);
541 if (!test_bit(BarriersNotsupp, &rdev->flags)) {
542 struct bio *rbio;
543 rw |= (1<<BIO_RW_BARRIER);
544 rbio = bio_clone(bio, GFP_NOIO);
545 rbio->bi_private = bio;
546 rbio->bi_end_io = super_written_barrier;
547 submit_bio(rw, rbio);
548 } else
549 submit_bio(rw, bio);
550}
551
552void md_super_wait(mddev_t *mddev)
553{
	/*
	 * Wait for all scheduled superblock writes to complete, resubmitting
	 * without a barrier any that failed with -EOPNOTSUPP.
	 */
557 DEFINE_WAIT(wq);
558 for(;;) {
559 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
560 if (atomic_read(&mddev->pending_writes)==0)
561 break;
562 while (mddev->biolist) {
563 struct bio *bio;
564 spin_lock_irq(&mddev->write_lock);
565 bio = mddev->biolist;
566 mddev->biolist = bio->bi_next ;
567 bio->bi_next = NULL;
568 spin_unlock_irq(&mddev->write_lock);
569 submit_bio(bio->bi_rw, bio);
570 }
571 schedule();
572 }
573 finish_wait(&mddev->sb_wait, &wq);
574}
575
576static void bi_complete(struct bio *bio, int error)
577{
578 complete((struct completion*)bio->bi_private);
579}
580
581int sync_page_io(struct block_device *bdev, sector_t sector, int size,
582 struct page *page, int rw)
583{
584 struct bio *bio = bio_alloc(GFP_NOIO, 1);
585 struct completion event;
586 int ret;
587
588 rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
589
590 bio->bi_bdev = bdev;
591 bio->bi_sector = sector;
592 bio_add_page(bio, page, size, 0);
593 init_completion(&event);
594 bio->bi_private = &event;
595 bio->bi_end_io = bi_complete;
596 submit_bio(rw, bio);
597 wait_for_completion(&event);
598
599 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
600 bio_put(bio);
601 return ret;
602}
603EXPORT_SYMBOL_GPL(sync_page_io);
604
605static int read_disk_sb(mdk_rdev_t * rdev, int size)
606{
607 char b[BDEVNAME_SIZE];
608 if (!rdev->sb_page) {
609 MD_BUG();
610 return -EINVAL;
611 }
612 if (rdev->sb_loaded)
613 return 0;
614
615
616 if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ))
617 goto fail;
618 rdev->sb_loaded = 1;
619 return 0;
620
621fail:
622 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
623 bdevname(rdev->bdev,b));
624 return -EINVAL;
625}
626
627static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
628{
629 return sb1->set_uuid0 == sb2->set_uuid0 &&
630 sb1->set_uuid1 == sb2->set_uuid1 &&
631 sb1->set_uuid2 == sb2->set_uuid2 &&
632 sb1->set_uuid3 == sb2->set_uuid3;
633}
634
635static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
636{
637 int ret;
638 mdp_super_t *tmp1, *tmp2;
639
640 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
641 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
642
643 if (!tmp1 || !tmp2) {
644 ret = 0;
645 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
646 goto abort;
647 }
648
649 *tmp1 = *sb1;
650 *tmp2 = *sb2;
651
652
653
654
655 tmp1->nr_disks = 0;
656 tmp2->nr_disks = 0;
657
658 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
659abort:
660 kfree(tmp1);
661 kfree(tmp2);
662 return ret;
663}
664
665
666static u32 md_csum_fold(u32 csum)
667{
668 csum = (csum & 0xffff) + (csum >> 16);
669 return (csum & 0xffff) + (csum >> 16);
670}
671
672static unsigned int calc_sb_csum(mdp_super_t * sb)
673{
674 u64 newcsum = 0;
675 u32 *sb32 = (u32*)sb;
676 int i;
677 unsigned int disk_csum, csum;
678
679 disk_csum = sb->sb_csum;
680 sb->sb_csum = 0;
681
682 for (i = 0; i < MD_SB_BYTES/4 ; i++)
683 newcsum += sb32[i];
684 csum = (newcsum & 0xffffffff) + (newcsum>>32);
685
686
687#ifdef CONFIG_ALPHA
688
689
690
691
692
693
694
695
696 sb->sb_csum = md_csum_fold(disk_csum);
697#else
698 sb->sb_csum = disk_csum;
699#endif
700 return csum;
701}
702
/*
 * Superblock format handling.  Each supported on-disk metadata format
 * provides a struct super_type:
 *
 *   load_super(rdev, refdev, minor_version)
 *     Read and verify the superblock from rdev.  If refdev is non-NULL the
 *     two are compared: returns 1 if rdev's is newer, 0 if refdev's is at
 *     least as recent, or a negative errno if the superblock is invalid or
 *     the devices do not belong together.
 *
 *   validate_super(mddev, rdev)
 *     Import array-wide fields into mddev (for the first device) and
 *     per-device state into rdev from the already loaded superblock.
 *
 *   sync_super(mddev, rdev)
 *     Regenerate rdev's in-memory superblock from current mddev state so it
 *     can be written out.
 *
 *   rdev_size_change(rdev, num_sectors)
 *     Adjust the usable size of the device, relocating the superblock if
 *     necessary; returns the new size in 1K blocks, or 0 on failure.
 */
734struct super_type {
735 char *name;
736 struct module *owner;
737 int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev,
738 int minor_version);
739 int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
740 void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
741 unsigned long long (*rdev_size_change)(mdk_rdev_t *rdev,
742 sector_t num_sectors);
743};
744
/*
 * Check that no bitmap is configured for this array; used by personalities
 * that cannot support one.  Returns 0 if there is no bitmap, 1 (after
 * logging an error) if there is.
 */
753int md_check_no_bitmap(mddev_t *mddev)
754{
755 if (!mddev->bitmap_file && !mddev->bitmap_offset)
756 return 0;
757 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
758 mdname(mddev), mddev->pers->name);
759 return 1;
760}
761EXPORT_SYMBOL(md_check_no_bitmap);
762
/*
 * Version 0.90.0 superblock handling.
 */
766static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
767{
768 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
769 mdp_super_t *sb;
770 int ret;
771
	/*
	 * Calculate the position of the superblock (in 512-byte sectors):
	 * the 0.90 superblock lives in an aligned reserved area near the end
	 * of the device.
	 */
778 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
779
780 ret = read_disk_sb(rdev, MD_SB_BYTES);
781 if (ret) return ret;
782
783 ret = -EINVAL;
784
785 bdevname(rdev->bdev, b);
786 sb = (mdp_super_t*)page_address(rdev->sb_page);
787
788 if (sb->md_magic != MD_SB_MAGIC) {
789 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
790 b);
791 goto abort;
792 }
793
794 if (sb->major_version != 0 ||
795 sb->minor_version < 90 ||
796 sb->minor_version > 91) {
797 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
798 sb->major_version, sb->minor_version,
799 b);
800 goto abort;
801 }
802
803 if (sb->raid_disks <= 0)
804 goto abort;
805
806 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
807 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
808 b);
809 goto abort;
810 }
811
812 rdev->preferred_minor = sb->md_minor;
813 rdev->data_offset = 0;
814 rdev->sb_size = MD_SB_BYTES;
815
816 if (sb->level == LEVEL_MULTIPATH)
817 rdev->desc_nr = -1;
818 else
819 rdev->desc_nr = sb->this_disk.number;
820
821 if (!refdev) {
822 ret = 1;
823 } else {
824 __u64 ev1, ev2;
825 mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
826 if (!uuid_equal(refsb, sb)) {
827 printk(KERN_WARNING "md: %s has different UUID to %s\n",
828 b, bdevname(refdev->bdev,b2));
829 goto abort;
830 }
831 if (!sb_equal(refsb, sb)) {
832 printk(KERN_WARNING "md: %s has same UUID"
833 " but different superblock to %s\n",
834 b, bdevname(refdev->bdev, b2));
835 goto abort;
836 }
837 ev1 = md_event(sb);
838 ev2 = md_event(refsb);
839 if (ev1 > ev2)
840 ret = 1;
841 else
842 ret = 0;
843 }
844 rdev->sectors = rdev->sb_start;
845
846 if (rdev->sectors < sb->size * 2 && sb->level > 1)
847
848 ret = -EINVAL;
849
850 abort:
851 return ret;
852}
853
/*
 * validate_super for 0.90.0: import the superblock into mddev/rdev state.
 */
857static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
858{
859 mdp_disk_t *desc;
860 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
861 __u64 ev1 = md_event(sb);
862
863 rdev->raid_disk = -1;
864 clear_bit(Faulty, &rdev->flags);
865 clear_bit(In_sync, &rdev->flags);
866 clear_bit(WriteMostly, &rdev->flags);
867 clear_bit(BarriersNotsupp, &rdev->flags);
868
869 if (mddev->raid_disks == 0) {
870 mddev->major_version = 0;
871 mddev->minor_version = sb->minor_version;
872 mddev->patch_version = sb->patch_version;
873 mddev->external = 0;
874 mddev->chunk_sectors = sb->chunk_size >> 9;
875 mddev->ctime = sb->ctime;
876 mddev->utime = sb->utime;
877 mddev->level = sb->level;
878 mddev->clevel[0] = 0;
879 mddev->layout = sb->layout;
880 mddev->raid_disks = sb->raid_disks;
881 mddev->dev_sectors = sb->size * 2;
882 mddev->events = ev1;
883 mddev->bitmap_offset = 0;
884 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
885
886 if (mddev->minor_version >= 91) {
887 mddev->reshape_position = sb->reshape_position;
888 mddev->delta_disks = sb->delta_disks;
889 mddev->new_level = sb->new_level;
890 mddev->new_layout = sb->new_layout;
891 mddev->new_chunk_sectors = sb->new_chunk >> 9;
892 } else {
893 mddev->reshape_position = MaxSector;
894 mddev->delta_disks = 0;
895 mddev->new_level = mddev->level;
896 mddev->new_layout = mddev->layout;
897 mddev->new_chunk_sectors = mddev->chunk_sectors;
898 }
899
900 if (sb->state & (1<<MD_SB_CLEAN))
901 mddev->recovery_cp = MaxSector;
902 else {
903 if (sb->events_hi == sb->cp_events_hi &&
904 sb->events_lo == sb->cp_events_lo) {
905 mddev->recovery_cp = sb->recovery_cp;
906 } else
907 mddev->recovery_cp = 0;
908 }
909
910 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
911 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
912 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
913 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
914
915 mddev->max_disks = MD_SB_DISKS;
916
917 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
918 mddev->bitmap_file == NULL)
919 mddev->bitmap_offset = mddev->default_bitmap_offset;
920
921 } else if (mddev->pers == NULL) {
922
923 ++ev1;
924 if (ev1 < mddev->events)
925 return -EINVAL;
926 } else if (mddev->bitmap) {
927
928
929
930 if (ev1 < mddev->bitmap->events_cleared)
931 return 0;
932 } else {
933 if (ev1 < mddev->events)
934
935 return 0;
936 }
937
938 if (mddev->level != LEVEL_MULTIPATH) {
939 desc = sb->disks + rdev->desc_nr;
940
941 if (desc->state & (1<<MD_DISK_FAULTY))
942 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
945 set_bit(In_sync, &rdev->flags);
946 rdev->raid_disk = desc->raid_disk;
947 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/*
			 * Active but not in-sync implies a recovery that is
			 * still in progress; 0.90 metadata does not record
			 * the exact offset, so start again from zero.
			 */
951 if (mddev->minor_version >= 91) {
952 rdev->recovery_offset = 0;
953 rdev->raid_disk = desc->raid_disk;
954 }
955 }
956 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
957 set_bit(WriteMostly, &rdev->flags);
958 } else
959 set_bit(In_sync, &rdev->flags);
960 return 0;
961}
962
/*
 * sync_super for 0.90.0: regenerate the superblock from current mddev state.
 */
966static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
967{
968 mdp_super_t *sb;
969 mdk_rdev_t *rdev2;
970 int next_spare = mddev->raid_disks;
	/*
	 * Make rdev->sb match the current mddev state:
	 *  1/ zero the superblock and copy in the array-wide fields;
	 *  2/ add an entry for every member device, handing out spare slots
	 *     from next_spare upwards;
	 *  3/ mark any remaining empty slots below raid_disks as
	 *     removed/faulty.
	 */
983 int i;
984 int active=0, working=0,failed=0,spare=0,nr_disks=0;
985
986 rdev->sb_size = MD_SB_BYTES;
987
988 sb = (mdp_super_t*)page_address(rdev->sb_page);
989
990 memset(sb, 0, sizeof(*sb));
991
992 sb->md_magic = MD_SB_MAGIC;
993 sb->major_version = mddev->major_version;
994 sb->patch_version = mddev->patch_version;
995 sb->gvalid_words = 0;
996 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
997 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
998 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
999 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1000
1001 sb->ctime = mddev->ctime;
1002 sb->level = mddev->level;
1003 sb->size = mddev->dev_sectors / 2;
1004 sb->raid_disks = mddev->raid_disks;
1005 sb->md_minor = mddev->md_minor;
1006 sb->not_persistent = 0;
1007 sb->utime = mddev->utime;
1008 sb->state = 0;
1009 sb->events_hi = (mddev->events>>32);
1010 sb->events_lo = (u32)mddev->events;
1011
1012 if (mddev->reshape_position == MaxSector)
1013 sb->minor_version = 90;
1014 else {
1015 sb->minor_version = 91;
1016 sb->reshape_position = mddev->reshape_position;
1017 sb->new_level = mddev->new_level;
1018 sb->delta_disks = mddev->delta_disks;
1019 sb->new_layout = mddev->new_layout;
1020 sb->new_chunk = mddev->new_chunk_sectors << 9;
1021 }
1022 mddev->minor_version = sb->minor_version;
1023 if (mddev->in_sync)
1024 {
1025 sb->recovery_cp = mddev->recovery_cp;
1026 sb->cp_events_hi = (mddev->events>>32);
1027 sb->cp_events_lo = (u32)mddev->events;
1028 if (mddev->recovery_cp == MaxSector)
1029 sb->state = (1<< MD_SB_CLEAN);
1030 } else
1031 sb->recovery_cp = 0;
1032
1033 sb->layout = mddev->layout;
1034 sb->chunk_size = mddev->chunk_sectors << 9;
1035
1036 if (mddev->bitmap && mddev->bitmap_file == NULL)
1037 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1038
1039 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1040 list_for_each_entry(rdev2, &mddev->disks, same_set) {
1041 mdp_disk_t *d;
1042 int desc_nr;
1043 int is_active = test_bit(In_sync, &rdev2->flags);
1044
1045 if (rdev2->raid_disk >= 0 &&
1046 sb->minor_version >= 91)
1047
1048
1049
1050
1051 is_active = 1;
1052 if (rdev2->raid_disk < 0 ||
1053 test_bit(Faulty, &rdev2->flags))
1054 is_active = 0;
1055 if (is_active)
1056 desc_nr = rdev2->raid_disk;
1057 else
1058 desc_nr = next_spare++;
1059 rdev2->desc_nr = desc_nr;
1060 d = &sb->disks[rdev2->desc_nr];
1061 nr_disks++;
1062 d->number = rdev2->desc_nr;
1063 d->major = MAJOR(rdev2->bdev->bd_dev);
1064 d->minor = MINOR(rdev2->bdev->bd_dev);
1065 if (is_active)
1066 d->raid_disk = rdev2->raid_disk;
1067 else
1068 d->raid_disk = rdev2->desc_nr;
1069 if (test_bit(Faulty, &rdev2->flags))
1070 d->state = (1<<MD_DISK_FAULTY);
1071 else if (is_active) {
1072 d->state = (1<<MD_DISK_ACTIVE);
1073 if (test_bit(In_sync, &rdev2->flags))
1074 d->state |= (1<<MD_DISK_SYNC);
1075 active++;
1076 working++;
1077 } else {
1078 d->state = 0;
1079 spare++;
1080 working++;
1081 }
1082 if (test_bit(WriteMostly, &rdev2->flags))
1083 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1084 }
1085
1086 for (i=0 ; i < mddev->raid_disks ; i++) {
1087 mdp_disk_t *d = &sb->disks[i];
1088 if (d->state == 0 && d->number == 0) {
1089 d->number = i;
1090 d->raid_disk = i;
1091 d->state = (1<<MD_DISK_REMOVED);
1092 d->state |= (1<<MD_DISK_FAULTY);
1093 failed++;
1094 }
1095 }
1096 sb->nr_disks = nr_disks;
1097 sb->active_disks = active;
1098 sb->working_disks = working;
1099 sb->failed_disks = failed;
1100 sb->spare_disks = spare;
1101
1102 sb->this_disk = sb->disks[rdev->desc_nr];
1103 sb->sb_csum = calc_sb_csum(sb);
1104}
1105
/*
 * rdev_size_change for 0.90.0: the superblock always sits at the end of the
 * device, so growing the device means relocating it.
 */
1109static unsigned long long
1110super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1111{
1112 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1113 return 0;
1114 if (rdev->mddev->bitmap_offset)
1115 return 0;
1116 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
1117 if (!num_sectors || num_sectors > rdev->sb_start)
1118 num_sectors = rdev->sb_start;
1119 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1120 rdev->sb_page);
1121 md_super_wait(rdev->mddev);
1122 return num_sectors / 2;
1123}
1124
/*
 * Version 1 superblock handling.  The checksum covers the 256-byte header
 * plus two bytes per device-role entry.
 */
1130static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1131{
1132 __le32 disk_csum;
1133 u32 csum;
1134 unsigned long long newcsum;
1135 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1136 __le32 *isuper = (__le32*)sb;
1137 int i;
1138
1139 disk_csum = sb->sb_csum;
1140 sb->sb_csum = 0;
1141 newcsum = 0;
1142 for (i=0; size>=4; size -= 4 )
1143 newcsum += le32_to_cpu(*isuper++);
1144
1145 if (size == 2)
1146 newcsum += le16_to_cpu(*(__le16*) isuper);
1147
1148 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1149 sb->sb_csum = disk_csum;
1150 return cpu_to_le32(csum);
1151}
1152
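/*
 * load_super for version-1 metadata: locate, read and sanity-check the
 * superblock, then compare against refdev if one is given.
 */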
1153static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
1154{
1155 struct mdp_superblock_1 *sb;
1156 int ret;
1157 sector_t sb_start;
1158 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1159 int bmask;
	/*
	 * Calculate the position of the superblock in 512-byte sectors.
	 * It is always aligned to a 4K boundary and, depending on
	 * minor_version, it can be:
	 *  0: at least 8K, but less than 12K, from the end of the device
	 *  1: at the start of the device
	 *  2: 4K from the start of the device
	 */
1169 switch(minor_version) {
1170 case 0:
1171 sb_start = rdev->bdev->bd_inode->i_size >> 9;
1172 sb_start -= 8*2;
1173 sb_start &= ~(sector_t)(4*2-1);
1174 break;
1175 case 1:
1176 sb_start = 0;
1177 break;
1178 case 2:
1179 sb_start = 8;
1180 break;
1181 default:
1182 return -EINVAL;
1183 }
1184 rdev->sb_start = sb_start;
1185
1186
1187
1188
1189 ret = read_disk_sb(rdev, 4096);
1190 if (ret) return ret;
1191
1192
1193 sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1194
1195 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1196 sb->major_version != cpu_to_le32(1) ||
1197 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1198 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1199 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1200 return -EINVAL;
1201
1202 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1203 printk("md: invalid superblock checksum on %s\n",
1204 bdevname(rdev->bdev,b));
1205 return -EINVAL;
1206 }
1207 if (le64_to_cpu(sb->data_size) < 10) {
1208 printk("md: data_size too small on %s\n",
1209 bdevname(rdev->bdev,b));
1210 return -EINVAL;
1211 }
1212
1213 rdev->preferred_minor = 0xffff;
1214 rdev->data_offset = le64_to_cpu(sb->data_offset);
1215 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1216
1217 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1218 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1219 if (rdev->sb_size & bmask)
1220 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1221
1222 if (minor_version
1223 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1224 return -EINVAL;
1225
1226 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1227 rdev->desc_nr = -1;
1228 else
1229 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1230
1231 if (!refdev) {
1232 ret = 1;
1233 } else {
1234 __u64 ev1, ev2;
1235 struct mdp_superblock_1 *refsb =
1236 (struct mdp_superblock_1*)page_address(refdev->sb_page);
1237
1238 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1239 sb->level != refsb->level ||
1240 sb->layout != refsb->layout ||
1241 sb->chunksize != refsb->chunksize) {
1242 printk(KERN_WARNING "md: %s has strangely different"
1243 " superblock to %s\n",
1244 bdevname(rdev->bdev,b),
1245 bdevname(refdev->bdev,b2));
1246 return -EINVAL;
1247 }
1248 ev1 = le64_to_cpu(sb->events);
1249 ev2 = le64_to_cpu(refsb->events);
1250
1251 if (ev1 > ev2)
1252 ret = 1;
1253 else
1254 ret = 0;
1255 }
1256 if (minor_version)
1257 rdev->sectors = (rdev->bdev->bd_inode->i_size >> 9) -
1258 le64_to_cpu(sb->data_offset);
1259 else
1260 rdev->sectors = rdev->sb_start;
1261 if (rdev->sectors < le64_to_cpu(sb->data_size))
1262 return -EINVAL;
1263 rdev->sectors = le64_to_cpu(sb->data_size);
1264 if (le64_to_cpu(sb->size) > rdev->sectors)
1265 return -EINVAL;
1266 return ret;
1267}
1268
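/*
 * validate_super for version-1 metadata: import array-wide fields into mddev
 * (first device only) and per-device role/state into rdev.
 */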
1269static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1270{
1271 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1272 __u64 ev1 = le64_to_cpu(sb->events);
1273
1274 rdev->raid_disk = -1;
1275 clear_bit(Faulty, &rdev->flags);
1276 clear_bit(In_sync, &rdev->flags);
1277 clear_bit(WriteMostly, &rdev->flags);
1278 clear_bit(BarriersNotsupp, &rdev->flags);
1279
1280 if (mddev->raid_disks == 0) {
1281 mddev->major_version = 1;
1282 mddev->patch_version = 0;
1283 mddev->external = 0;
1284 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1285 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1286 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1287 mddev->level = le32_to_cpu(sb->level);
1288 mddev->clevel[0] = 0;
1289 mddev->layout = le32_to_cpu(sb->layout);
1290 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1291 mddev->dev_sectors = le64_to_cpu(sb->size);
1292 mddev->events = ev1;
1293 mddev->bitmap_offset = 0;
1294 mddev->default_bitmap_offset = 1024 >> 9;
1295
1296 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1297 memcpy(mddev->uuid, sb->set_uuid, 16);
1298
1299 mddev->max_disks = (4096-256)/2;
1300
1301 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1302 mddev->bitmap_file == NULL )
1303 mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
1304
1305 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1306 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1307 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1308 mddev->new_level = le32_to_cpu(sb->new_level);
1309 mddev->new_layout = le32_to_cpu(sb->new_layout);
1310 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1311 } else {
1312 mddev->reshape_position = MaxSector;
1313 mddev->delta_disks = 0;
1314 mddev->new_level = mddev->level;
1315 mddev->new_layout = mddev->layout;
1316 mddev->new_chunk_sectors = mddev->chunk_sectors;
1317 }
1318
1319 } else if (mddev->pers == NULL) {
1320
1321 ++ev1;
1322 if (ev1 < mddev->events)
1323 return -EINVAL;
1324 } else if (mddev->bitmap) {
1325
1326
1327
1328 if (ev1 < mddev->bitmap->events_cleared)
1329 return 0;
1330 } else {
1331 if (ev1 < mddev->events)
1332
1333 return 0;
1334 }
1335 if (mddev->level != LEVEL_MULTIPATH) {
1336 int role;
1337 if (rdev->desc_nr < 0 ||
1338 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1339 role = 0xffff;
1340 rdev->desc_nr = -1;
1341 } else
1342 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1343 switch(role) {
1344 case 0xffff:
1345 break;
1346 case 0xfffe:
1347 set_bit(Faulty, &rdev->flags);
1348 break;
1349 default:
1350 if ((le32_to_cpu(sb->feature_map) &
1351 MD_FEATURE_RECOVERY_OFFSET))
1352 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1353 else
1354 set_bit(In_sync, &rdev->flags);
1355 rdev->raid_disk = role;
1356 break;
1357 }
1358 if (sb->devflags & WriteMostly1)
1359 set_bit(WriteMostly, &rdev->flags);
1360 } else
1361 set_bit(In_sync, &rdev->flags);
1362
1363 return 0;
1364}
1365
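/*
 * sync_super for version-1 metadata: regenerate rdev's superblock, including
 * the dev_roles[] table, from current mddev state.
 */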
1366static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1367{
1368 struct mdp_superblock_1 *sb;
1369 mdk_rdev_t *rdev2;
1370 int max_dev, i;
1371
1372
1373 sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
1374
1375 sb->feature_map = 0;
1376 sb->pad0 = 0;
1377 sb->recovery_offset = cpu_to_le64(0);
1378 memset(sb->pad1, 0, sizeof(sb->pad1));
1379 memset(sb->pad2, 0, sizeof(sb->pad2));
1380 memset(sb->pad3, 0, sizeof(sb->pad3));
1381
1382 sb->utime = cpu_to_le64((__u64)mddev->utime);
1383 sb->events = cpu_to_le64(mddev->events);
1384 if (mddev->in_sync)
1385 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1386 else
1387 sb->resync_offset = cpu_to_le64(0);
1388
1389 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1390
1391 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1392 sb->size = cpu_to_le64(mddev->dev_sectors);
1393 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1394 sb->level = cpu_to_le32(mddev->level);
1395 sb->layout = cpu_to_le32(mddev->layout);
1396
1397 if (mddev->bitmap && mddev->bitmap_file == NULL) {
1398 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
1399 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1400 }
1401
1402 if (rdev->raid_disk >= 0 &&
1403 !test_bit(In_sync, &rdev->flags)) {
1404 if (rdev->recovery_offset > 0) {
1405 sb->feature_map |=
1406 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1407 sb->recovery_offset =
1408 cpu_to_le64(rdev->recovery_offset);
1409 }
1410 }
1411
1412 if (mddev->reshape_position != MaxSector) {
1413 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1414 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1415 sb->new_layout = cpu_to_le32(mddev->new_layout);
1416 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1417 sb->new_level = cpu_to_le32(mddev->new_level);
1418 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1419 }
1420
1421 max_dev = 0;
1422 list_for_each_entry(rdev2, &mddev->disks, same_set)
1423 if (rdev2->desc_nr+1 > max_dev)
1424 max_dev = rdev2->desc_nr+1;
1425
1426 if (max_dev > le32_to_cpu(sb->max_dev)) {
1427 int bmask;
1428 sb->max_dev = cpu_to_le32(max_dev);
1429 rdev->sb_size = max_dev * 2 + 256;
1430 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1431 if (rdev->sb_size & bmask)
1432 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1433 }
1434 for (i=0; i<max_dev;i++)
1435 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1436
1437 list_for_each_entry(rdev2, &mddev->disks, same_set) {
1438 i = rdev2->desc_nr;
1439 if (test_bit(Faulty, &rdev2->flags))
1440 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1441 else if (test_bit(In_sync, &rdev2->flags))
1442 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1443 else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)
1444 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1445 else
1446 sb->dev_roles[i] = cpu_to_le16(0xffff);
1447 }
1448
1449 sb->sb_csum = calc_sb_1_csum(sb);
1450}
1451
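/*
 * rdev_size_change for version-1 metadata: a superblock stored at the end of
 * the device (minor_version 0) must be relocated when the device grows.
 */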
1452static unsigned long long
1453super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1454{
1455 struct mdp_superblock_1 *sb;
1456 sector_t max_sectors;
1457 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1458 return 0;
1459 if (rdev->sb_start < rdev->data_offset) {
1460
1461 max_sectors = rdev->bdev->bd_inode->i_size >> 9;
1462 max_sectors -= rdev->data_offset;
1463 if (!num_sectors || num_sectors > max_sectors)
1464 num_sectors = max_sectors;
1465 } else if (rdev->mddev->bitmap_offset) {
1466
1467 return 0;
1468 } else {
1469
1470 sector_t sb_start;
1471 sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
1472 sb_start &= ~(sector_t)(4*2 - 1);
1473 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1474 if (!num_sectors || num_sectors > max_sectors)
1475 num_sectors = max_sectors;
1476 rdev->sb_start = sb_start;
1477 }
1478 sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page);
1479 sb->data_size = cpu_to_le64(num_sectors);
1480 sb->super_offset = rdev->sb_start;
1481 sb->sb_csum = calc_sb_1_csum(sb);
1482 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1483 rdev->sb_page);
1484 md_super_wait(rdev->mddev);
1485 return num_sectors / 2;
1486}
1487
1488static struct super_type super_types[] = {
1489 [0] = {
1490 .name = "0.90.0",
1491 .owner = THIS_MODULE,
1492 .load_super = super_90_load,
1493 .validate_super = super_90_validate,
1494 .sync_super = super_90_sync,
1495 .rdev_size_change = super_90_rdev_size_change,
1496 },
1497 [1] = {
1498 .name = "md-1",
1499 .owner = THIS_MODULE,
1500 .load_super = super_1_load,
1501 .validate_super = super_1_validate,
1502 .sync_super = super_1_sync,
1503 .rdev_size_change = super_1_rdev_size_change,
1504 },
1505};
1506
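/*
 * Return true if the two arrays share any underlying physical device
 * (compared via bd_contains, so partitions of one disk count as shared).
 */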
1507static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
1508{
1509 mdk_rdev_t *rdev, *rdev2;
1510
1511 rcu_read_lock();
1512 rdev_for_each_rcu(rdev, mddev1)
1513 rdev_for_each_rcu(rdev2, mddev2)
1514 if (rdev->bdev->bd_contains ==
1515 rdev2->bdev->bd_contains) {
1516 rcu_read_unlock();
1517 return 1;
1518 }
1519 rcu_read_unlock();
1520 return 0;
1521}
1522
1523static LIST_HEAD(pending_raid_disks);
1524
/*
 * Try to register a data integrity profile for the array.  This only
 * succeeds if every in-sync member device exposes a matching integrity
 * profile.
 */
1532int md_integrity_register(mddev_t *mddev)
1533{
1534 mdk_rdev_t *rdev, *reference = NULL;
1535
1536 if (list_empty(&mddev->disks))
1537 return 0;
1538 if (blk_get_integrity(mddev->gendisk))
1539 return 0;
1540 list_for_each_entry(rdev, &mddev->disks, same_set) {
1541
1542 if (test_bit(Faulty, &rdev->flags))
1543 continue;
1544 if (rdev->raid_disk < 0)
1545 continue;
1546
1547
1548
1549
1550 if (!bdev_get_integrity(rdev->bdev))
1551 return -EINVAL;
1552 if (!reference) {
1553
1554 reference = rdev;
1555 continue;
1556 }
1557
1558 if (blk_integrity_compare(reference->bdev->bd_disk,
1559 rdev->bdev->bd_disk) < 0)
1560 return -EINVAL;
1561 }
1562
1563
1564
1565
1566 if (blk_integrity_register(mddev->gendisk,
1567 bdev_get_integrity(reference->bdev)) != 0) {
1568 printk(KERN_ERR "md: failed to register integrity for %s\n",
1569 mdname(mddev));
1570 return -EINVAL;
1571 }
1572 printk(KERN_NOTICE "md: data integrity on %s enabled\n",
1573 mdname(mddev));
1574 return 0;
1575}
1576EXPORT_SYMBOL(md_integrity_register);

/*
 * Called when an rdev is added to a running array: if its integrity profile
 * does not match the array's, disable integrity support for the whole array.
 */
1579void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
1580{
1581 struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
1582 struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
1583
1584 if (!bi_mddev)
1585 return;
1586 if (rdev->raid_disk < 0)
1587 return;
1588 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
1589 rdev->bdev->bd_disk) >= 0)
1590 return;
1591 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
1592 blk_integrity_unregister(mddev->gendisk);
1593}
1594EXPORT_SYMBOL(md_integrity_add_rdev);
1595
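/*
 * Attach an rdev to an array: pick a descriptor number, create the sysfs
 * "dev-xxx" object and "block" link, and add it to mddev->disks.
 */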
1596static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1597{
1598 char b[BDEVNAME_SIZE];
1599 struct kobject *ko;
1600 char *s;
1601 int err;
1602
1603 if (rdev->mddev) {
1604 MD_BUG();
1605 return -EINVAL;
1606 }
1607
1608
1609 if (find_rdev(mddev, rdev->bdev->bd_dev))
1610 return -EEXIST;
1611
1612
1613 if (rdev->sectors && (mddev->dev_sectors == 0 ||
1614 rdev->sectors < mddev->dev_sectors)) {
1615 if (mddev->pers) {
1616
1617
1618
1619
1620 if (mddev->level > 0)
1621 return -ENOSPC;
1622 } else
1623 mddev->dev_sectors = rdev->sectors;
1624 }
1625
	/*
	 * Verify that rdev->desc_nr is unique.  If it is -1, pick a free
	 * number, starting at raid_disks when the array is already running so
	 * that active slots are not disturbed.
	 */
1630 if (rdev->desc_nr < 0) {
1631 int choice = 0;
1632 if (mddev->pers) choice = mddev->raid_disks;
1633 while (find_rdev_nr(mddev, choice))
1634 choice++;
1635 rdev->desc_nr = choice;
1636 } else {
1637 if (find_rdev_nr(mddev, rdev->desc_nr))
1638 return -EBUSY;
1639 }
1640 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
1641 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
1642 mdname(mddev), mddev->max_disks);
1643 return -EBUSY;
1644 }
1645 bdevname(rdev->bdev,b);
1646 while ( (s=strchr(b, '/')) != NULL)
1647 *s = '!';
1648
1649 rdev->mddev = mddev;
1650 printk(KERN_INFO "md: bind<%s>\n", b);
1651
1652 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
1653 goto fail;
1654
1655 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
1656 if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
1657 kobject_del(&rdev->kobj);
1658 goto fail;
1659 }
1660 rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state");
1661
1662 list_add_rcu(&rdev->same_set, &mddev->disks);
1663 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
1664
1665
1666 mddev->recovery_disabled = 0;
1667
1668 return 0;
1669
1670 fail:
1671 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
1672 b, mdname(mddev));
1673 return err;
1674}
1675
1676static void md_delayed_delete(struct work_struct *ws)
1677{
1678 mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
1679 kobject_del(&rdev->kobj);
1680 kobject_put(&rdev->kobj);
1681}
1682
1683static void unbind_rdev_from_array(mdk_rdev_t * rdev)
1684{
1685 char b[BDEVNAME_SIZE];
1686 if (!rdev->mddev) {
1687 MD_BUG();
1688 return;
1689 }
1690 bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
1691 list_del_rcu(&rdev->same_set);
1692 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
1693 rdev->mddev = NULL;
1694 sysfs_remove_link(&rdev->kobj, "block");
1695 sysfs_put(rdev->sysfs_state);
1696 rdev->sysfs_state = NULL;
1697
1698
1699
1700
1701 synchronize_rcu();
1702 INIT_WORK(&rdev->del_work, md_delayed_delete);
1703 kobject_get(&rdev->kobj);
1704 schedule_work(&rdev->del_work);
1705}
1706
/*
 * Prevent the device from being mounted, repartitioned or otherwise reused
 * while it belongs to an array, by opening it and claiming it with
 * bd_claim().
 */
1712static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
1713{
1714 int err = 0;
1715 struct block_device *bdev;
1716 char b[BDEVNAME_SIZE];
1717
1718 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
1719 if (IS_ERR(bdev)) {
1720 printk(KERN_ERR "md: could not open %s.\n",
1721 __bdevname(dev, b));
1722 return PTR_ERR(bdev);
1723 }
1724 err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
1725 if (err) {
1726 printk(KERN_ERR "md: could not bd_claim %s.\n",
1727 bdevname(bdev, b));
1728 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1729 return err;
1730 }
1731 if (!shared)
1732 set_bit(AllReserved, &rdev->flags);
1733 rdev->bdev = bdev;
1734 return err;
1735}
1736
1737static void unlock_rdev(mdk_rdev_t *rdev)
1738{
1739 struct block_device *bdev = rdev->bdev;
1740 rdev->bdev = NULL;
1741 if (!bdev)
1742 MD_BUG();
1743 bd_release(bdev);
1744 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1745}
1746
1747void md_autodetect_dev(dev_t dev);
1748
1749static void export_rdev(mdk_rdev_t * rdev)
1750{
1751 char b[BDEVNAME_SIZE];
1752 printk(KERN_INFO "md: export_rdev(%s)\n",
1753 bdevname(rdev->bdev,b));
1754 if (rdev->mddev)
1755 MD_BUG();
1756 free_disk_sb(rdev);
1757#ifndef MODULE
1758 if (test_bit(AutoDetected, &rdev->flags))
1759 md_autodetect_dev(rdev->bdev->bd_dev);
1760#endif
1761 unlock_rdev(rdev);
1762 kobject_put(&rdev->kobj);
1763}
1764
1765static void kick_rdev_from_array(mdk_rdev_t * rdev)
1766{
1767 unbind_rdev_from_array(rdev);
1768 export_rdev(rdev);
1769}
1770
1771static void export_array(mddev_t *mddev)
1772{
1773 mdk_rdev_t *rdev, *tmp;
1774
1775 rdev_for_each(rdev, tmp, mddev) {
1776 if (!rdev->mddev) {
1777 MD_BUG();
1778 continue;
1779 }
1780 kick_rdev_from_array(rdev);
1781 }
1782 if (!list_empty(&mddev->disks))
1783 MD_BUG();
1784 mddev->raid_disks = 0;
1785 mddev->major_version = 0;
1786}
1787
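/* Debug helpers backing MD_BUG() and md_print_devices(). */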
1788static void print_desc(mdp_disk_t *desc)
1789{
1790 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
1791 desc->major,desc->minor,desc->raid_disk,desc->state);
1792}
1793
1794static void print_sb_90(mdp_super_t *sb)
1795{
1796 int i;
1797
1798 printk(KERN_INFO
1799 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
1800 sb->major_version, sb->minor_version, sb->patch_version,
1801 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
1802 sb->ctime);
1803 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
1804 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
1805 sb->md_minor, sb->layout, sb->chunk_size);
1806 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
1807 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
1808 sb->utime, sb->state, sb->active_disks, sb->working_disks,
1809 sb->failed_disks, sb->spare_disks,
1810 sb->sb_csum, (unsigned long)sb->events_lo);
1811
1812 printk(KERN_INFO);
1813 for (i = 0; i < MD_SB_DISKS; i++) {
1814 mdp_disk_t *desc;
1815
1816 desc = sb->disks + i;
1817 if (desc->number || desc->major || desc->minor ||
1818 desc->raid_disk || (desc->state && (desc->state != 4))) {
1819 printk(" D %2d: ", i);
1820 print_desc(desc);
1821 }
1822 }
1823 printk(KERN_INFO "md: THIS: ");
1824 print_desc(&sb->this_disk);
1825}
1826
1827static void print_sb_1(struct mdp_superblock_1 *sb)
1828{
1829 __u8 *uuid;
1830
1831 uuid = sb->set_uuid;
1832 printk(KERN_INFO
1833 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
1834 ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
1835 "md: Name: \"%s\" CT:%llu\n",
1836 le32_to_cpu(sb->major_version),
1837 le32_to_cpu(sb->feature_map),
1838 uuid[0], uuid[1], uuid[2], uuid[3],
1839 uuid[4], uuid[5], uuid[6], uuid[7],
1840 uuid[8], uuid[9], uuid[10], uuid[11],
1841 uuid[12], uuid[13], uuid[14], uuid[15],
1842 sb->set_name,
1843 (unsigned long long)le64_to_cpu(sb->ctime)
1844 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
1845
1846 uuid = sb->device_uuid;
1847 printk(KERN_INFO
1848 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
1849 " RO:%llu\n"
1850 "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
1851 ":%02x%02x%02x%02x%02x%02x\n"
1852 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
1853 "md: (MaxDev:%u) \n",
1854 le32_to_cpu(sb->level),
1855 (unsigned long long)le64_to_cpu(sb->size),
1856 le32_to_cpu(sb->raid_disks),
1857 le32_to_cpu(sb->layout),
1858 le32_to_cpu(sb->chunksize),
1859 (unsigned long long)le64_to_cpu(sb->data_offset),
1860 (unsigned long long)le64_to_cpu(sb->data_size),
1861 (unsigned long long)le64_to_cpu(sb->super_offset),
1862 (unsigned long long)le64_to_cpu(sb->recovery_offset),
1863 le32_to_cpu(sb->dev_number),
1864 uuid[0], uuid[1], uuid[2], uuid[3],
1865 uuid[4], uuid[5], uuid[6], uuid[7],
1866 uuid[8], uuid[9], uuid[10], uuid[11],
1867 uuid[12], uuid[13], uuid[14], uuid[15],
1868 sb->devflags,
1869 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
1870 (unsigned long long)le64_to_cpu(sb->events),
1871 (unsigned long long)le64_to_cpu(sb->resync_offset),
1872 le32_to_cpu(sb->sb_csum),
1873 le32_to_cpu(sb->max_dev)
1874 );
1875}
1876
1877static void print_rdev(mdk_rdev_t *rdev, int major_version)
1878{
1879 char b[BDEVNAME_SIZE];
1880 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
1881 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
1882 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
1883 rdev->desc_nr);
1884 if (rdev->sb_loaded) {
1885 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
1886 switch (major_version) {
1887 case 0:
1888 print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
1889 break;
1890 case 1:
1891 print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
1892 break;
1893 }
1894 } else
1895 printk(KERN_INFO "md: no rdev superblock!\n");
1896}
1897
1898static void md_print_devices(void)
1899{
1900 struct list_head *tmp;
1901 mdk_rdev_t *rdev;
1902 mddev_t *mddev;
1903 char b[BDEVNAME_SIZE];
1904
1905 printk("\n");
1906 printk("md: **********************************\n");
1907 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
1908 printk("md: **********************************\n");
1909 for_each_mddev(mddev, tmp) {
1910
1911 if (mddev->bitmap)
1912 bitmap_print_sb(mddev->bitmap);
1913 else
1914 printk("%s: ", mdname(mddev));
1915 list_for_each_entry(rdev, &mddev->disks, same_set)
1916 printk("<%s>", bdevname(rdev->bdev,b));
1917 printk("\n");
1918
1919 list_for_each_entry(rdev, &mddev->disks, same_set)
1920 print_rdev(rdev, mddev->major_version);
1921 }
1922 printk("md: **********************************\n");
1923 printk("\n");
1924}
1925
1926
1927static void sync_sbs(mddev_t * mddev, int nospares)
1928{
	/*
	 * Update each superblock (in-memory copy), but if nospares is set,
	 * skip spares that already carry the right event count, or one
	 * earlier, since they do not need to be marked dirty with the rest
	 * of the array.
	 */
1935 mdk_rdev_t *rdev;
1936
1937
1938 list_for_each_entry(rdev, &mddev->disks, same_set) {
1939 if (rdev->raid_disk >= 0 &&
1940 !test_bit(In_sync, &rdev->flags) &&
1941 mddev->curr_resync_completed > rdev->recovery_offset)
1942 rdev->recovery_offset = mddev->curr_resync_completed;
1943
1944 }
1945 list_for_each_entry(rdev, &mddev->disks, same_set) {
1946 if (rdev->sb_events == mddev->events ||
1947 (nospares &&
1948 rdev->raid_disk < 0 &&
1949 (rdev->sb_events&1)==0 &&
1950 rdev->sb_events+1 == mddev->events)) {
1951
1952 rdev->sb_loaded = 2;
1953 } else {
1954 super_types[mddev->major_version].
1955 sync_super(mddev, rdev);
1956 rdev->sb_loaded = 1;
1957 }
1958 }
1959}
1960
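/*
 * Write out the superblocks of all member devices, adjusting the event count
 * first.  Repeats if the array state changed while the writes were in flight.
 */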
1961static void md_update_sb(mddev_t * mddev, int force_change)
1962{
1963 mdk_rdev_t *rdev;
1964 int sync_req;
1965 int nospares = 0;
1966
1967 mddev->utime = get_seconds();
1968 if (mddev->external)
1969 return;
1970repeat:
1971 spin_lock_irq(&mddev->write_lock);
1972
1973 set_bit(MD_CHANGE_PENDING, &mddev->flags);
1974 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
1975 force_change = 1;
1976 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
1977
1978
1979
1980
1981 nospares = 1;
1982 if (force_change)
1983 nospares = 0;
1984 if (mddev->degraded)
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994 nospares = 0;
1995
1996 sync_req = mddev->in_sync;
1997
1998
1999
2000 if (nospares
2001 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2002 && (mddev->events & 1)
2003 && mddev->events != 1)
2004 mddev->events--;
2005 else {
2006
2007 mddev->events ++;
2008 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) {
2009
2010
2011 if ((mddev->events&1)==0)
2012 nospares = 0;
2013 } else {
2014
2015 if ((mddev->events&1))
2016 nospares = 0;
2017 }
2018 }
2019
2020 if (!mddev->events) {
2021
2022
2023
2024
2025
2026 MD_BUG();
2027 mddev->events --;
2028 }
2029
2030
2031
2032
2033
2034 if (!mddev->persistent) {
2035 if (!mddev->external)
2036 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2037
2038 spin_unlock_irq(&mddev->write_lock);
2039 wake_up(&mddev->sb_wait);
2040 return;
2041 }
2042 sync_sbs(mddev, nospares);
2043 spin_unlock_irq(&mddev->write_lock);
2044
2045 dprintk(KERN_INFO
2046 "md: updating %s RAID superblock on device (in sync %d)\n",
2047 mdname(mddev),mddev->in_sync);
2048
2049 bitmap_update_sb(mddev->bitmap);
2050 list_for_each_entry(rdev, &mddev->disks, same_set) {
2051 char b[BDEVNAME_SIZE];
2052 dprintk(KERN_INFO "md: ");
2053 if (rdev->sb_loaded != 1)
2054 continue;
2055 if (test_bit(Faulty, &rdev->flags))
2056 dprintk("(skipping faulty ");
2057
2058 dprintk("%s ", bdevname(rdev->bdev,b));
2059 if (!test_bit(Faulty, &rdev->flags)) {
2060 md_super_write(mddev,rdev,
2061 rdev->sb_start, rdev->sb_size,
2062 rdev->sb_page);
2063 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
2064 bdevname(rdev->bdev,b),
2065 (unsigned long long)rdev->sb_start);
2066 rdev->sb_events = mddev->events;
2067
2068 } else
2069 dprintk(")\n");
2070 if (mddev->level == LEVEL_MULTIPATH)
2071
2072 break;
2073 }
2074 md_super_wait(mddev);
2075
2076
2077 spin_lock_irq(&mddev->write_lock);
2078 if (mddev->in_sync != sync_req ||
2079 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2080
2081 spin_unlock_irq(&mddev->write_lock);
2082 goto repeat;
2083 }
2084 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2085 spin_unlock_irq(&mddev->write_lock);
2086 wake_up(&mddev->sb_wait);
2087 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2088 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2089
2090}
2091
2092
2093
2094
2095static int cmd_match(const char *cmd, const char *str)
2096{
	/*
	 * See if cmd (as written to a sysfs file) matches str: they must be
	 * identical except that cmd may carry a trailing newline.
	 */
2101 while (*cmd && *str && *cmd == *str) {
2102 cmd++;
2103 str++;
2104 }
2105 if (*cmd == '\n')
2106 cmd++;
2107 if (*str || *cmd)
2108 return 0;
2109 return 1;
2110}
2111
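/* Per-rdev sysfs attributes (state, errors, slot, offset, size). */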
2112struct rdev_sysfs_entry {
2113 struct attribute attr;
2114 ssize_t (*show)(mdk_rdev_t *, char *);
2115 ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
2116};
2117
2118static ssize_t
2119state_show(mdk_rdev_t *rdev, char *page)
2120{
2121 char *sep = "";
2122 size_t len = 0;
2123
2124 if (test_bit(Faulty, &rdev->flags)) {
2125 len+= sprintf(page+len, "%sfaulty",sep);
2126 sep = ",";
2127 }
2128 if (test_bit(In_sync, &rdev->flags)) {
2129 len += sprintf(page+len, "%sin_sync",sep);
2130 sep = ",";
2131 }
2132 if (test_bit(WriteMostly, &rdev->flags)) {
2133 len += sprintf(page+len, "%swrite_mostly",sep);
2134 sep = ",";
2135 }
2136 if (test_bit(Blocked, &rdev->flags)) {
2137 len += sprintf(page+len, "%sblocked", sep);
2138 sep = ",";
2139 }
2140 if (!test_bit(Faulty, &rdev->flags) &&
2141 !test_bit(In_sync, &rdev->flags)) {
2142 len += sprintf(page+len, "%sspare", sep);
2143 sep = ",";
2144 }
2145 return len+sprintf(page+len, "\n");
2146}
2147
2148static ssize_t
2149state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2150{
	/*
	 * Accepted values:
	 *  faulty       - simulate an error on the device
	 *  remove       - disconnect the device from the array
	 *  writemostly  - set the WriteMostly flag
	 *  -writemostly - clear the WriteMostly flag
	 *  blocked      - set the Blocked flag
	 *  -blocked     - clear the Blocked flag and retry pending writes
	 *  insync       - mark as in-sync, provided the device is not active
	 */
2160 int err = -EINVAL;
2161 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2162 md_error(rdev->mddev, rdev);
2163 err = 0;
2164 } else if (cmd_match(buf, "remove")) {
2165 if (rdev->raid_disk >= 0)
2166 err = -EBUSY;
2167 else {
2168 mddev_t *mddev = rdev->mddev;
2169 kick_rdev_from_array(rdev);
2170 if (mddev->pers)
2171 md_update_sb(mddev, 1);
2172 md_new_event(mddev);
2173 err = 0;
2174 }
2175 } else if (cmd_match(buf, "writemostly")) {
2176 set_bit(WriteMostly, &rdev->flags);
2177 err = 0;
2178 } else if (cmd_match(buf, "-writemostly")) {
2179 clear_bit(WriteMostly, &rdev->flags);
2180 err = 0;
2181 } else if (cmd_match(buf, "blocked")) {
2182 set_bit(Blocked, &rdev->flags);
2183 err = 0;
2184 } else if (cmd_match(buf, "-blocked")) {
2185 clear_bit(Blocked, &rdev->flags);
2186 wake_up(&rdev->blocked_wait);
2187 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2188 md_wakeup_thread(rdev->mddev->thread);
2189
2190 err = 0;
2191 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2192 set_bit(In_sync, &rdev->flags);
2193 err = 0;
2194 }
2195 if (!err && rdev->sysfs_state)
2196 sysfs_notify_dirent(rdev->sysfs_state);
2197 return err ? err : len;
2198}
2199static struct rdev_sysfs_entry rdev_state =
2200__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2201
2202static ssize_t
2203errors_show(mdk_rdev_t *rdev, char *page)
2204{
2205 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2206}
2207
2208static ssize_t
2209errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2210{
2211 char *e;
2212 unsigned long n = simple_strtoul(buf, &e, 10);
2213 if (*buf && (*e == 0 || *e == '\n')) {
2214 atomic_set(&rdev->corrected_errors, n);
2215 return len;
2216 }
2217 return -EINVAL;
2218}
2219static struct rdev_sysfs_entry rdev_errors =
2220__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2221
2222static ssize_t
2223slot_show(mdk_rdev_t *rdev, char *page)
2224{
2225 if (rdev->raid_disk < 0)
2226 return sprintf(page, "none\n");
2227 else
2228 return sprintf(page, "%d\n", rdev->raid_disk);
2229}
2230
2231static ssize_t
2232slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2233{
2234 char *e;
2235 int err;
2236 char nm[20];
2237 int slot = simple_strtoul(buf, &e, 10);
2238 if (strncmp(buf, "none", 4)==0)
2239 slot = -1;
2240 else if (e==buf || (*e && *e!= '\n'))
2241 return -EINVAL;
2242 if (rdev->mddev->pers && slot == -1) {
		/*
		 * Setting 'slot' to none on an active array means removing
		 * the device: tear down the 'rd%d' link and call the
		 * personality's ->hot_remove_disk().  Only failed or spare
		 * devices can be removed this way; this normally happens
		 * automatically, but not when the metadata is managed
		 * externally.
		 */
2250 if (rdev->raid_disk == -1)
2251 return -EEXIST;
2252
2253 if (rdev->mddev->pers->hot_add_disk == NULL)
2254 return -EINVAL;
2255 err = rdev->mddev->pers->
2256 hot_remove_disk(rdev->mddev, rdev->raid_disk);
2257 if (err)
2258 return err;
2259 sprintf(nm, "rd%d", rdev->raid_disk);
2260 sysfs_remove_link(&rdev->mddev->kobj, nm);
2261 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2262 md_wakeup_thread(rdev->mddev->thread);
2263 } else if (rdev->mddev->pers) {
2264 mdk_rdev_t *rdev2;
		/*
		 * Activating a spare: assign it the requested slot and ask
		 * the personality to hot-add it.
		 */
2269 if (rdev->raid_disk != -1)
2270 return -EBUSY;
2271
2272 if (rdev->mddev->pers->hot_add_disk == NULL)
2273 return -EINVAL;
2274
2275 list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
2276 if (rdev2->raid_disk == slot)
2277 return -EEXIST;
2278
2279 rdev->raid_disk = slot;
2280 if (test_bit(In_sync, &rdev->flags))
2281 rdev->saved_raid_disk = slot;
2282 else
2283 rdev->saved_raid_disk = -1;
2284 err = rdev->mddev->pers->
2285 hot_add_disk(rdev->mddev, rdev);
2286 if (err) {
2287 rdev->raid_disk = -1;
2288 return err;
2289 } else
2290 sysfs_notify_dirent(rdev->sysfs_state);
2291 sprintf(nm, "rd%d", rdev->raid_disk);
2292 if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
2293 printk(KERN_WARNING
2294 "md: cannot register "
2295 "%s for %s\n",
2296 nm, mdname(rdev->mddev));
2297
2298
2299 } else {
2300 if (slot >= rdev->mddev->raid_disks)
2301 return -ENOSPC;
2302 rdev->raid_disk = slot;
		/* assume it is working */
2304 clear_bit(Faulty, &rdev->flags);
2305 clear_bit(WriteMostly, &rdev->flags);
2306 set_bit(In_sync, &rdev->flags);
2307 sysfs_notify_dirent(rdev->sysfs_state);
2308 }
2309 return len;
2310}
2311
2312
2313static struct rdev_sysfs_entry rdev_slot =
2314__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2315
2316static ssize_t
2317offset_show(mdk_rdev_t *rdev, char *page)
2318{
2319 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2320}
2321
2322static ssize_t
2323offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2324{
2325 char *e;
2326 unsigned long long offset = simple_strtoull(buf, &e, 10);
2327 if (e==buf || (*e && *e != '\n'))
2328 return -EINVAL;
2329 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2330 return -EBUSY;
2331 if (rdev->sectors && rdev->mddev->external)
2332
2333
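 /* Once a size has been recorded for an externally managed rdev,
  * refuse to move the data offset: the overlap checks performed on
  * resize rely on a stable offset. */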
2334 return -EBUSY;
2335 rdev->data_offset = offset;
2336 return len;
2337}
2338
2339static struct rdev_sysfs_entry rdev_offset =
2340__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2341
2342static ssize_t
2343rdev_size_show(mdk_rdev_t *rdev, char *page)
2344{
2345 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2346}
2347
2348static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2349{
2350
2351 if (s1+l1 <= s2)
2352 return 0;
2353 if (s2+l2 <= s1)
2354 return 0;
2355 return 1;
2356}
2357
2358static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2359{
2360 unsigned long long blocks;
2361 sector_t new;
2362
2363 if (strict_strtoull(buf, 10, &blocks) < 0)
2364 return -EINVAL;
2365
2366 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2367 return -EINVAL;
2368
2369 new = blocks * 2;
2370 if (new != blocks * 2)
2371 return -EINVAL;
2372
2373 *sectors = new;
2374 return 0;
2375}
2376
2377static ssize_t
2378rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2379{
2380 mddev_t *my_mddev = rdev->mddev;
2381 sector_t oldsectors = rdev->sectors;
2382 sector_t sectors;
2383
2384 if (strict_blocks_to_sectors(buf, &sectors) < 0)
2385 return -EINVAL;
2386 if (my_mddev->pers && rdev->raid_disk >= 0) {
2387 if (my_mddev->persistent) {
2388 sectors = super_types[my_mddev->major_version].
2389 rdev_size_change(rdev, sectors);
2390 if (!sectors)
2391 return -EBUSY;
2392 } else if (!sectors)
2393 sectors = (rdev->bdev->bd_inode->i_size >> 9) -
2394 rdev->data_offset;
2395 }
2396 if (sectors < my_mddev->dev_sectors)
2397 return -EINVAL;
2398
2399 rdev->sectors = sectors;
2400 if (sectors > oldsectors && my_mddev->external) {
2401
2402
2403
2404
2405
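 /*
  * The device grew and its metadata is managed externally, so md must
  * verify itself that the enlarged data area does not overlap any
  * other rdev sharing the same underlying block device.  Our own
  * mddev lock is dropped while the other arrays are inspected to
  * avoid deadlocking on their locks.
  */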
2406 mddev_t *mddev;
2407 int overlap = 0;
2408 struct list_head *tmp;
2409
2410 mddev_unlock(my_mddev);
2411 for_each_mddev(mddev, tmp) {
2412 mdk_rdev_t *rdev2;
2413
2414 mddev_lock(mddev);
2415 list_for_each_entry(rdev2, &mddev->disks, same_set)
2416 if (test_bit(AllReserved, &rdev2->flags) ||
2417 (rdev->bdev == rdev2->bdev &&
2418 rdev != rdev2 &&
2419 overlaps(rdev->data_offset, rdev->sectors,
2420 rdev2->data_offset,
2421 rdev2->sectors))) {
2422 overlap = 1;
2423 break;
2424 }
2425 mddev_unlock(mddev);
2426 if (overlap) {
2427 mddev_put(mddev);
2428 break;
2429 }
2430 }
2431 mddev_lock(my_mddev);
2432 if (overlap) {
2433
2434
2435
2436
2437
2438
2439 rdev->sectors = oldsectors;
2440 return -EBUSY;
2441 }
2442 }
2443 return len;
2444}
2445
2446static struct rdev_sysfs_entry rdev_size =
2447__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
2448
2449static struct attribute *rdev_default_attrs[] = {
2450 &rdev_state.attr,
2451 &rdev_errors.attr,
2452 &rdev_slot.attr,
2453 &rdev_offset.attr,
2454 &rdev_size.attr,
2455 NULL,
2456};
2457static ssize_t
2458rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
2459{
2460 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2461 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2462 mddev_t *mddev = rdev->mddev;
2463 ssize_t rv;
2464
2465 if (!entry->show)
2466 return -EIO;
2467
2468 rv = mddev ? mddev_lock(mddev) : -EBUSY;
2469 if (!rv) {
2470 if (rdev->mddev == NULL)
2471 rv = -EBUSY;
2472 else
2473 rv = entry->show(rdev, page);
2474 mddev_unlock(mddev);
2475 }
2476 return rv;
2477}
2478
2479static ssize_t
2480rdev_attr_store(struct kobject *kobj, struct attribute *attr,
2481 const char *page, size_t length)
2482{
2483 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2484 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2485 ssize_t rv;
2486 mddev_t *mddev = rdev->mddev;
2487
2488 if (!entry->store)
2489 return -EIO;
2490 if (!capable(CAP_SYS_ADMIN))
2491 return -EACCES;
2492 rv = mddev ? mddev_lock(mddev): -EBUSY;
2493 if (!rv) {
2494 if (rdev->mddev == NULL)
2495 rv = -EBUSY;
2496 else
2497 rv = entry->store(rdev, page, length);
2498 mddev_unlock(mddev);
2499 }
2500 return rv;
2501}
2502
2503static void rdev_free(struct kobject *ko)
2504{
2505 mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
2506 kfree(rdev);
2507}
2508static struct sysfs_ops rdev_sysfs_ops = {
2509 .show = rdev_attr_show,
2510 .store = rdev_attr_store,
2511};
2512static struct kobj_type rdev_ktype = {
2513 .release = rdev_free,
2514 .sysfs_ops = &rdev_sysfs_ops,
2515 .default_attrs = rdev_default_attrs,
2516};
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
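/*
 * Import a device: allocate an rdev, open and claim the block device and,
 * for super_format >= 0, load and sanity-check the superblock identified
 * by super_format/super_minor.  A super_format of -1 means there is no
 * superblock to read; -2 is used for externally managed metadata.
 * Returns the new rdev, or an ERR_PTR() on failure.
 */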
2528static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_minor)
2529{
2530 char b[BDEVNAME_SIZE];
2531 int err;
2532 mdk_rdev_t *rdev;
2533 sector_t size;
2534
2535 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
2536 if (!rdev) {
2537 printk(KERN_ERR "md: could not alloc mem for new device!\n");
2538 return ERR_PTR(-ENOMEM);
2539 }
2540
2541 if ((err = alloc_disk_sb(rdev)))
2542 goto abort_free;
2543
2544 err = lock_rdev(rdev, newdev, super_format == -2);
2545 if (err)
2546 goto abort_free;
2547
2548 kobject_init(&rdev->kobj, &rdev_ktype);
2549
2550 rdev->desc_nr = -1;
2551 rdev->saved_raid_disk = -1;
2552 rdev->raid_disk = -1;
2553 rdev->flags = 0;
2554 rdev->data_offset = 0;
2555 rdev->sb_events = 0;
2556 atomic_set(&rdev->nr_pending, 0);
2557 atomic_set(&rdev->read_errors, 0);
2558 atomic_set(&rdev->corrected_errors, 0);
2559
2560 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
2561 if (!size) {
2562 printk(KERN_WARNING
2563 "md: %s has zero or unknown size, marking faulty!\n",
2564 bdevname(rdev->bdev,b));
2565 err = -EINVAL;
2566 goto abort_free;
2567 }
2568
2569 if (super_format >= 0) {
2570 err = super_types[super_format].
2571 load_super(rdev, NULL, super_minor);
2572 if (err == -EINVAL) {
2573 printk(KERN_WARNING
2574 "md: %s does not have a valid v%d.%d "
2575 "superblock, not importing!\n",
2576 bdevname(rdev->bdev,b),
2577 super_format, super_minor);
2578 goto abort_free;
2579 }
2580 if (err < 0) {
2581 printk(KERN_WARNING
2582 "md: could not read %s's sb, not importing!\n",
2583 bdevname(rdev->bdev,b));
2584 goto abort_free;
2585 }
2586 }
2587
2588 INIT_LIST_HEAD(&rdev->same_set);
2589 init_waitqueue_head(&rdev->blocked_wait);
2590
2591 return rdev;
2592
2593abort_free:
2594 if (rdev->sb_page) {
2595 if (rdev->bdev)
2596 unlock_rdev(rdev);
2597 free_disk_sb(rdev);
2598 }
2599 kfree(rdev);
2600 return ERR_PTR(err);
2601}
2602
2603
2604
2605
2606
2607
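/*
 * Check a freshly assembled array for plausibility: load every member's
 * superblock, pick the freshest one, validate the array against it, and
 * kick out any device whose superblock is stale or inconsistent.
 */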
2608static void analyze_sbs(mddev_t * mddev)
2609{
2610 int i;
2611 mdk_rdev_t *rdev, *freshest, *tmp;
2612 char b[BDEVNAME_SIZE];
2613
2614 freshest = NULL;
2615 rdev_for_each(rdev, tmp, mddev)
2616 switch (super_types[mddev->major_version].
2617 load_super(rdev, freshest, mddev->minor_version)) {
2618 case 1:
2619 freshest = rdev;
2620 break;
2621 case 0:
2622 break;
2623 default:
2624 printk( KERN_ERR \
2625 "md: fatal superblock inconsistency in %s"
2626 " -- removing from array\n",
2627 bdevname(rdev->bdev,b));
2628 kick_rdev_from_array(rdev);
2629 }
2630
2631
2632 super_types[mddev->major_version].
2633 validate_super(mddev, freshest);
2634
2635 i = 0;
2636 rdev_for_each(rdev, tmp, mddev) {
2637 if (rdev->desc_nr >= mddev->max_disks ||
2638 i > mddev->max_disks) {
2639 printk(KERN_WARNING
2640 "md: %s: %s: only %d devices permitted\n",
2641 mdname(mddev), bdevname(rdev->bdev, b),
2642 mddev->max_disks);
2643 kick_rdev_from_array(rdev);
2644 continue;
2645 }
2646 if (rdev != freshest)
2647 if (super_types[mddev->major_version].
2648 validate_super(mddev, rdev)) {
2649 printk(KERN_WARNING "md: kicking non-fresh %s"
2650 " from array!\n",
2651 bdevname(rdev->bdev,b));
2652 kick_rdev_from_array(rdev);
2653 continue;
2654 }
2655 if (mddev->level == LEVEL_MULTIPATH) {
2656 rdev->desc_nr = i++;
2657 rdev->raid_disk = rdev->desc_nr;
2658 set_bit(In_sync, &rdev->flags);
2659 } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
2660 rdev->raid_disk = -1;
2661 clear_bit(In_sync, &rdev->flags);
2662 }
2663 }
2664}
2665
2666static void md_safemode_timeout(unsigned long data);
2667
2668static ssize_t
2669safe_delay_show(mddev_t *mddev, char *page)
2670{
2671 int msec = (mddev->safemode_delay*1000)/HZ;
2672 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
2673}
2674static ssize_t
2675safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2676{
2677 int scale=1;
2678 int dot=0;
2679 int i;
2680 unsigned long msec;
2681 char buf[30];
2682
2683
2684 if (len >= sizeof(buf))
2685 return -EINVAL;
2686 strlcpy(buf, cbuf, sizeof(buf));
2687 for (i=0; i<len; i++) {
2688 if (dot) {
2689 if (isdigit(buf[i])) {
2690 buf[i-1] = buf[i];
2691 scale *= 10;
2692 }
2693 buf[i] = 0;
2694 } else if (buf[i] == '.') {
2695 dot=1;
2696 buf[i] = 0;
2697 }
2698 }
2699 if (strict_strtoul(buf, 10, &msec) < 0)
2700 return -EINVAL;
2701 msec = (msec * 1000) / scale;
2702 if (msec == 0)
2703 mddev->safemode_delay = 0;
2704 else {
2705 unsigned long old_delay = mddev->safemode_delay;
2706 mddev->safemode_delay = (msec*HZ)/1000;
2707 if (mddev->safemode_delay == 0)
2708 mddev->safemode_delay = 1;
2709 if (mddev->safemode_delay < old_delay)
2710 md_safemode_timeout((unsigned long)mddev);
2711 }
2712 return len;
2713}
2714static struct md_sysfs_entry md_safe_delay =
2715__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
2716
2717static ssize_t
2718level_show(mddev_t *mddev, char *page)
2719{
2720 struct mdk_personality *p = mddev->pers;
2721 if (p)
2722 return sprintf(page, "%s\n", p->name);
2723 else if (mddev->clevel[0])
2724 return sprintf(page, "%s\n", mddev->clevel);
2725 else if (mddev->level != LEVEL_NONE)
2726 return sprintf(page, "%d\n", mddev->level);
2727 else
2728 return 0;
2729}
2730
2731static ssize_t
2732level_store(mddev_t *mddev, const char *buf, size_t len)
2733{
2734 char level[16];
2735 ssize_t rv = len;
2736 struct mdk_personality *pers;
2737 void *priv;
2738 mdk_rdev_t *rdev;
2739
2740 if (mddev->pers == NULL) {
2741 if (len == 0)
2742 return 0;
2743 if (len >= sizeof(mddev->clevel))
2744 return -ENOSPC;
2745 strncpy(mddev->clevel, buf, len);
2746 if (mddev->clevel[len-1] == '\n')
2747 len--;
2748 mddev->clevel[len] = 0;
2749 mddev->level = LEVEL_NONE;
2750 return rv;
2751 }
2752
2753
2754
2755
2756
2757
2758
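 /*
  * Request to change the personality of an active array.  This is only
  * attempted when no resync/reshape is in progress, the current
  * personality can be quiesced, and the new personality provides a
  * takeover method for the existing layout.
  */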
2759 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
2760 return -EBUSY;
2761
2762 if (!mddev->pers->quiesce) {
2763 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
2764 mdname(mddev), mddev->pers->name);
2765 return -EINVAL;
2766 }
2767
2768
2769 if (len == 0 || len >= sizeof(level))
2770 return -EINVAL;
2771 strncpy(level, buf, len);
2772 if (level[len-1] == '\n')
2773 len--;
2774 level[len] = 0;
2775
2776 request_module("md-%s", level);
2777 spin_lock(&pers_lock);
2778 pers = find_pers(LEVEL_NONE, level);
2779 if (!pers || !try_module_get(pers->owner)) {
2780 spin_unlock(&pers_lock);
2781 printk(KERN_WARNING "md: personality %s not loaded\n", level);
2782 return -EINVAL;
2783 }
2784 spin_unlock(&pers_lock);
2785
2786 if (pers == mddev->pers) {
2787
2788 module_put(pers->owner);
2789 return rv;
2790 }
2791 if (!pers->takeover) {
2792 module_put(pers->owner);
2793 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
2794 mdname(mddev), level);
2795 return -EINVAL;
2796 }
2797
2798
2799
2800
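 /* ->takeover is expected to set new_level/new_layout/new_chunk_sectors
  * and delta_disks on success, and may already have modified them on
  * failure, hence the roll-back below. */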
2801 priv = pers->takeover(mddev);
2802 if (IS_ERR(priv)) {
2803 mddev->new_level = mddev->level;
2804 mddev->new_layout = mddev->layout;
2805 mddev->new_chunk_sectors = mddev->chunk_sectors;
2806 mddev->raid_disks -= mddev->delta_disks;
2807 mddev->delta_disks = 0;
2808 module_put(pers->owner);
2809 printk(KERN_WARNING "md: %s: %s would not accept array\n",
2810 mdname(mddev), level);
2811 return PTR_ERR(priv);
2812 }
2813
2814
2815 mddev_suspend(mddev);
2816 mddev->pers->stop(mddev);
2817 module_put(mddev->pers->owner);
2818
2819 list_for_each_entry(rdev, &mddev->disks, same_set)
2820 if (rdev->raid_disk >= mddev->raid_disks) {
2821 rdev->raid_disk = -1;
2822 clear_bit(In_sync, &rdev->flags);
2823 }
2824 mddev->pers = pers;
2825 mddev->private = priv;
2826 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
2827 mddev->level = mddev->new_level;
2828 mddev->layout = mddev->new_layout;
2829 mddev->chunk_sectors = mddev->new_chunk_sectors;
2830 mddev->delta_disks = 0;
2831 pers->run(mddev);
2832 mddev_resume(mddev);
2833 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2834 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2835 md_wakeup_thread(mddev->thread);
2836 return rv;
2837}
2838
2839static struct md_sysfs_entry md_level =
2840__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
2841
2842
2843static ssize_t
2844layout_show(mddev_t *mddev, char *page)
2845{
2846
2847 if (mddev->reshape_position != MaxSector &&
2848 mddev->layout != mddev->new_layout)
2849 return sprintf(page, "%d (%d)\n",
2850 mddev->new_layout, mddev->layout);
2851 return sprintf(page, "%d\n", mddev->layout);
2852}
2853
2854static ssize_t
2855layout_store(mddev_t *mddev, const char *buf, size_t len)
2856{
2857 char *e;
2858 unsigned long n = simple_strtoul(buf, &e, 10);
2859
2860 if (!*buf || (*e && *e != '\n'))
2861 return -EINVAL;
2862
2863 if (mddev->pers) {
2864 int err;
2865 if (mddev->pers->check_reshape == NULL)
2866 return -EBUSY;
2867 mddev->new_layout = n;
2868 err = mddev->pers->check_reshape(mddev);
2869 if (err) {
2870 mddev->new_layout = mddev->layout;
2871 return err;
2872 }
2873 } else {
2874 mddev->new_layout = n;
2875 if (mddev->reshape_position == MaxSector)
2876 mddev->layout = n;
2877 }
2878 return len;
2879}
2880static struct md_sysfs_entry md_layout =
2881__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
2882
2883
2884static ssize_t
2885raid_disks_show(mddev_t *mddev, char *page)
2886{
2887 if (mddev->raid_disks == 0)
2888 return 0;
2889 if (mddev->reshape_position != MaxSector &&
2890 mddev->delta_disks != 0)
2891 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
2892 mddev->raid_disks - mddev->delta_disks);
2893 return sprintf(page, "%d\n", mddev->raid_disks);
2894}
2895
2896static int update_raid_disks(mddev_t *mddev, int raid_disks);
2897
2898static ssize_t
2899raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
2900{
2901 char *e;
2902 int rv = 0;
2903 unsigned long n = simple_strtoul(buf, &e, 10);
2904
2905 if (!*buf || (*e && *e != '\n'))
2906 return -EINVAL;
2907
2908 if (mddev->pers)
2909 rv = update_raid_disks(mddev, n);
2910 else if (mddev->reshape_position != MaxSector) {
2911 int olddisks = mddev->raid_disks - mddev->delta_disks;
2912 mddev->delta_disks = n - olddisks;
2913 mddev->raid_disks = n;
2914 } else
2915 mddev->raid_disks = n;
2916 return rv ? rv : len;
2917}
2918static struct md_sysfs_entry md_raid_disks =
2919__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
2920
2921static ssize_t
2922chunk_size_show(mddev_t *mddev, char *page)
2923{
2924 if (mddev->reshape_position != MaxSector &&
2925 mddev->chunk_sectors != mddev->new_chunk_sectors)
2926 return sprintf(page, "%d (%d)\n",
2927 mddev->new_chunk_sectors << 9,
2928 mddev->chunk_sectors << 9);
2929 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
2930}
2931
2932static ssize_t
2933chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
2934{
2935 char *e;
2936 unsigned long n = simple_strtoul(buf, &e, 10);
2937
2938 if (!*buf || (*e && *e != '\n'))
2939 return -EINVAL;
2940
2941 if (mddev->pers) {
2942 int err;
2943 if (mddev->pers->check_reshape == NULL)
2944 return -EBUSY;
2945 mddev->new_chunk_sectors = n >> 9;
2946 err = mddev->pers->check_reshape(mddev);
2947 if (err) {
2948 mddev->new_chunk_sectors = mddev->chunk_sectors;
2949 return err;
2950 }
2951 } else {
2952 mddev->new_chunk_sectors = n >> 9;
2953 if (mddev->reshape_position == MaxSector)
2954 mddev->chunk_sectors = n >> 9;
2955 }
2956 return len;
2957}
2958static struct md_sysfs_entry md_chunk_size =
2959__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
2960
2961static ssize_t
2962resync_start_show(mddev_t *mddev, char *page)
2963{
2964 if (mddev->recovery_cp == MaxSector)
2965 return sprintf(page, "none\n");
2966 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
2967}
2968
2969static ssize_t
2970resync_start_store(mddev_t *mddev, const char *buf, size_t len)
2971{
2972 char *e;
2973 unsigned long long n = simple_strtoull(buf, &e, 10);
2974
2975 if (mddev->pers)
2976 return -EBUSY;
2977 if (!*buf || (*e && *e != '\n'))
2978 return -EINVAL;
2979
2980 mddev->recovery_cp = n;
2981 return len;
2982}
2983static struct md_sysfs_entry md_resync_start =
2984__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
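/*
 * The array state, as exported through the sysfs "array_state" attribute:
 *   clear         - no devices, no size, no level; writing it stops and
 *                   disassembles an array that nobody holds open
 *   inactive      - devices may be attached, but the array is not started
 *   suspended     - reserved; cannot be written and is not reported here
 *   readonly      - array is active but rejects all writes
 *   read-auto     - like readonly, but switches to read-write on the
 *                   first write request
 *   clean         - active and in_sync, no writes pending
 *   active        - active, writes may be outstanding
 *   write-pending - a superblock update must complete before writes proceed
 *   active-idle   - active, and the safe-mode timer has expired
 * write-pending and active-idle are reported but cannot be written.
 * Writing one of the other names attempts the corresponding transition,
 * e.g. (path assumed for illustration):
 *   echo readonly > /sys/block/md0/md/array_state
 */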
3022enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3023 write_pending, active_idle, bad_word};
3024static char *array_states[] = {
3025 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3026 "write-pending", "active-idle", NULL };
3027
3028static int match_word(const char *word, char **list)
3029{
3030 int n;
3031 for (n=0; list[n]; n++)
3032 if (cmd_match(word, list[n]))
3033 break;
3034 return n;
3035}
3036
3037static ssize_t
3038array_state_show(mddev_t *mddev, char *page)
3039{
3040 enum array_state st = inactive;
3041
3042 if (mddev->pers)
3043 switch(mddev->ro) {
3044 case 1:
3045 st = readonly;
3046 break;
3047 case 2:
3048 st = read_auto;
3049 break;
3050 case 0:
3051 if (mddev->in_sync)
3052 st = clean;
3053 else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
3054 st = write_pending;
3055 else if (mddev->safemode)
3056 st = active_idle;
3057 else
3058 st = active;
3059 }
3060 else {
3061 if (list_empty(&mddev->disks) &&
3062 mddev->raid_disks == 0 &&
3063 mddev->dev_sectors == 0)
3064 st = clear;
3065 else
3066 st = inactive;
3067 }
3068 return sprintf(page, "%s\n", array_states[st]);
3069}
3070
3071static int do_md_stop(mddev_t * mddev, int ro, int is_open);
3072static int do_md_run(mddev_t * mddev);
3073static int restart_array(mddev_t *mddev);
3074
3075static ssize_t
3076array_state_store(mddev_t *mddev, const char *buf, size_t len)
3077{
3078 int err = -EINVAL;
3079 enum array_state st = match_word(buf, array_states);
3080 switch(st) {
3081 case bad_word:
3082 break;
3083 case clear:
3084
3085 if (atomic_read(&mddev->openers) > 0)
3086 return -EBUSY;
3087 err = do_md_stop(mddev, 0, 0);
3088 break;
3089 case inactive:
3090
3091 if (mddev->pers) {
3092 if (atomic_read(&mddev->openers) > 0)
3093 return -EBUSY;
3094 err = do_md_stop(mddev, 2, 0);
3095 } else
3096 err = 0;
3097 break;
3098 case suspended:
3099 break;
3100 case readonly:
3101 if (mddev->pers)
3102 err = do_md_stop(mddev, 1, 0);
3103 else {
3104 mddev->ro = 1;
3105 set_disk_ro(mddev->gendisk, 1);
3106 err = do_md_run(mddev);
3107 }
3108 break;
3109 case read_auto:
3110 if (mddev->pers) {
3111 if (mddev->ro == 0)
3112 err = do_md_stop(mddev, 1, 0);
3113 else if (mddev->ro == 1)
3114 err = restart_array(mddev);
3115 if (err == 0) {
3116 mddev->ro = 2;
3117 set_disk_ro(mddev->gendisk, 0);
3118 }
3119 } else {
3120 mddev->ro = 2;
3121 err = do_md_run(mddev);
3122 }
3123 break;
3124 case clean:
3125 if (mddev->pers) {
3126 restart_array(mddev);
3127 spin_lock_irq(&mddev->write_lock);
3128 if (atomic_read(&mddev->writes_pending) == 0) {
3129 if (mddev->in_sync == 0) {
3130 mddev->in_sync = 1;
3131 if (mddev->safemode == 1)
3132 mddev->safemode = 0;
3133 if (mddev->persistent)
3134 set_bit(MD_CHANGE_CLEAN,
3135 &mddev->flags);
3136 }
3137 err = 0;
3138 } else
3139 err = -EBUSY;
3140 spin_unlock_irq(&mddev->write_lock);
3141 } else
3142 err = -EINVAL;
3143 break;
3144 case active:
3145 if (mddev->pers) {
3146 restart_array(mddev);
3147 if (mddev->external)
3148 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
3149 wake_up(&mddev->sb_wait);
3150 err = 0;
3151 } else {
3152 mddev->ro = 0;
3153 set_disk_ro(mddev->gendisk, 0);
3154 err = do_md_run(mddev);
3155 }
3156 break;
3157 case write_pending:
3158 case active_idle:
3159
3160 break;
3161 }
3162 if (err)
3163 return err;
3164 else {
3165 sysfs_notify_dirent(mddev->sysfs_state);
3166 return len;
3167 }
3168}
3169static struct md_sysfs_entry md_array_state =
3170__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3171
3172static ssize_t
3173null_show(mddev_t *mddev, char *page)
3174{
3175 return -EINVAL;
3176}
3177
3178static ssize_t
3179new_dev_store(mddev_t *mddev, const char *buf, size_t len)
3180{
3181
3182
3183
3184
3185
3186
3187
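 /*
  * buf must hold "major:minor" of an existing block device.  The device
  * is imported and bound to this array; with persistent metadata its
  * superblock is loaded and checked against the first member.
  * (Illustrative usage, device numbers assumed:
  *   echo 8:16 > /sys/block/md0/md/new_dev)
  */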
3188 char *e;
3189 int major = simple_strtoul(buf, &e, 10);
3190 int minor;
3191 dev_t dev;
3192 mdk_rdev_t *rdev;
3193 int err;
3194
3195 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
3196 return -EINVAL;
3197 minor = simple_strtoul(e+1, &e, 10);
3198 if (*e && *e != '\n')
3199 return -EINVAL;
3200 dev = MKDEV(major, minor);
3201 if (major != MAJOR(dev) ||
3202 minor != MINOR(dev))
3203 return -EOVERFLOW;
3204
3205
3206 if (mddev->persistent) {
3207 rdev = md_import_device(dev, mddev->major_version,
3208 mddev->minor_version);
3209 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
3210 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
3211 mdk_rdev_t, same_set);
3212 err = super_types[mddev->major_version]
3213 .load_super(rdev, rdev0, mddev->minor_version);
3214 if (err < 0)
3215 goto out;
3216 }
3217 } else if (mddev->external)
3218 rdev = md_import_device(dev, -2, -1);
3219 else
3220 rdev = md_import_device(dev, -1, -1);
3221
3222 if (IS_ERR(rdev))
3223 return PTR_ERR(rdev);
3224 err = bind_rdev_to_array(rdev, mddev);
3225 out:
3226 if (err)
3227 export_rdev(rdev);
3228 return err ? err : len;
3229}
3230
3231static struct md_sysfs_entry md_new_device =
3232__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
3233
3234static ssize_t
3235bitmap_store(mddev_t *mddev, const char *buf, size_t len)
3236{
3237 char *end;
3238 unsigned long chunk, end_chunk;
3239
3240 if (!mddev->bitmap)
3241 goto out;
3242
3243 while (*buf) {
3244 chunk = end_chunk = simple_strtoul(buf, &end, 0);
3245 if (buf == end) break;
3246 if (*end == '-') {
3247 buf = end + 1;
3248 end_chunk = simple_strtoul(buf, &end, 0);
3249 if (buf == end) break;
3250 }
3251 if (*end && !isspace(*end)) break;
3252 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
3253 buf = end;
3254 while (isspace(*buf)) buf++;
3255 }
3256 bitmap_unplug(mddev->bitmap);
3257out:
3258 return len;
3259}
3260
3261static struct md_sysfs_entry md_bitmap =
3262__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
3263
3264static ssize_t
3265size_show(mddev_t *mddev, char *page)
3266{
3267 return sprintf(page, "%llu\n",
3268 (unsigned long long)mddev->dev_sectors / 2);
3269}
3270
3271static int update_size(mddev_t *mddev, sector_t num_sectors);
3272
3273static ssize_t
3274size_store(mddev_t *mddev, const char *buf, size_t len)
3275{
3276
3277
3278
3279
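 /*
  * The value is the per-device ("component") size in kibibytes.  If the
  * array is active an on-line resize is attempted; otherwise the size is
  * simply recorded, and an already-recorded size may only be reduced.
  */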
3280 sector_t sectors;
3281 int err = strict_blocks_to_sectors(buf, &sectors);
3282
3283 if (err < 0)
3284 return err;
3285 if (mddev->pers) {
3286 err = update_size(mddev, sectors);
3287 md_update_sb(mddev, 1);
3288 } else {
3289 if (mddev->dev_sectors == 0 ||
3290 mddev->dev_sectors > sectors)
3291 mddev->dev_sectors = sectors;
3292 else
3293 err = -ENOSPC;
3294 }
3295 return err ? err : len;
3296}
3297
3298static struct md_sysfs_entry md_size =
3299__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
3300
3301
3302
3303
3304
3305
3306
3307
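/*
 * metadata_version reports one of:
 *   "none"            - the array has no persistent metadata
 *   "external:<name>" - metadata is managed from user space
 *   "N.M"             - a native md superblock format
 */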
3308static ssize_t
3309metadata_show(mddev_t *mddev, char *page)
3310{
3311 if (mddev->persistent)
3312 return sprintf(page, "%d.%d\n",
3313 mddev->major_version, mddev->minor_version);
3314 else if (mddev->external)
3315 return sprintf(page, "external:%s\n", mddev->metadata_type);
3316 else
3317 return sprintf(page, "none\n");
3318}
3319
3320static ssize_t
3321metadata_store(mddev_t *mddev, const char *buf, size_t len)
3322{
3323 int major, minor;
3324 char *e;
3325
3326
3327
3328
3329 if (mddev->external && strncmp(buf, "external:", 9) == 0)
3330 ;
3331 else if (!list_empty(&mddev->disks))
3332 return -EBUSY;
3333
3334 if (cmd_match(buf, "none")) {
3335 mddev->persistent = 0;
3336 mddev->external = 0;
3337 mddev->major_version = 0;
3338 mddev->minor_version = 90;
3339 return len;
3340 }
3341 if (strncmp(buf, "external:", 9) == 0) {
3342 size_t namelen = len-9;
3343 if (namelen >= sizeof(mddev->metadata_type))
3344 namelen = sizeof(mddev->metadata_type)-1;
3345 strncpy(mddev->metadata_type, buf+9, namelen);
3346 mddev->metadata_type[namelen] = 0;
3347 if (namelen && mddev->metadata_type[namelen-1] == '\n')
3348 mddev->metadata_type[--namelen] = 0;
3349 mddev->persistent = 0;
3350 mddev->external = 1;
3351 mddev->major_version = 0;
3352 mddev->minor_version = 90;
3353 return len;
3354 }
3355 major = simple_strtoul(buf, &e, 10);
3356 if (e==buf || *e != '.')
3357 return -EINVAL;
3358 buf = e+1;
3359 minor = simple_strtoul(buf, &e, 10);
3360 if (e==buf || (*e && *e != '\n') )
3361 return -EINVAL;
3362 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
3363 return -ENOENT;
3364 mddev->major_version = major;
3365 mddev->minor_version = minor;
3366 mddev->persistent = 1;
3367 mddev->external = 0;
3368 return len;
3369}
3370
3371static struct md_sysfs_entry md_metadata =
3372__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
3373
3374static ssize_t
3375action_show(mddev_t *mddev, char *page)
3376{
3377 char *type = "idle";
3378 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3379 type = "frozen";
3380 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3381 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
3382 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
3383 type = "reshape";
3384 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
3385 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
3386 type = "resync";
3387 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
3388 type = "check";
3389 else
3390 type = "repair";
3391 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
3392 type = "recover";
3393 }
3394 return sprintf(page, "%s\n", type);
3395}
3396
3397static ssize_t
3398action_store(mddev_t *mddev, const char *page, size_t len)
3399{
3400 if (!mddev->pers || !mddev->pers->sync_request)
3401 return -EINVAL;
3402
3403 if (cmd_match(page, "frozen"))
3404 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3405 else
3406 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3407
3408 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
3409 if (mddev->sync_thread) {
3410 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3411 md_unregister_thread(mddev->sync_thread);
3412 mddev->sync_thread = NULL;
3413 mddev->recovery = 0;
3414 }
3415 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3416 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
3417 return -EBUSY;
3418 else if (cmd_match(page, "resync"))
3419 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3420 else if (cmd_match(page, "recover")) {
3421 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
3422 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3423 } else if (cmd_match(page, "reshape")) {
3424 int err;
3425 if (mddev->pers->start_reshape == NULL)
3426 return -EINVAL;
3427 err = mddev->pers->start_reshape(mddev);
3428 if (err)
3429 return err;
3430 sysfs_notify(&mddev->kobj, NULL, "degraded");
3431 } else {
3432 if (cmd_match(page, "check"))
3433 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
3434 else if (!cmd_match(page, "repair"))
3435 return -EINVAL;
3436 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
3437 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
3438 }
3439 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3440 md_wakeup_thread(mddev->thread);
3441 sysfs_notify_dirent(mddev->sysfs_action);
3442 return len;
3443}
3444
3445static ssize_t
3446mismatch_cnt_show(mddev_t *mddev, char *page)
3447{
3448 return sprintf(page, "%llu\n",
3449 (unsigned long long) mddev->resync_mismatches);
3450}
3451
3452static struct md_sysfs_entry md_scan_mode =
3453__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
3454
3455
3456static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
3457
3458static ssize_t
3459sync_min_show(mddev_t *mddev, char *page)
3460{
3461 return sprintf(page, "%d (%s)\n", speed_min(mddev),
3462 mddev->sync_speed_min ? "local": "system");
3463}
3464
3465static ssize_t
3466sync_min_store(mddev_t *mddev, const char *buf, size_t len)
3467{
3468 int min;
3469 char *e;
3470 if (strncmp(buf, "system", 6)==0) {
3471 mddev->sync_speed_min = 0;
3472 return len;
3473 }
3474 min = simple_strtoul(buf, &e, 10);
3475 if (buf == e || (*e && *e != '\n') || min <= 0)
3476 return -EINVAL;
3477 mddev->sync_speed_min = min;
3478 return len;
3479}
3480
3481static struct md_sysfs_entry md_sync_min =
3482__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
3483
3484static ssize_t
3485sync_max_show(mddev_t *mddev, char *page)
3486{
3487 return sprintf(page, "%d (%s)\n", speed_max(mddev),
3488 mddev->sync_speed_max ? "local": "system");
3489}
3490
3491static ssize_t
3492sync_max_store(mddev_t *mddev, const char *buf, size_t len)
3493{
3494 int max;
3495 char *e;
3496 if (strncmp(buf, "system", 6)==0) {
3497 mddev->sync_speed_max = 0;
3498 return len;
3499 }
3500 max = simple_strtoul(buf, &e, 10);
3501 if (buf == e || (*e && *e != '\n') || max <= 0)
3502 return -EINVAL;
3503 mddev->sync_speed_max = max;
3504 return len;
3505}
3506
3507static struct md_sysfs_entry md_sync_max =
3508__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
3509
3510static ssize_t
3511degraded_show(mddev_t *mddev, char *page)
3512{
3513 return sprintf(page, "%d\n", mddev->degraded);
3514}
3515static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
3516
3517static ssize_t
3518sync_force_parallel_show(mddev_t *mddev, char *page)
3519{
3520 return sprintf(page, "%d\n", mddev->parallel_resync);
3521}
3522
3523static ssize_t
3524sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
3525{
3526 long n;
3527
3528 if (strict_strtol(buf, 10, &n))
3529 return -EINVAL;
3530
3531 if (n != 0 && n != 1)
3532 return -EINVAL;
3533
3534 mddev->parallel_resync = n;
3535
3536 if (mddev->sync_thread)
3537 wake_up(&resync_wait);
3538
3539 return len;
3540}
3541
3542
3543static struct md_sysfs_entry md_sync_force_parallel =
3544__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
3545 sync_force_parallel_show, sync_force_parallel_store);
3546
3547static ssize_t
3548sync_speed_show(mddev_t *mddev, char *page)
3549{
3550 unsigned long resync, dt, db;
3551 if (mddev->curr_resync == 0)
3552 return sprintf(page, "none\n");
3553 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
3554 dt = (jiffies - mddev->resync_mark) / HZ;
3555 if (!dt) dt++;
3556 db = resync - mddev->resync_mark_cnt;
3557 return sprintf(page, "%lu\n", db/dt/2);
3558}
3559
3560static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
3561
3562static ssize_t
3563sync_completed_show(mddev_t *mddev, char *page)
3564{
3565 unsigned long max_sectors, resync;
3566
3567 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3568 return sprintf(page, "none\n");
3569
3570 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
3571 max_sectors = mddev->resync_max_sectors;
3572 else
3573 max_sectors = mddev->dev_sectors;
3574
3575 resync = mddev->curr_resync_completed;
3576 return sprintf(page, "%lu / %lu\n", resync, max_sectors);
3577}
3578
3579static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
3580
3581static ssize_t
3582min_sync_show(mddev_t *mddev, char *page)
3583{
3584 return sprintf(page, "%llu\n",
3585 (unsigned long long)mddev->resync_min);
3586}
3587static ssize_t
3588min_sync_store(mddev_t *mddev, const char *buf, size_t len)
3589{
3590 unsigned long long min;
3591 if (strict_strtoull(buf, 10, &min))
3592 return -EINVAL;
3593 if (min > mddev->resync_max)
3594 return -EINVAL;
3595 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3596 return -EBUSY;
3597
3598
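 /* When the array has a chunk size, the resync floor must be a
  * multiple of that chunk size (both expressed in sectors). */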
3599 if (mddev->chunk_sectors) {
3600 sector_t temp = min;
3601 if (sector_div(temp, mddev->chunk_sectors))
3602 return -EINVAL;
3603 }
3604 mddev->resync_min = min;
3605
3606 return len;
3607}
3608
3609static struct md_sysfs_entry md_min_sync =
3610__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
3611
3612static ssize_t
3613max_sync_show(mddev_t *mddev, char *page)
3614{
3615 if (mddev->resync_max == MaxSector)
3616 return sprintf(page, "max\n");
3617 else
3618 return sprintf(page, "%llu\n",
3619 (unsigned long long)mddev->resync_max);
3620}
3621static ssize_t
3622max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3623{
3624 if (strncmp(buf, "max", 3) == 0)
3625 mddev->resync_max = MaxSector;
3626 else {
3627 unsigned long long max;
3628 if (strict_strtoull(buf, 10, &max))
3629 return -EINVAL;
3630 if (max < mddev->resync_min)
3631 return -EINVAL;
3632 if (max < mddev->resync_max &&
3633 mddev->ro == 0 &&
3634 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3635 return -EBUSY;
3636
3637
3638 if (mddev->chunk_sectors) {
3639 sector_t temp = max;
3640 if (sector_div(temp, mddev->chunk_sectors))
3641 return -EINVAL;
3642 }
3643 mddev->resync_max = max;
3644 }
3645 wake_up(&mddev->recovery_wait);
3646 return len;
3647}
3648
3649static struct md_sysfs_entry md_max_sync =
3650__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
3651
3652static ssize_t
3653suspend_lo_show(mddev_t *mddev, char *page)
3654{
3655 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
3656}
3657
3658static ssize_t
3659suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
3660{
3661 char *e;
3662 unsigned long long new = simple_strtoull(buf, &e, 10);
3663
3664 if (mddev->pers == NULL ||
3665 mddev->pers->quiesce == NULL)
3666 return -EINVAL;
3667 if (buf == e || (*e && *e != '\n'))
3668 return -EINVAL;
3669 if (new >= mddev->suspend_hi ||
3670 (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
3671 mddev->suspend_lo = new;
3672 mddev->pers->quiesce(mddev, 2);
3673 return len;
3674 } else
3675 return -EINVAL;
3676}
3677static struct md_sysfs_entry md_suspend_lo =
3678__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
3679
3680
3681static ssize_t
3682suspend_hi_show(mddev_t *mddev, char *page)
3683{
3684 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
3685}
3686
3687static ssize_t
3688suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
3689{
3690 char *e;
3691 unsigned long long new = simple_strtoull(buf, &e, 10);
3692
3693 if (mddev->pers == NULL ||
3694 mddev->pers->quiesce == NULL)
3695 return -EINVAL;
3696 if (buf == e || (*e && *e != '\n'))
3697 return -EINVAL;
3698 if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
3699 (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
3700 mddev->suspend_hi = new;
3701 mddev->pers->quiesce(mddev, 1);
3702 mddev->pers->quiesce(mddev, 0);
3703 return len;
3704 } else
3705 return -EINVAL;
3706}
3707static struct md_sysfs_entry md_suspend_hi =
3708__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
3709
3710static ssize_t
3711reshape_position_show(mddev_t *mddev, char *page)
3712{
3713 if (mddev->reshape_position != MaxSector)
3714 return sprintf(page, "%llu\n",
3715 (unsigned long long)mddev->reshape_position);
3716 strcpy(page, "none\n");
3717 return 5;
3718}
3719
3720static ssize_t
3721reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
3722{
3723 char *e;
3724 unsigned long long new = simple_strtoull(buf, &e, 10);
3725 if (mddev->pers)
3726 return -EBUSY;
3727 if (buf == e || (*e && *e != '\n'))
3728 return -EINVAL;
3729 mddev->reshape_position = new;
3730 mddev->delta_disks = 0;
3731 mddev->new_level = mddev->level;
3732 mddev->new_layout = mddev->layout;
3733 mddev->new_chunk_sectors = mddev->chunk_sectors;
3734 return len;
3735}
3736
3737static struct md_sysfs_entry md_reshape_position =
3738__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
3739 reshape_position_store);
3740
3741static ssize_t
3742array_size_show(mddev_t *mddev, char *page)
3743{
3744 if (mddev->external_size)
3745 return sprintf(page, "%llu\n",
3746 (unsigned long long)mddev->array_sectors/2);
3747 else
3748 return sprintf(page, "default\n");
3749}
3750
3751static ssize_t
3752array_size_store(mddev_t *mddev, const char *buf, size_t len)
3753{
3754 sector_t sectors;
3755
3756 if (strncmp(buf, "default", 7) == 0) {
3757 if (mddev->pers)
3758 sectors = mddev->pers->size(mddev, 0, 0);
3759 else
3760 sectors = mddev->array_sectors;
3761
3762 mddev->external_size = 0;
3763 } else {
3764 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3765 return -EINVAL;
3766 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
3767 return -E2BIG;
3768
3769 mddev->external_size = 1;
3770 }
3771
3772 mddev->array_sectors = sectors;
3773 set_capacity(mddev->gendisk, mddev->array_sectors);
3774 if (mddev->pers)
3775 revalidate_disk(mddev->gendisk);
3776
3777 return len;
3778}
3779
3780static struct md_sysfs_entry md_array_size =
3781__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
3782 array_size_store);
3783
3784static struct attribute *md_default_attrs[] = {
3785 &md_level.attr,
3786 &md_layout.attr,
3787 &md_raid_disks.attr,
3788 &md_chunk_size.attr,
3789 &md_size.attr,
3790 &md_resync_start.attr,
3791 &md_metadata.attr,
3792 &md_new_device.attr,
3793 &md_safe_delay.attr,
3794 &md_array_state.attr,
3795 &md_reshape_position.attr,
3796 &md_array_size.attr,
3797 NULL,
3798};
3799
3800static struct attribute *md_redundancy_attrs[] = {
3801 &md_scan_mode.attr,
3802 &md_mismatches.attr,
3803 &md_sync_min.attr,
3804 &md_sync_max.attr,
3805 &md_sync_speed.attr,
3806 &md_sync_force_parallel.attr,
3807 &md_sync_completed.attr,
3808 &md_min_sync.attr,
3809 &md_max_sync.attr,
3810 &md_suspend_lo.attr,
3811 &md_suspend_hi.attr,
3812 &md_bitmap.attr,
3813 &md_degraded.attr,
3814 NULL,
3815};
3816static struct attribute_group md_redundancy_group = {
3817 .name = NULL,
3818 .attrs = md_redundancy_attrs,
3819};
3820
3821
3822static ssize_t
3823md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3824{
3825 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3826 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3827 ssize_t rv;
3828
3829 if (!entry->show)
3830 return -EIO;
3831 rv = mddev_lock(mddev);
3832 if (!rv) {
3833 rv = entry->show(mddev, page);
3834 mddev_unlock(mddev);
3835 }
3836 return rv;
3837}
3838
3839static ssize_t
3840md_attr_store(struct kobject *kobj, struct attribute *attr,
3841 const char *page, size_t length)
3842{
3843 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
3844 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
3845 ssize_t rv;
3846
3847 if (!entry->store)
3848 return -EIO;
3849 if (!capable(CAP_SYS_ADMIN))
3850 return -EACCES;
3851 rv = mddev_lock(mddev);
3852 if (mddev->hold_active == UNTIL_IOCTL)
3853 mddev->hold_active = 0;
3854 if (!rv) {
3855 rv = entry->store(mddev, page, length);
3856 mddev_unlock(mddev);
3857 }
3858 return rv;
3859}
3860
3861static void md_free(struct kobject *ko)
3862{
3863 mddev_t *mddev = container_of(ko, mddev_t, kobj);
3864
3865 if (mddev->sysfs_state)
3866 sysfs_put(mddev->sysfs_state);
3867
3868 if (mddev->gendisk) {
3869 del_gendisk(mddev->gendisk);
3870 put_disk(mddev->gendisk);
3871 }
3872 if (mddev->queue)
3873 blk_cleanup_queue(mddev->queue);
3874
3875 kfree(mddev);
3876}
3877
3878static struct sysfs_ops md_sysfs_ops = {
3879 .show = md_attr_show,
3880 .store = md_attr_store,
3881};
3882static struct kobj_type md_ktype = {
3883 .release = md_free,
3884 .sysfs_ops = &md_sysfs_ops,
3885 .default_attrs = md_default_attrs,
3886};
3887
3888int mdp_major = 0;
3889
3890static void mddev_delayed_delete(struct work_struct *ws)
3891{
3892 mddev_t *mddev = container_of(ws, mddev_t, del_work);
3893
3894 if (mddev->private == &md_redundancy_group) {
3895 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
3896 if (mddev->sysfs_action)
3897 sysfs_put(mddev->sysfs_action);
3898 mddev->sysfs_action = NULL;
3899 mddev->private = NULL;
3900 }
3901 kobject_del(&mddev->kobj);
3902 kobject_put(&mddev->kobj);
3903}
3904
3905static int md_alloc(dev_t dev, char *name)
3906{
3907 static DEFINE_MUTEX(disks_mutex);
3908 mddev_t *mddev = mddev_find(dev);
3909 struct gendisk *disk;
3910 int partitioned;
3911 int shift;
3912 int unit;
3913 int error;
3914
3915 if (!mddev)
3916 return -ENODEV;
3917
3918 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
3919 shift = partitioned ? MdpMinorShift : 0;
3920 unit = MINOR(mddev->unit) >> shift;
3921
3922
3923
3924
3925 flush_scheduled_work();
3926
3927 mutex_lock(&disks_mutex);
3928 error = -EEXIST;
3929 if (mddev->gendisk)
3930 goto abort;
3931
3932 if (name) {
3933
3934
3935 mddev_t *mddev2;
3936 spin_lock(&all_mddevs_lock);
3937
3938 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
3939 if (mddev2->gendisk &&
3940 strcmp(mddev2->gendisk->disk_name, name) == 0) {
3941 spin_unlock(&all_mddevs_lock);
3942 goto abort;
3943 }
3944 spin_unlock(&all_mddevs_lock);
3945 }
3946
3947 error = -ENOMEM;
3948 mddev->queue = blk_alloc_queue(GFP_KERNEL);
3949 if (!mddev->queue)
3950 goto abort;
3951 mddev->queue->queuedata = mddev;
3952
3953
3954 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
3955
3956 blk_queue_make_request(mddev->queue, md_make_request);
3957
3958 disk = alloc_disk(1 << shift);
3959 if (!disk) {
3960 blk_cleanup_queue(mddev->queue);
3961 mddev->queue = NULL;
3962 goto abort;
3963 }
3964 disk->major = MAJOR(mddev->unit);
3965 disk->first_minor = unit << shift;
3966 if (name)
3967 strcpy(disk->disk_name, name);
3968 else if (partitioned)
3969 sprintf(disk->disk_name, "md_d%d", unit);
3970 else
3971 sprintf(disk->disk_name, "md%d", unit);
3972 disk->fops = &md_fops;
3973 disk->private_data = mddev;
3974 disk->queue = mddev->queue;
3975
3976
3977
3978
3979 disk->flags |= GENHD_FL_EXT_DEVT;
3980 add_disk(disk);
3981 mddev->gendisk = disk;
3982 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
3983 &disk_to_dev(disk)->kobj, "%s", "md");
3984 if (error) {
3985
3986
3987
3988 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
3989 disk->disk_name);
3990 error = 0;
3991 }
3992 abort:
3993 mutex_unlock(&disks_mutex);
3994 if (!error) {
3995 kobject_uevent(&mddev->kobj, KOBJ_ADD);
3996 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
3997 }
3998 mddev_put(mddev);
3999 return error;
4000}
4001
4002static struct kobject *md_probe(dev_t dev, int *part, void *data)
4003{
4004 md_alloc(dev, NULL);
4005 return NULL;
4006}
4007
4008static int add_named_array(const char *val, struct kernel_param *kp)
4009{
4010
4011
4012
4013
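 /*
  * val should be "md_<name>"; an array with that literal disk name is
  * created on a free minor.  This is intended as the handler for a
  * writable module parameter registered elsewhere in this file.
  */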
4014 int len = strlen(val);
4015 char buf[DISK_NAME_LEN];
4016
4017 while (len && val[len-1] == '\n')
4018 len--;
4019 if (len >= DISK_NAME_LEN)
4020 return -E2BIG;
4021 strlcpy(buf, val, len+1);
4022 if (strncmp(buf, "md_", 3) != 0)
4023 return -EINVAL;
4024 return md_alloc(0, buf);
4025}
4026
4027static void md_safemode_timeout(unsigned long data)
4028{
4029 mddev_t *mddev = (mddev_t *) data;
4030
4031 if (!atomic_read(&mddev->writes_pending)) {
4032 mddev->safemode = 1;
4033 if (mddev->external)
4034 sysfs_notify_dirent(mddev->sysfs_state);
4035 }
4036 md_wakeup_thread(mddev->thread);
4037}
4038
4039static int start_dirty_degraded;
4040
4041static int do_md_run(mddev_t * mddev)
4042{
4043 int err;
4044 mdk_rdev_t *rdev;
4045 struct gendisk *disk;
4046 struct mdk_personality *pers;
4047
4048 if (list_empty(&mddev->disks))
4049
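 /* Cannot run an array with no member devices attached. */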
4050 return -EINVAL;
4051
4052 if (mddev->pers)
4053 return -EBUSY;
4054
4055
4056
4057
4058 if (!mddev->raid_disks) {
4059 if (!mddev->persistent)
4060 return -EINVAL;
4061 analyze_sbs(mddev);
4062 }
4063
4064 if (mddev->level != LEVEL_NONE)
4065 request_module("md-level-%d", mddev->level);
4066 else if (mddev->clevel[0])
4067 request_module("md-%s", mddev->clevel);
4068
4069
4070
4071
4072
4073
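 /*
  * Flush and drop the page-cache buffers of every member device; from
  * here on the md device is the only valid external interface to the
  * data.
  */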
4074 list_for_each_entry(rdev, &mddev->disks, same_set) {
4075 if (test_bit(Faulty, &rdev->flags))
4076 continue;
4077 sync_blockdev(rdev->bdev);
4078 invalidate_bdev(rdev->bdev);
4079
4080
4081
4082
4083
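 /* Sanity check: the data area and the superblock must not overlap,
  * whichever of the two comes first on the device. */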
4084 if (rdev->data_offset < rdev->sb_start) {
4085 if (mddev->dev_sectors &&
4086 rdev->data_offset + mddev->dev_sectors
4087 > rdev->sb_start) {
4088 printk("md: %s: data overlaps metadata\n",
4089 mdname(mddev));
4090 return -EINVAL;
4091 }
4092 } else {
4093 if (rdev->sb_start + rdev->sb_size/512
4094 > rdev->data_offset) {
4095 printk("md: %s: metadata overlaps data\n",
4096 mdname(mddev));
4097 return -EINVAL;
4098 }
4099 }
4100 sysfs_notify_dirent(rdev->sysfs_state);
4101 }
4102
4103 md_probe(mddev->unit, NULL, NULL);
4104 disk = mddev->gendisk;
4105 if (!disk)
4106 return -ENOMEM;
4107
4108 spin_lock(&pers_lock);
4109 pers = find_pers(mddev->level, mddev->clevel);
4110 if (!pers || !try_module_get(pers->owner)) {
4111 spin_unlock(&pers_lock);
4112 if (mddev->level != LEVEL_NONE)
4113 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
4114 mddev->level);
4115 else
4116 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
4117 mddev->clevel);
4118 return -EINVAL;
4119 }
4120 mddev->pers = pers;
4121 spin_unlock(&pers_lock);
4122 if (mddev->level != pers->level) {
4123 mddev->level = pers->level;
4124 mddev->new_level = pers->level;
4125 }
4126 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4127
4128 if (mddev->reshape_position != MaxSector &&
4129 pers->start_reshape == NULL) {
4130
4131 mddev->pers = NULL;
4132 module_put(pers->owner);
4133 return -EINVAL;
4134 }
4135
4136 if (pers->sync_request) {
4137
4138
4139
4140 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
4141 mdk_rdev_t *rdev2;
4142 int warned = 0;
4143
4144 list_for_each_entry(rdev, &mddev->disks, same_set)
4145 list_for_each_entry(rdev2, &mddev->disks, same_set) {
4146 if (rdev < rdev2 &&
4147 rdev->bdev->bd_contains ==
4148 rdev2->bdev->bd_contains) {
4149 printk(KERN_WARNING
4150 "%s: WARNING: %s appears to be"
4151 " on the same physical disk as"
4152 " %s.\n",
4153 mdname(mddev),
4154 bdevname(rdev->bdev,b),
4155 bdevname(rdev2->bdev,b2));
4156 warned = 1;
4157 }
4158 }
4159
4160 if (warned)
4161 printk(KERN_WARNING
4162 "True protection against single-disk"
4163 " failure might be compromised.\n");
4164 }
4165
4166 mddev->recovery = 0;
4167
4168 mddev->resync_max_sectors = mddev->dev_sectors;
4169
4170 mddev->barriers_work = 1;
4171 mddev->ok_start_degraded = start_dirty_degraded;
4172
4173 if (start_readonly)
4174 mddev->ro = 2;
4175
4176 err = mddev->pers->run(mddev);
4177 if (err)
4178 printk(KERN_ERR "md: pers->run() failed ...\n");
4179 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
4180 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
4181 " but 'external_size' not in effect?\n", __func__);
4182 printk(KERN_ERR
4183 "md: invalid array_size %llu > default size %llu\n",
4184 (unsigned long long)mddev->array_sectors / 2,
4185 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
4186 err = -EINVAL;
4187 mddev->pers->stop(mddev);
4188 }
4189 if (err == 0 && mddev->pers->sync_request) {
4190 err = bitmap_create(mddev);
4191 if (err) {
4192 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
4193 mdname(mddev), err);
4194 mddev->pers->stop(mddev);
4195 }
4196 }
4197 if (err) {
4198 module_put(mddev->pers->owner);
4199 mddev->pers = NULL;
4200 bitmap_destroy(mddev);
4201 return err;
4202 }
4203 if (mddev->pers->sync_request) {
4204 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4205 printk(KERN_WARNING
4206 "md: cannot register extra attributes for %s\n",
4207 mdname(mddev));
4208 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4209 } else if (mddev->ro == 2)
4210 mddev->ro = 0;
4211
4212 atomic_set(&mddev->writes_pending,0);
4213 mddev->safemode = 0;
4214 mddev->safemode_timer.function = md_safemode_timeout;
4215 mddev->safemode_timer.data = (unsigned long) mddev;
4216 mddev->safemode_delay = (200 * HZ)/1000 +1;
4217 mddev->in_sync = 1;
4218
4219 list_for_each_entry(rdev, &mddev->disks, same_set)
4220 if (rdev->raid_disk >= 0) {
4221 char nm[20];
4222 sprintf(nm, "rd%d", rdev->raid_disk);
4223 if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
4224 printk("md: cannot register %s for %s\n",
4225 nm, mdname(mddev));
4226 }
4227
4228 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4229
4230 if (mddev->flags)
4231 md_update_sb(mddev, 0);
4232
4233 set_capacity(disk, mddev->array_sectors);
4234
4235
4236
4237
4238
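 /*
  * If the array starts degraded but has members that are attached and
  * not yet in_sync, kick off a resync/recovery thread immediately
  * rather than waiting for md_check_recovery.
  */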
4239 if (mddev->degraded && !mddev->sync_thread) {
4240 int spares = 0;
4241 list_for_each_entry(rdev, &mddev->disks, same_set)
4242 if (rdev->raid_disk >= 0 &&
4243 !test_bit(In_sync, &rdev->flags) &&
4244 !test_bit(Faulty, &rdev->flags))
4245
4246 spares++;
4247 if (spares && mddev->pers->sync_request) {
4248 mddev->recovery = 0;
4249 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
4250 mddev->sync_thread = md_register_thread(md_do_sync,
4251 mddev,
4252 "resync");
4253 if (!mddev->sync_thread) {
4254 printk(KERN_ERR "%s: could not start resync"
4255 " thread...\n",
4256 mdname(mddev));
4257
4258 mddev->recovery = 0;
4259 }
4260 }
4261 }
4262 md_wakeup_thread(mddev->thread);
4263 md_wakeup_thread(mddev->sync_thread);
4264
4265 revalidate_disk(mddev->gendisk);
4266 mddev->changed = 1;
4267 md_new_event(mddev);
4268 sysfs_notify_dirent(mddev->sysfs_state);
4269 if (mddev->sysfs_action)
4270 sysfs_notify_dirent(mddev->sysfs_action);
4271 sysfs_notify(&mddev->kobj, NULL, "degraded");
4272 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4273 return 0;
4274}
4275
4276static int restart_array(mddev_t *mddev)
4277{
4278 struct gendisk *disk = mddev->gendisk;
4279
4280
4281 if (list_empty(&mddev->disks))
4282 return -ENXIO;
4283 if (!mddev->pers)
4284 return -EINVAL;
4285 if (!mddev->ro)
4286 return -EBUSY;
4287 mddev->safemode = 0;
4288 mddev->ro = 0;
4289 set_disk_ro(disk, 0);
4290 printk(KERN_INFO "md: %s switched to read-write mode.\n",
4291 mdname(mddev));
4292
4293 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4294 md_wakeup_thread(mddev->thread);
4295 md_wakeup_thread(mddev->sync_thread);
4296 sysfs_notify_dirent(mddev->sysfs_state);
4297 return 0;
4298}
4299
4300
4301
4302static int deny_bitmap_write_access(struct file * file)
4303{
4304 struct inode *inode = file->f_mapping->host;
4305
4306 spin_lock(&inode->i_lock);
4307 if (atomic_read(&inode->i_writecount) > 1) {
4308 spin_unlock(&inode->i_lock);
4309 return -ETXTBSY;
4310 }
4311 atomic_set(&inode->i_writecount, -1);
4312 spin_unlock(&inode->i_lock);
4313
4314 return 0;
4315}
4316
4317static void restore_bitmap_write_access(struct file *file)
4318{
4319 struct inode *inode = file->f_mapping->host;
4320
4321 spin_lock(&inode->i_lock);
4322 atomic_set(&inode->i_writecount, 1);
4323 spin_unlock(&inode->i_lock);
4324}
4325
4326
4327
4328
4329
4330
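/*
 * mode:
 *   0 - completely stop and disassemble the array
 *   1 - switch the array to read-only
 *   2 - stop the personality but leave the array assembled
 * is_open is the number of openers the caller itself accounts for.
 */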
4331static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4332{
4333 int err = 0;
4334 struct gendisk *disk = mddev->gendisk;
4335 mdk_rdev_t *rdev;
4336
4337 mutex_lock(&mddev->open_mutex);
4338 if (atomic_read(&mddev->openers) > is_open) {
4339 printk("md: %s still in use.\n",mdname(mddev));
4340 err = -EBUSY;
4341 } else if (mddev->pers) {
4342
4343 if (mddev->sync_thread) {
4344 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4345 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4346 md_unregister_thread(mddev->sync_thread);
4347 mddev->sync_thread = NULL;
4348 }
4349
4350 del_timer_sync(&mddev->safemode_timer);
4351
4352 switch(mode) {
4353 case 1:
4354 err = -ENXIO;
4355 if (mddev->ro==1)
4356 goto out;
4357 mddev->ro = 1;
4358 break;
4359 case 0:
4360 case 2:
4361 bitmap_flush(mddev);
4362 md_super_wait(mddev);
4363 if (mddev->ro)
4364 set_disk_ro(disk, 0);
4365
4366 mddev->pers->stop(mddev);
4367 mddev->queue->merge_bvec_fn = NULL;
4368 mddev->queue->unplug_fn = NULL;
4369 mddev->queue->backing_dev_info.congested_fn = NULL;
4370 module_put(mddev->pers->owner);
4371 if (mddev->pers->sync_request)
4372 mddev->private = &md_redundancy_group;
4373 mddev->pers = NULL;
4374
4375 sysfs_notify_dirent(mddev->sysfs_state);
4376
4377 list_for_each_entry(rdev, &mddev->disks, same_set)
4378 if (rdev->raid_disk >= 0) {
4379 char nm[20];
4380 sprintf(nm, "rd%d", rdev->raid_disk);
4381 sysfs_remove_link(&mddev->kobj, nm);
4382 }
4383
4384 set_capacity(disk, 0);
4385 mddev->changed = 1;
4386
4387 if (mddev->ro)
4388 mddev->ro = 0;
4389 }
4390 if (!mddev->in_sync || mddev->flags) {
4391
4392 mddev->in_sync = 1;
4393 md_update_sb(mddev, 1);
4394 }
4395 if (mode == 1)
4396 set_disk_ro(disk, 1);
4397 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4398 err = 0;
4399 }
4400out:
4401 mutex_unlock(&mddev->open_mutex);
4402 if (err)
4403 return err;
4404
4405
4406
4407 if (mode == 0) {
4408
4409 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
4410
4411 bitmap_destroy(mddev);
4412 if (mddev->bitmap_file) {
4413 restore_bitmap_write_access(mddev->bitmap_file);
4414 fput(mddev->bitmap_file);
4415 mddev->bitmap_file = NULL;
4416 }
4417 mddev->bitmap_offset = 0;
4418
4419
4420 flush_scheduled_work();
4421
4422 export_array(mddev);
4423
4424 mddev->array_sectors = 0;
4425 mddev->external_size = 0;
4426 mddev->dev_sectors = 0;
4427 mddev->raid_disks = 0;
4428 mddev->recovery_cp = 0;
4429 mddev->resync_min = 0;
4430 mddev->resync_max = MaxSector;
4431 mddev->reshape_position = MaxSector;
4432 mddev->external = 0;
4433 mddev->persistent = 0;
4434 mddev->level = LEVEL_NONE;
4435 mddev->clevel[0] = 0;
4436 mddev->flags = 0;
4437 mddev->ro = 0;
4438 mddev->metadata_type[0] = 0;
4439 mddev->chunk_sectors = 0;
4440 mddev->ctime = mddev->utime = 0;
4441 mddev->layout = 0;
4442 mddev->max_disks = 0;
4443 mddev->events = 0;
4444 mddev->delta_disks = 0;
4445 mddev->new_level = LEVEL_NONE;
4446 mddev->new_layout = 0;
4447 mddev->new_chunk_sectors = 0;
4448 mddev->curr_resync = 0;
4449 mddev->resync_mismatches = 0;
4450 mddev->suspend_lo = mddev->suspend_hi = 0;
4451 mddev->sync_speed_min = mddev->sync_speed_max = 0;
4452 mddev->recovery = 0;
4453 mddev->in_sync = 0;
4454 mddev->changed = 0;
4455 mddev->degraded = 0;
4456 mddev->barriers_work = 0;
4457 mddev->safemode = 0;
4458 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4459 if (mddev->hold_active == UNTIL_STOP)
4460 mddev->hold_active = 0;
4461
4462 } else if (mddev->pers)
4463 printk(KERN_INFO "md: %s switched to read-only mode.\n",
4464 mdname(mddev));
4465 err = 0;
4466 blk_integrity_unregister(disk);
4467 md_new_event(mddev);
4468 sysfs_notify_dirent(mddev->sysfs_state);
4469 return err;
4470}
4471
4472#ifndef MODULE
4473static void autorun_array(mddev_t *mddev)
4474{
4475 mdk_rdev_t *rdev;
4476 int err;
4477
4478 if (list_empty(&mddev->disks))
4479 return;
4480
4481 printk(KERN_INFO "md: running: ");
4482
4483 list_for_each_entry(rdev, &mddev->disks, same_set) {
4484 char b[BDEVNAME_SIZE];
4485 printk("<%s>", bdevname(rdev->bdev,b));
4486 }
4487 printk("\n");
4488
4489 err = do_md_run(mddev);
4490 if (err) {
4491 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
4492 do_md_stop(mddev, 0, 0);
4493 }
4494}
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
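/*
 * Autorun: for each device left on pending_raid_disks, gather every other
 * pending device whose 0.90 superblock places it in the same array, create
 * (or find) the md device named by the preferred minor, bind the members
 * and try to run the array.  Devices that cannot be placed are exported
 * again.
 */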
4508static void autorun_devices(int part)
4509{
4510 mdk_rdev_t *rdev0, *rdev, *tmp;
4511 mddev_t *mddev;
4512 char b[BDEVNAME_SIZE];
4513
4514 printk(KERN_INFO "md: autorun ...\n");
4515 while (!list_empty(&pending_raid_disks)) {
4516 int unit;
4517 dev_t dev;
4518 LIST_HEAD(candidates);
4519 rdev0 = list_entry(pending_raid_disks.next,
4520 mdk_rdev_t, same_set);
4521
4522 printk(KERN_INFO "md: considering %s ...\n",
4523 bdevname(rdev0->bdev,b));
4524 INIT_LIST_HEAD(&candidates);
4525 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
4526 if (super_90_load(rdev, rdev0, 0) >= 0) {
4527 printk(KERN_INFO "md: adding %s ...\n",
4528 bdevname(rdev->bdev,b));
4529 list_move(&rdev->same_set, &candidates);
4530 }
4531
4532
4533
4534
4535
4536 if (part) {
4537 dev = MKDEV(mdp_major,
4538 rdev0->preferred_minor << MdpMinorShift);
4539 unit = MINOR(dev) >> MdpMinorShift;
4540 } else {
4541 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
4542 unit = MINOR(dev);
4543 }
4544 if (rdev0->preferred_minor != unit) {
4545 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
4546 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
4547 break;
4548 }
4549
4550 md_probe(dev, NULL, NULL);
4551 mddev = mddev_find(dev);
4552 if (!mddev || !mddev->gendisk) {
4553 if (mddev)
4554 mddev_put(mddev);
4555 printk(KERN_ERR
4556 "md: cannot allocate memory for md drive.\n");
4557 break;
4558 }
4559 if (mddev_lock(mddev))
4560 printk(KERN_WARNING "md: %s locked, cannot run\n",
4561 mdname(mddev));
4562 else if (mddev->raid_disks || mddev->major_version
4563 || !list_empty(&mddev->disks)) {
4564 printk(KERN_WARNING
4565 "md: %s already running, cannot run %s\n",
4566 mdname(mddev), bdevname(rdev0->bdev,b));
4567 mddev_unlock(mddev);
4568 } else {
4569 printk(KERN_INFO "md: created %s\n", mdname(mddev));
4570 mddev->persistent = 1;
4571 rdev_for_each_list(rdev, tmp, &candidates) {
4572 list_del_init(&rdev->same_set);
4573 if (bind_rdev_to_array(rdev, mddev))
4574 export_rdev(rdev);
4575 }
4576 autorun_array(mddev);
4577 mddev_unlock(mddev);
4578 }
4579
4580
4581
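		/* on success the candidates list is empty; otherwise drop
		 * any devices that could not be bound to the array
		 */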
4582 rdev_for_each_list(rdev, tmp, &candidates) {
4583 list_del_init(&rdev->same_set);
4584 export_rdev(rdev);
4585 }
4586 mddev_put(mddev);
4587 }
4588 printk(KERN_INFO "md: ... autorun DONE.\n");
4589}
4590#endif
4591
4592static int get_version(void __user * arg)
4593{
4594 mdu_version_t ver;
4595
4596 ver.major = MD_MAJOR_VERSION;
4597 ver.minor = MD_MINOR_VERSION;
4598 ver.patchlevel = MD_PATCHLEVEL_VERSION;
4599
4600 if (copy_to_user(arg, &ver, sizeof(ver)))
4601 return -EFAULT;
4602
4603 return 0;
4604}
4605
4606static int get_array_info(mddev_t * mddev, void __user * arg)
4607{
4608 mdu_array_info_t info;
4609 int nr,working,insync,failed,spare;
4610 mdk_rdev_t *rdev;
4611
4612 nr=working=insync=failed=spare=0;
4613 list_for_each_entry(rdev, &mddev->disks, same_set) {
4614 nr++;
4615 if (test_bit(Faulty, &rdev->flags))
4616 failed++;
4617 else {
4618 working++;
4619 if (test_bit(In_sync, &rdev->flags))
4620 insync++;
4621 else
4622 spare++;
4623 }
4624 }
4625
4626 info.major_version = mddev->major_version;
4627 info.minor_version = mddev->minor_version;
4628 info.patch_version = MD_PATCHLEVEL_VERSION;
4629 info.ctime = mddev->ctime;
4630 info.level = mddev->level;
4631 info.size = mddev->dev_sectors / 2;
4632 if (info.size != mddev->dev_sectors / 2)
4633 info.size = -1;
4634 info.nr_disks = nr;
4635 info.raid_disks = mddev->raid_disks;
4636 info.md_minor = mddev->md_minor;
4637 info.not_persistent= !mddev->persistent;
4638
4639 info.utime = mddev->utime;
4640 info.state = 0;
4641 if (mddev->in_sync)
4642 info.state = (1<<MD_SB_CLEAN);
4643 if (mddev->bitmap && mddev->bitmap_offset)
4644		info.state |= (1<<MD_SB_BITMAP_PRESENT);
4645 info.active_disks = insync;
4646 info.working_disks = working;
4647 info.failed_disks = failed;
4648 info.spare_disks = spare;
4649
4650 info.layout = mddev->layout;
4651 info.chunk_size = mddev->chunk_sectors << 9;
4652
4653 if (copy_to_user(arg, &info, sizeof(info)))
4654 return -EFAULT;
4655
4656 return 0;
4657}
4658
4659static int get_bitmap_file(mddev_t * mddev, void __user * arg)
4660{
4661 mdu_bitmap_file_t *file = NULL;
4662 char *ptr, *buf = NULL;
4663 int err = -ENOMEM;
4664
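	/* if md_allow_write() could not complete the switch to read-write
	 * (an external metadata update is still pending), a GFP_KERNEL
	 * allocation might recurse into writeback against this array, so
	 * use GFP_NOIO instead
	 */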
4665 if (md_allow_write(mddev))
4666 file = kmalloc(sizeof(*file), GFP_NOIO);
4667 else
4668 file = kmalloc(sizeof(*file), GFP_KERNEL);
4669
4670 if (!file)
4671 goto out;
4672
4673
4674 if (!mddev->bitmap || !mddev->bitmap->file) {
4675 file->pathname[0] = '\0';
4676 goto copy_out;
4677 }
4678
4679 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
4680 if (!buf)
4681 goto out;
4682
4683 ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
4684 if (IS_ERR(ptr))
4685 goto out;
4686
4687 strcpy(file->pathname, ptr);
4688
4689copy_out:
4690 err = 0;
4691 if (copy_to_user(arg, file, sizeof(*file)))
4692 err = -EFAULT;
4693out:
4694 kfree(buf);
4695 kfree(file);
4696 return err;
4697}
4698
4699static int get_disk_info(mddev_t * mddev, void __user * arg)
4700{
4701 mdu_disk_info_t info;
4702 mdk_rdev_t *rdev;
4703
4704 if (copy_from_user(&info, arg, sizeof(info)))
4705 return -EFAULT;
4706
4707 rdev = find_rdev_nr(mddev, info.number);
4708 if (rdev) {
4709 info.major = MAJOR(rdev->bdev->bd_dev);
4710 info.minor = MINOR(rdev->bdev->bd_dev);
4711 info.raid_disk = rdev->raid_disk;
4712 info.state = 0;
4713 if (test_bit(Faulty, &rdev->flags))
4714 info.state |= (1<<MD_DISK_FAULTY);
4715 else if (test_bit(In_sync, &rdev->flags)) {
4716 info.state |= (1<<MD_DISK_ACTIVE);
4717 info.state |= (1<<MD_DISK_SYNC);
4718 }
4719 if (test_bit(WriteMostly, &rdev->flags))
4720 info.state |= (1<<MD_DISK_WRITEMOSTLY);
4721 } else {
4722 info.major = info.minor = 0;
4723 info.raid_disk = -1;
4724 info.state = (1<<MD_DISK_REMOVED);
4725 }
4726
4727 if (copy_to_user(arg, &info, sizeof(info)))
4728 return -EFAULT;
4729
4730 return 0;
4731}
4732
4733static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
4734{
4735 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
4736 mdk_rdev_t *rdev;
4737 dev_t dev = MKDEV(info->major,info->minor);
4738
4739 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
4740 return -EOVERFLOW;
4741
4742 if (!mddev->raid_disks) {
4743 int err;
4744
4745 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
4746 if (IS_ERR(rdev)) {
4747 printk(KERN_WARNING
4748 "md: md_import_device returned %ld\n",
4749 PTR_ERR(rdev));
4750 return PTR_ERR(rdev);
4751 }
4752 if (!list_empty(&mddev->disks)) {
4753 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
4754 mdk_rdev_t, same_set);
4755 err = super_types[mddev->major_version]
4756 .load_super(rdev, rdev0, mddev->minor_version);
4757 if (err < 0) {
4758 printk(KERN_WARNING
4759 "md: %s has different UUID to %s\n",
4760 bdevname(rdev->bdev,b),
4761 bdevname(rdev0->bdev,b2));
4762 export_rdev(rdev);
4763 return -EINVAL;
4764 }
4765 }
4766 err = bind_rdev_to_array(rdev, mddev);
4767 if (err)
4768 export_rdev(rdev);
4769 return err;
4770 }
4771
4772
4773
4774
4775
4776
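	/*
	 * Once the array is running, add_new_disk is used to hot-add
	 * spares; with persistent metadata the device must already carry
	 * a suitable superblock.
	 */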
4777 if (mddev->pers) {
4778 int err;
4779 if (!mddev->pers->hot_add_disk) {
4780 printk(KERN_WARNING
4781 "%s: personality does not support diskops!\n",
4782 mdname(mddev));
4783 return -EINVAL;
4784 }
4785 if (mddev->persistent)
4786 rdev = md_import_device(dev, mddev->major_version,
4787 mddev->minor_version);
4788 else
4789 rdev = md_import_device(dev, -1, -1);
4790 if (IS_ERR(rdev)) {
4791 printk(KERN_WARNING
4792 "md: md_import_device returned %ld\n",
4793 PTR_ERR(rdev));
4794 return PTR_ERR(rdev);
4795 }
4796
4797 if (!mddev->persistent) {
4798 if (info->state & (1<<MD_DISK_SYNC) &&
4799 info->raid_disk < mddev->raid_disks)
4800 rdev->raid_disk = info->raid_disk;
4801 else
4802 rdev->raid_disk = -1;
4803 } else
4804 super_types[mddev->major_version].
4805 validate_super(mddev, rdev);
4806 rdev->saved_raid_disk = rdev->raid_disk;
4807
4808 clear_bit(In_sync, &rdev->flags);
4809 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
4810 set_bit(WriteMostly, &rdev->flags);
4811 else
4812 clear_bit(WriteMostly, &rdev->flags);
4813
4814 rdev->raid_disk = -1;
4815 err = bind_rdev_to_array(rdev, mddev);
4816 if (!err && !mddev->pers->hot_remove_disk) {
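			/* personalities that provide hot_add_disk but no
			 * hot_remove_disk add disks only for geometry
			 * changes, so the device is added immediately
			 */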
4817
4818
4819
4820
4821 super_types[mddev->major_version].
4822 validate_super(mddev, rdev);
4823 err = mddev->pers->hot_add_disk(mddev, rdev);
4824 if (err)
4825 unbind_rdev_from_array(rdev);
4826 }
4827 if (err)
4828 export_rdev(rdev);
4829 else
4830 sysfs_notify_dirent(rdev->sysfs_state);
4831
4832 md_update_sb(mddev, 1);
4833 if (mddev->degraded)
4834 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4835 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4836 md_wakeup_thread(mddev->thread);
4837 return err;
4838 }
4839
4840
4841
4842
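	/* otherwise this is the legacy path: disks may only be added to a
	 * stopped array when it uses version-0.90 (or no) superblocks
	 */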
4843 if (mddev->major_version != 0) {
4844 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
4845 mdname(mddev));
4846 return -EINVAL;
4847 }
4848
4849 if (!(info->state & (1<<MD_DISK_FAULTY))) {
4850 int err;
4851 rdev = md_import_device(dev, -1, 0);
4852 if (IS_ERR(rdev)) {
4853 printk(KERN_WARNING
4854 "md: error, md_import_device() returned %ld\n",
4855 PTR_ERR(rdev));
4856 return PTR_ERR(rdev);
4857 }
4858 rdev->desc_nr = info->number;
4859 if (info->raid_disk < mddev->raid_disks)
4860 rdev->raid_disk = info->raid_disk;
4861 else
4862 rdev->raid_disk = -1;
4863
4864 if (rdev->raid_disk < mddev->raid_disks)
4865 if (info->state & (1<<MD_DISK_SYNC))
4866 set_bit(In_sync, &rdev->flags);
4867
4868 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
4869 set_bit(WriteMostly, &rdev->flags);
4870
4871 if (!mddev->persistent) {
4872 printk(KERN_INFO "md: nonpersistent superblock ...\n");
4873 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
4874 } else
4875 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
4876 rdev->sectors = rdev->sb_start;
4877
4878 err = bind_rdev_to_array(rdev, mddev);
4879 if (err) {
4880 export_rdev(rdev);
4881 return err;
4882 }
4883 }
4884
4885 return 0;
4886}
4887
4888static int hot_remove_disk(mddev_t * mddev, dev_t dev)
4889{
4890 char b[BDEVNAME_SIZE];
4891 mdk_rdev_t *rdev;
4892
4893 rdev = find_rdev(mddev, dev);
4894 if (!rdev)
4895 return -ENXIO;
4896
4897 if (rdev->raid_disk >= 0)
4898 goto busy;
4899
4900 kick_rdev_from_array(rdev);
4901 md_update_sb(mddev, 1);
4902 md_new_event(mddev);
4903
4904 return 0;
4905busy:
4906 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
4907 bdevname(rdev->bdev,b), mdname(mddev));
4908 return -EBUSY;
4909}
4910
4911static int hot_add_disk(mddev_t * mddev, dev_t dev)
4912{
4913 char b[BDEVNAME_SIZE];
4914 int err;
4915 mdk_rdev_t *rdev;
4916
4917 if (!mddev->pers)
4918 return -ENODEV;
4919
4920 if (mddev->major_version != 0) {
4921 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
4922 " version-0 superblocks.\n",
4923 mdname(mddev));
4924 return -EINVAL;
4925 }
4926 if (!mddev->pers->hot_add_disk) {
4927 printk(KERN_WARNING
4928 "%s: personality does not support diskops!\n",
4929 mdname(mddev));
4930 return -EINVAL;
4931 }
4932
4933 rdev = md_import_device(dev, -1, 0);
4934 if (IS_ERR(rdev)) {
4935 printk(KERN_WARNING
4936 "md: error, md_import_device() returned %ld\n",
4937 PTR_ERR(rdev));
4938 return -EINVAL;
4939 }
4940
4941 if (mddev->persistent)
4942 rdev->sb_start = calc_dev_sboffset(rdev->bdev);
4943 else
4944 rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
4945
4946 rdev->sectors = rdev->sb_start;
4947
4948 if (test_bit(Faulty, &rdev->flags)) {
4949 printk(KERN_WARNING
4950 "md: can not hot-add faulty %s disk to %s!\n",
4951 bdevname(rdev->bdev,b), mdname(mddev));
4952 err = -EINVAL;
4953 goto abort_export;
4954 }
4955 clear_bit(In_sync, &rdev->flags);
4956 rdev->desc_nr = -1;
4957 rdev->saved_raid_disk = -1;
4958 err = bind_rdev_to_array(rdev, mddev);
4959 if (err)
4960 goto abort_export;
4961
4962
4963
4964
4965
4966
4967 rdev->raid_disk = -1;
4968
4969 md_update_sb(mddev, 1);
4970
4971
4972
4973
4974
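	/*
	 * kick off recovery so the new spare can be rebuilt into the
	 * array immediately if it is needed
	 */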
4975 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4976 md_wakeup_thread(mddev->thread);
4977 md_new_event(mddev);
4978 return 0;
4979
4980abort_export:
4981 export_rdev(rdev);
4982 return err;
4983}
4984
4985static int set_bitmap_file(mddev_t *mddev, int fd)
4986{
4987 int err;
4988
4989 if (mddev->pers) {
4990 if (!mddev->pers->quiesce)
4991 return -EBUSY;
4992 if (mddev->recovery || mddev->sync_thread)
4993 return -EBUSY;
4994
4995 }
4996
4997
4998 if (fd >= 0) {
4999 if (mddev->bitmap)
5000 return -EEXIST;
5001 mddev->bitmap_file = fget(fd);
5002
5003 if (mddev->bitmap_file == NULL) {
5004 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5005 mdname(mddev));
5006 return -EBADF;
5007 }
5008
5009 err = deny_bitmap_write_access(mddev->bitmap_file);
5010 if (err) {
5011 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
5012 mdname(mddev));
5013 fput(mddev->bitmap_file);
5014 mddev->bitmap_file = NULL;
5015 return err;
5016 }
5017 mddev->bitmap_offset = 0;
5018 } else if (mddev->bitmap == NULL)
5019 return -ENOENT;
5020 err = 0;
5021 if (mddev->pers) {
5022 mddev->pers->quiesce(mddev, 1);
5023 if (fd >= 0)
5024 err = bitmap_create(mddev);
5025 if (fd < 0 || err) {
5026 bitmap_destroy(mddev);
5027 fd = -1;
5028 }
5029 mddev->pers->quiesce(mddev, 0);
5030 }
5031 if (fd < 0) {
5032 if (mddev->bitmap_file) {
5033 restore_bitmap_write_access(mddev->bitmap_file);
5034 fput(mddev->bitmap_file);
5035 }
5036 mddev->bitmap_file = NULL;
5037 }
5038
5039 return err;
5040}
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
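/*
 * set_array_info is used in two ways.  With raid_disks == 0 it simply
 * records which superblock version to expect when assembling from
 * existing devices.  With raid_disks > 0 it creates a new array from the
 * supplied geometry, using version-0.90 metadata.
 */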
5055static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
5056{
5057
5058 if (info->raid_disks == 0) {
5059
5060 if (info->major_version < 0 ||
5061 info->major_version >= ARRAY_SIZE(super_types) ||
5062 super_types[info->major_version].name == NULL) {
5063
5064 printk(KERN_INFO
5065 "md: superblock version %d not known\n",
5066 info->major_version);
5067 return -EINVAL;
5068 }
5069 mddev->major_version = info->major_version;
5070 mddev->minor_version = info->minor_version;
5071 mddev->patch_version = info->patch_version;
5072 mddev->persistent = !info->not_persistent;
5073 return 0;
5074 }
5075 mddev->major_version = MD_MAJOR_VERSION;
5076 mddev->minor_version = MD_MINOR_VERSION;
5077 mddev->patch_version = MD_PATCHLEVEL_VERSION;
5078 mddev->ctime = get_seconds();
5079
5080 mddev->level = info->level;
5081 mddev->clevel[0] = 0;
5082 mddev->dev_sectors = 2 * (sector_t)info->size;
5083 mddev->raid_disks = info->raid_disks;
5084
5085
5086
5087 if (info->state & (1<<MD_SB_CLEAN))
5088 mddev->recovery_cp = MaxSector;
5089 else
5090 mddev->recovery_cp = 0;
5091 mddev->persistent = ! info->not_persistent;
5092 mddev->external = 0;
5093
5094 mddev->layout = info->layout;
5095 mddev->chunk_sectors = info->chunk_size >> 9;
5096
5097 mddev->max_disks = MD_SB_DISKS;
5098
5099 if (mddev->persistent)
5100 mddev->flags = 0;
5101 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5102
5103 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
5104 mddev->bitmap_offset = 0;
5105
5106 mddev->reshape_position = MaxSector;
5107
5108
5109
5110
5111 get_random_bytes(mddev->uuid, 16);
5112
5113 mddev->new_level = mddev->level;
5114 mddev->new_chunk_sectors = mddev->chunk_sectors;
5115 mddev->new_layout = mddev->layout;
5116 mddev->delta_disks = 0;
5117
5118 return 0;
5119}
5120
5121void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors)
5122{
5123 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
5124
5125 if (mddev->external_size)
5126 return;
5127
5128 mddev->array_sectors = array_sectors;
5129}
5130EXPORT_SYMBOL(md_set_array_sectors);
5131
5132static int update_size(mddev_t *mddev, sector_t num_sectors)
5133{
5134 mdk_rdev_t *rdev;
5135 int rv;
5136 int fit = (num_sectors == 0);
5137
5138 if (mddev->pers->resize == NULL)
5139 return -EINVAL;
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
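	/* "num_sectors" is the amount of each device to use.  Resizing is
	 * only allowed while no resync or reconstruction is running, and
	 * the new size must fit on every member device.  A value of zero
	 * means "use the largest size that fits".
	 */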
5150 if (mddev->sync_thread)
5151 return -EBUSY;
5152	if (mddev->bitmap)
		/* cannot change the device size while a bitmap is present;
		 * it would need to be removed, the array resized, and the
		 * bitmap re-created
		 */
5156		return -EBUSY;
5157 list_for_each_entry(rdev, &mddev->disks, same_set) {
5158 sector_t avail = rdev->sectors;
5159
5160 if (fit && (num_sectors == 0 || num_sectors > avail))
5161 num_sectors = avail;
5162 if (avail < num_sectors)
5163 return -ENOSPC;
5164 }
5165 rv = mddev->pers->resize(mddev, num_sectors);
5166 if (!rv)
5167 revalidate_disk(mddev->gendisk);
5168 return rv;
5169}
5170
5171static int update_raid_disks(mddev_t *mddev, int raid_disks)
5172{
5173 int rv;
5174
5175 if (mddev->pers->check_reshape == NULL)
5176 return -EINVAL;
5177 if (raid_disks <= 0 ||
5178 raid_disks >= mddev->max_disks)
5179 return -EINVAL;
5180 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
5181 return -EBUSY;
5182 mddev->delta_disks = raid_disks - mddev->raid_disks;
5183
5184 rv = mddev->pers->check_reshape(mddev);
5185 return rv;
5186}
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
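/*
 * update_array_info changes the configuration of a running array.
 * Version, ctime, level, persistence and chunk size must match the
 * existing array; only one of size, raid_disks, layout or the
 * bitmap-present flag may be changed per call.
 */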
5197static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5198{
5199 int rv = 0;
5200 int cnt = 0;
5201 int state = 0;
5202
5203
5204 if (mddev->bitmap && mddev->bitmap_offset)
5205 state |= (1 << MD_SB_BITMAP_PRESENT);
5206
5207 if (mddev->major_version != info->major_version ||
5208 mddev->minor_version != info->minor_version ||
5209
5210 mddev->ctime != info->ctime ||
5211 mddev->level != info->level ||
5212
5213	    !mddev->persistent != info->not_persistent ||
5214 mddev->chunk_sectors != info->chunk_size >> 9 ||
5215
5216 ((state^info->state) & 0xfffffe00)
5217 )
5218 return -EINVAL;
5219
5220 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
5221 cnt++;
5222 if (mddev->raid_disks != info->raid_disks)
5223 cnt++;
5224 if (mddev->layout != info->layout)
5225 cnt++;
5226 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
5227 cnt++;
5228 if (cnt == 0)
5229 return 0;
5230 if (cnt > 1)
5231 return -EINVAL;
5232
5233 if (mddev->layout != info->layout) {
5234
5235
5236
5237
5238 if (mddev->pers->check_reshape == NULL)
5239 return -EINVAL;
5240 else {
5241 mddev->new_layout = info->layout;
5242 rv = mddev->pers->check_reshape(mddev);
5243 if (rv)
5244 mddev->new_layout = mddev->layout;
5245 return rv;
5246 }
5247 }
5248 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
5249 rv = update_size(mddev, (sector_t)info->size * 2);
5250
5251 if (mddev->raid_disks != info->raid_disks)
5252 rv = update_raid_disks(mddev, info->raid_disks);
5253
5254 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
5255 if (mddev->pers->quiesce == NULL)
5256 return -EINVAL;
5257 if (mddev->recovery || mddev->sync_thread)
5258 return -EBUSY;
5259 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
5260
5261 if (mddev->bitmap)
5262 return -EEXIST;
5263 if (mddev->default_bitmap_offset == 0)
5264 return -EINVAL;
5265 mddev->bitmap_offset = mddev->default_bitmap_offset;
5266 mddev->pers->quiesce(mddev, 1);
5267 rv = bitmap_create(mddev);
5268 if (rv)
5269 bitmap_destroy(mddev);
5270 mddev->pers->quiesce(mddev, 0);
5271 } else {
5272
5273 if (!mddev->bitmap)
5274 return -ENOENT;
5275 if (mddev->bitmap->file)
5276 return -EINVAL;
5277 mddev->pers->quiesce(mddev, 1);
5278 bitmap_destroy(mddev);
5279 mddev->pers->quiesce(mddev, 0);
5280 mddev->bitmap_offset = 0;
5281 }
5282 }
5283 md_update_sb(mddev, 1);
5284 return rv;
5285}
5286
5287static int set_disk_faulty(mddev_t *mddev, dev_t dev)
5288{
5289 mdk_rdev_t *rdev;
5290
5291 if (mddev->pers == NULL)
5292 return -ENODEV;
5293
5294 rdev = find_rdev(mddev, dev);
5295 if (!rdev)
5296 return -ENODEV;
5297
5298 md_error(mddev, rdev);
5299 return 0;
5300}
5301
5302
5303
5304
5305
5306
5307
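/*
 * An md array has no natural CHS geometry, so report a fixed 2-head,
 * 4-sector layout and derive the cylinder count from the capacity; this
 * gives tools that still ask for a geometry something consistent.
 */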
5308static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
5309{
5310 mddev_t *mddev = bdev->bd_disk->private_data;
5311
5312 geo->heads = 2;
5313 geo->sectors = 4;
5314 geo->cylinders = get_capacity(mddev->gendisk) / 8;
5315 return 0;
5316}
5317
5318static int md_ioctl(struct block_device *bdev, fmode_t mode,
5319 unsigned int cmd, unsigned long arg)
5320{
5321 int err = 0;
5322 void __user *argp = (void __user *)arg;
5323 mddev_t *mddev = NULL;
5324
5325 if (!capable(CAP_SYS_ADMIN))
5326 return -EACCES;
5327
5328
5329
5330
5331
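	/* commands that affect the RAID driver as a whole, not any
	 * particular array, and so need no mddev lock
	 */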
5332 switch (cmd)
5333 {
5334 case RAID_VERSION:
5335 err = get_version(argp);
5336 goto done;
5337
5338 case PRINT_RAID_DEBUG:
5339 err = 0;
5340 md_print_devices();
5341 goto done;
5342
5343#ifndef MODULE
5344 case RAID_AUTORUN:
5345 err = 0;
5346 autostart_arrays(arg);
5347 goto done;
5348#endif
5349 default:;
5350 }
5351
5352
5353
5354
5355
5356 mddev = bdev->bd_disk->private_data;
5357
5358 if (!mddev) {
5359 BUG();
5360 goto abort;
5361 }
5362
5363 err = mddev_lock(mddev);
5364 if (err) {
5365 printk(KERN_INFO
5366 "md: ioctl lock interrupted, reason %d, cmd %d\n",
5367 err, cmd);
5368 goto abort;
5369 }
5370
5371 switch (cmd)
5372 {
5373 case SET_ARRAY_INFO:
5374 {
5375 mdu_array_info_t info;
5376 if (!arg)
5377 memset(&info, 0, sizeof(info));
5378 else if (copy_from_user(&info, argp, sizeof(info))) {
5379 err = -EFAULT;
5380 goto abort_unlock;
5381 }
5382 if (mddev->pers) {
5383 err = update_array_info(mddev, &info);
5384 if (err) {
5385 printk(KERN_WARNING "md: couldn't update"
5386 " array info. %d\n", err);
5387 goto abort_unlock;
5388 }
5389 goto done_unlock;
5390 }
5391 if (!list_empty(&mddev->disks)) {
5392 printk(KERN_WARNING
5393 "md: array %s already has disks!\n",
5394 mdname(mddev));
5395 err = -EBUSY;
5396 goto abort_unlock;
5397 }
5398 if (mddev->raid_disks) {
5399 printk(KERN_WARNING
5400 "md: array %s already initialised!\n",
5401 mdname(mddev));
5402 err = -EBUSY;
5403 goto abort_unlock;
5404 }
5405 err = set_array_info(mddev, &info);
5406 if (err) {
5407 printk(KERN_WARNING "md: couldn't set"
5408 " array info. %d\n", err);
5409 goto abort_unlock;
5410 }
5411 }
5412 goto done_unlock;
5413
5414 default:;
5415 }
5416
5417
5418
5419
5420
5421
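	/* if the array has not been initialised yet, only ADD_NEW_DISK,
	 * STOP_ARRAY, RUN_ARRAY and the bitmap-file ioctls are permitted
	 */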
5422 if ((!mddev->raid_disks && !mddev->external)
5423 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
5424 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
5425 && cmd != GET_BITMAP_FILE) {
5426 err = -ENODEV;
5427 goto abort_unlock;
5428 }
5429
5430
5431
5432
5433 switch (cmd)
5434 {
5435 case GET_ARRAY_INFO:
5436 err = get_array_info(mddev, argp);
5437 goto done_unlock;
5438
5439 case GET_BITMAP_FILE:
5440 err = get_bitmap_file(mddev, argp);
5441 goto done_unlock;
5442
5443 case GET_DISK_INFO:
5444 err = get_disk_info(mddev, argp);
5445 goto done_unlock;
5446
5447 case RESTART_ARRAY_RW:
5448 err = restart_array(mddev);
5449 goto done_unlock;
5450
5451 case STOP_ARRAY:
5452 err = do_md_stop(mddev, 0, 1);
5453 goto done_unlock;
5454
5455 case STOP_ARRAY_RO:
5456 err = do_md_stop(mddev, 1, 1);
5457 goto done_unlock;
5458
5459 }
5460
5461
5462
5463
5464
5465
5466
5467
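	/* the remaining MD ioctls modify the array: an auto-read-only
	 * array (ro == 2) is switched to read-write first, while a
	 * genuinely read-only array rejects them with -EROFS
	 */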
5468 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
5469 if (mddev->ro == 2) {
5470 mddev->ro = 0;
5471 sysfs_notify_dirent(mddev->sysfs_state);
5472 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5473 md_wakeup_thread(mddev->thread);
5474 } else {
5475 err = -EROFS;
5476 goto abort_unlock;
5477 }
5478 }
5479
5480 switch (cmd)
5481 {
5482 case ADD_NEW_DISK:
5483 {
5484 mdu_disk_info_t info;
5485 if (copy_from_user(&info, argp, sizeof(info)))
5486 err = -EFAULT;
5487 else
5488 err = add_new_disk(mddev, &info);
5489 goto done_unlock;
5490 }
5491
5492 case HOT_REMOVE_DISK:
5493 err = hot_remove_disk(mddev, new_decode_dev(arg));
5494 goto done_unlock;
5495
5496 case HOT_ADD_DISK:
5497 err = hot_add_disk(mddev, new_decode_dev(arg));
5498 goto done_unlock;
5499
5500 case SET_DISK_FAULTY:
5501 err = set_disk_faulty(mddev, new_decode_dev(arg));
5502 goto done_unlock;
5503
5504 case RUN_ARRAY:
5505 err = do_md_run(mddev);
5506 goto done_unlock;
5507
5508 case SET_BITMAP_FILE:
5509 err = set_bitmap_file(mddev, (int)arg);
5510 goto done_unlock;
5511
5512 default:
5513 err = -EINVAL;
5514 goto abort_unlock;
5515 }
5516
5517done_unlock:
5518abort_unlock:
5519 if (mddev->hold_active == UNTIL_IOCTL &&
5520 err != -EINVAL)
5521 mddev->hold_active = 0;
5522 mddev_unlock(mddev);
5523
5524 return err;
5525done:
5526 if (err)
5527 MD_BUG();
5528abort:
5529 return err;
5530}
5531
5532static int md_open(struct block_device *bdev, fmode_t mode)
5533{
5534
5535
5536
5537
5538 mddev_t *mddev = mddev_find(bdev->bd_dev);
5539 int err;
5540
5541 if (mddev->gendisk != bdev->bd_disk) {
5542
5543
5544
5545 mddev_put(mddev);
5546
5547 flush_scheduled_work();
5548
5549 return -ERESTARTSYS;
5550 }
5551 BUG_ON(mddev != bdev->bd_disk->private_data);
5552
5553 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
5554 goto out;
5555
5556 err = 0;
5557 atomic_inc(&mddev->openers);
5558 mutex_unlock(&mddev->open_mutex);
5559
5560 check_disk_change(bdev);
5561 out:
5562 return err;
5563}
5564
5565static int md_release(struct gendisk *disk, fmode_t mode)
5566{
5567 mddev_t *mddev = disk->private_data;
5568
5569 BUG_ON(!mddev);
5570 atomic_dec(&mddev->openers);
5571 mddev_put(mddev);
5572
5573 return 0;
5574}
5575
5576static int md_media_changed(struct gendisk *disk)
5577{
5578 mddev_t *mddev = disk->private_data;
5579
5580 return mddev->changed;
5581}
5582
5583static int md_revalidate(struct gendisk *disk)
5584{
5585 mddev_t *mddev = disk->private_data;
5586
5587 mddev->changed = 0;
5588 return 0;
5589}
5590static const struct block_device_operations md_fops =
5591{
5592 .owner = THIS_MODULE,
5593 .open = md_open,
5594 .release = md_release,
5595 .ioctl = md_ioctl,
5596 .getgeo = md_getgeo,
5597 .media_changed = md_media_changed,
5598 .revalidate_disk= md_revalidate,
5599};
5600
5601static int md_thread(void * arg)
5602{
5603 mdk_thread_t *thread = arg;
5604
5605
5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
5616
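	/*
	 * Service loop for a per-array helper thread: sleep until
	 * THREAD_WAKEUP is set (or the timeout expires), then call the
	 * handler registered via md_register_thread().
	 */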
5617 allow_signal(SIGKILL);
5618 while (!kthread_should_stop()) {
5619
5620
5621
5622
5623
5624
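		/* wait in TASK_INTERRUPTIBLE so this kernel thread does not
		 * contribute to the load average; that only works if no
		 * signal is left pending
		 */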
5625 if (signal_pending(current))
5626 flush_signals(current);
5627
5628 wait_event_interruptible_timeout
5629 (thread->wqueue,
5630 test_bit(THREAD_WAKEUP, &thread->flags)
5631 || kthread_should_stop(),
5632 thread->timeout);
5633
5634 clear_bit(THREAD_WAKEUP, &thread->flags);
5635
5636 thread->run(thread->mddev);
5637 }
5638
5639 return 0;
5640}
5641
5642void md_wakeup_thread(mdk_thread_t *thread)
5643{
5644 if (thread) {
5645 dprintk("md: waking up MD thread %s.\n", thread->tsk->comm);
5646 set_bit(THREAD_WAKEUP, &thread->flags);
5647 wake_up(&thread->wqueue);
5648 }
5649}
5650
5651mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
5652 const char *name)
5653{
5654 mdk_thread_t *thread;
5655
5656 thread = kzalloc(sizeof(mdk_thread_t), GFP_KERNEL);
5657 if (!thread)
5658 return NULL;
5659
5660 init_waitqueue_head(&thread->wqueue);
5661
5662 thread->run = run;
5663 thread->mddev = mddev;
5664 thread->timeout = MAX_SCHEDULE_TIMEOUT;
5665 thread->tsk = kthread_run(md_thread, thread,
5666 "%s_%s",
5667 mdname(thread->mddev),
5668 name ?: mddev->pers->name);
5669 if (IS_ERR(thread->tsk)) {
5670 kfree(thread);
5671 return NULL;
5672 }
5673 return thread;
5674}
5675
5676void md_unregister_thread(mdk_thread_t *thread)
5677{
5678 if (!thread)
5679 return;
5680 dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
5681
5682 kthread_stop(thread->tsk);
5683 kfree(thread);
5684}
5685
5686void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
5687{
5688 if (!mddev) {
5689 MD_BUG();
5690 return;
5691 }
5692
5693 if (!rdev || test_bit(Faulty, &rdev->flags))
5694 return;
5695
5696 if (mddev->external)
5697 set_bit(Blocked, &rdev->flags);
5698
5699
5700
5701
5702
5703
5704
5705 if (!mddev->pers)
5706 return;
5707 if (!mddev->pers->error_handler)
5708 return;
5709 mddev->pers->error_handler(mddev,rdev);
5710 if (mddev->degraded)
5711 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5712 set_bit(StateChanged, &rdev->flags);
5713 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5714 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5715 md_wakeup_thread(mddev->thread);
5716 md_new_event_inintr(mddev);
5717}
5718
5719
5720
5721static void status_unused(struct seq_file *seq)
5722{
5723 int i = 0;
5724 mdk_rdev_t *rdev;
5725
5726 seq_printf(seq, "unused devices: ");
5727
5728 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
5729 char b[BDEVNAME_SIZE];
5730 i++;
5731 seq_printf(seq, "%s ",
5732 bdevname(rdev->bdev,b));
5733 }
5734 if (!i)
5735 seq_printf(seq, "<none>");
5736
5737 seq_printf(seq, "\n");
5738}
5739
5740
5741static void status_resync(struct seq_file *seq, mddev_t * mddev)
5742{
5743 sector_t max_sectors, resync, res;
5744 unsigned long dt, db;
5745 sector_t rt;
5746 int scale;
5747 unsigned int per_milli;
5748
5749 resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
5750
5751 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
5752 max_sectors = mddev->resync_max_sectors;
5753 else
5754 max_sectors = mddev->dev_sectors;
5755
5756
5757
5758
5759 if (!max_sectors) {
5760 MD_BUG();
5761 return;
5762 }
5763
5764
5765
5766
5767
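	/* pick 'scale' so that (resync>>scale)*1000 cannot overflow and
	 * (max_sectors>>scale) fits the 32-bit divisor used below
	 */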
5768 scale = 10;
5769 if (sizeof(sector_t) > sizeof(unsigned long)) {
5770 while ( max_sectors/2 > (1ULL<<(scale+32)))
5771 scale++;
5772 }
5773 res = (resync>>scale)*1000;
5774 sector_div(res, (u32)((max_sectors>>scale)+1));
5775
5776 per_milli = res;
5777 {
5778 int i, x = per_milli/50, y = 20-x;
5779 seq_printf(seq, "[");
5780 for (i = 0; i < x; i++)
5781 seq_printf(seq, "=");
5782 seq_printf(seq, ">");
5783 for (i = 0; i < y; i++)
5784 seq_printf(seq, ".");
5785 seq_printf(seq, "] ");
5786 }
5787 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
5788 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
5789 "reshape" :
5790 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
5791 "check" :
5792 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
5793 "resync" : "recovery"))),
5794 per_milli/10, per_milli % 10,
5795 (unsigned long long) resync/2,
5796 (unsigned long long) max_sectors/2);
5797
5798
5799
5800
5801
5802
5803
5804
5805
5806
5807
5808
5809
5810
5811
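	/*
	 * Estimate the remaining time from progress since the last resync
	 * mark: dt is the elapsed time, db the sectors completed in that
	 * window.  Dividing by db/32 and shifting the result back by 5
	 * keeps precision without overflowing the sector_t arithmetic.
	 */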
5812 dt = ((jiffies - mddev->resync_mark) / HZ);
5813 if (!dt) dt++;
5814 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
5815 - mddev->resync_mark_cnt;
5816
5817 rt = max_sectors - resync;
5818 sector_div(rt, db/32+1);
5819 rt *= dt;
5820 rt >>= 5;
5821
5822 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
5823 ((unsigned long)rt % 60)/6);
5824
5825 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
5826}
5827
5828static void *md_seq_start(struct seq_file *seq, loff_t *pos)
5829{
5830 struct list_head *tmp;
5831 loff_t l = *pos;
5832 mddev_t *mddev;
5833
5834 if (l >= 0x10000)
5835 return NULL;
5836	if (!l--)
		/* the first position produces the header line */
5838		return (void*)1;
5839
5840 spin_lock(&all_mddevs_lock);
5841 list_for_each(tmp,&all_mddevs)
5842 if (!l--) {
5843 mddev = list_entry(tmp, mddev_t, all_mddevs);
5844 mddev_get(mddev);
5845 spin_unlock(&all_mddevs_lock);
5846 return mddev;
5847 }
5848 spin_unlock(&all_mddevs_lock);
5849 if (!l--)
5850 return (void*)2;
5851 return NULL;
5852}
5853
5854static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
5855{
5856 struct list_head *tmp;
5857 mddev_t *next_mddev, *mddev = v;
5858
5859 ++*pos;
5860 if (v == (void*)2)
5861 return NULL;
5862
5863 spin_lock(&all_mddevs_lock);
5864 if (v == (void*)1)
5865 tmp = all_mddevs.next;
5866 else
5867 tmp = mddev->all_mddevs.next;
5868 if (tmp != &all_mddevs)
5869 next_mddev = mddev_get(list_entry(tmp,mddev_t,all_mddevs));
5870 else {
5871 next_mddev = (void*)2;
5872 *pos = 0x10000;
5873 }
5874 spin_unlock(&all_mddevs_lock);
5875
5876 if (v != (void*)1)
5877 mddev_put(mddev);
5878 return next_mddev;
5879
5880}
5881
5882static void md_seq_stop(struct seq_file *seq, void *v)
5883{
5884 mddev_t *mddev = v;
5885
5886 if (mddev && v != (void*)1 && v != (void*)2)
5887 mddev_put(mddev);
5888}
5889
5890struct mdstat_info {
5891 int event;
5892};
5893
5894static int md_seq_show(struct seq_file *seq, void *v)
5895{
5896 mddev_t *mddev = v;
5897 sector_t sectors;
5898 mdk_rdev_t *rdev;
5899 struct mdstat_info *mi = seq->private;
5900 struct bitmap *bitmap;
5901
5902 if (v == (void*)1) {
5903 struct mdk_personality *pers;
5904 seq_printf(seq, "Personalities : ");
5905 spin_lock(&pers_lock);
5906 list_for_each_entry(pers, &pers_list, list)
5907 seq_printf(seq, "[%s] ", pers->name);
5908
5909 spin_unlock(&pers_lock);
5910 seq_printf(seq, "\n");
5911 mi->event = atomic_read(&md_event_count);
5912 return 0;
5913 }
5914 if (v == (void*)2) {
5915 status_unused(seq);
5916 return 0;
5917 }
5918
5919 if (mddev_lock(mddev) < 0)
5920 return -EINTR;
5921
5922 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
5923 seq_printf(seq, "%s : %sactive", mdname(mddev),
5924 mddev->pers ? "" : "in");
5925 if (mddev->pers) {
5926 if (mddev->ro==1)
5927 seq_printf(seq, " (read-only)");
5928 if (mddev->ro==2)
5929 seq_printf(seq, " (auto-read-only)");
5930 seq_printf(seq, " %s", mddev->pers->name);
5931 }
5932
5933 sectors = 0;
5934 list_for_each_entry(rdev, &mddev->disks, same_set) {
5935 char b[BDEVNAME_SIZE];
5936 seq_printf(seq, " %s[%d]",
5937 bdevname(rdev->bdev,b), rdev->desc_nr);
5938 if (test_bit(WriteMostly, &rdev->flags))
5939 seq_printf(seq, "(W)");
5940 if (test_bit(Faulty, &rdev->flags)) {
5941 seq_printf(seq, "(F)");
5942 continue;
5943 } else if (rdev->raid_disk < 0)
5944 seq_printf(seq, "(S)");
5945 sectors += rdev->sectors;
5946 }
5947
5948 if (!list_empty(&mddev->disks)) {
5949 if (mddev->pers)
5950 seq_printf(seq, "\n %llu blocks",
5951 (unsigned long long)
5952 mddev->array_sectors / 2);
5953 else
5954 seq_printf(seq, "\n %llu blocks",
5955 (unsigned long long)sectors / 2);
5956 }
5957 if (mddev->persistent) {
5958 if (mddev->major_version != 0 ||
5959 mddev->minor_version != 90) {
5960 seq_printf(seq," super %d.%d",
5961 mddev->major_version,
5962 mddev->minor_version);
5963 }
5964 } else if (mddev->external)
5965 seq_printf(seq, " super external:%s",
5966 mddev->metadata_type);
5967 else
5968 seq_printf(seq, " super non-persistent");
5969
5970 if (mddev->pers) {
5971 mddev->pers->status(seq, mddev);
5972 seq_printf(seq, "\n ");
5973 if (mddev->pers->sync_request) {
5974 if (mddev->curr_resync > 2) {
5975 status_resync(seq, mddev);
5976 seq_printf(seq, "\n ");
5977 } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
5978 seq_printf(seq, "\tresync=DELAYED\n ");
5979 else if (mddev->recovery_cp < MaxSector)
5980 seq_printf(seq, "\tresync=PENDING\n ");
5981 }
5982 } else
5983 seq_printf(seq, "\n ");
5984
5985 if ((bitmap = mddev->bitmap)) {
5986 unsigned long chunk_kb;
5987 unsigned long flags;
5988 spin_lock_irqsave(&bitmap->lock, flags);
5989 chunk_kb = bitmap->chunksize >> 10;
5990 seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
5991 "%lu%s chunk",
5992 bitmap->pages - bitmap->missing_pages,
5993 bitmap->pages,
5994 (bitmap->pages - bitmap->missing_pages)
5995 << (PAGE_SHIFT - 10),
5996 chunk_kb ? chunk_kb : bitmap->chunksize,
5997 chunk_kb ? "KB" : "B");
5998 if (bitmap->file) {
5999 seq_printf(seq, ", file: ");
6000 seq_path(seq, &bitmap->file->f_path, " \t\n");
6001 }
6002
6003 seq_printf(seq, "\n");
6004 spin_unlock_irqrestore(&bitmap->lock, flags);
6005 }
6006
6007 seq_printf(seq, "\n");
6008 }
6009 mddev_unlock(mddev);
6010
6011 return 0;
6012}
6013
6014static const struct seq_operations md_seq_ops = {
6015 .start = md_seq_start,
6016 .next = md_seq_next,
6017 .stop = md_seq_stop,
6018 .show = md_seq_show,
6019};
6020
6021static int md_seq_open(struct inode *inode, struct file *file)
6022{
6023 int error;
6024 struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
6025 if (mi == NULL)
6026 return -ENOMEM;
6027
6028 error = seq_open(file, &md_seq_ops);
6029 if (error)
6030 kfree(mi);
6031 else {
6032 struct seq_file *p = file->private_data;
6033 p->private = mi;
6034 mi->event = atomic_read(&md_event_count);
6035 }
6036 return error;
6037}
6038
6039static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
6040{
6041 struct seq_file *m = filp->private_data;
6042 struct mdstat_info *mi = m->private;
6043 int mask;
6044
6045 poll_wait(filp, &md_event_waiters, wait);
6046
6047
6048 mask = POLLIN | POLLRDNORM;
6049
6050 if (mi->event != atomic_read(&md_event_count))
6051 mask |= POLLERR | POLLPRI;
6052 return mask;
6053}
6054
6055static const struct file_operations md_seq_fops = {
6056 .owner = THIS_MODULE,
6057 .open = md_seq_open,
6058 .read = seq_read,
6059 .llseek = seq_lseek,
6060 .release = seq_release_private,
6061 .poll = mdstat_poll,
6062};
6063
6064int register_md_personality(struct mdk_personality *p)
6065{
6066 spin_lock(&pers_lock);
6067 list_add_tail(&p->list, &pers_list);
6068 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
6069 spin_unlock(&pers_lock);
6070 return 0;
6071}
6072
6073int unregister_md_personality(struct mdk_personality *p)
6074{
6075 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
6076 spin_lock(&pers_lock);
6077 list_del_init(&p->list);
6078 spin_unlock(&pers_lock);
6079 return 0;
6080}
6081
6082static int is_mddev_idle(mddev_t *mddev, int init)
6083{
6084 mdk_rdev_t * rdev;
6085 int idle;
6086 int curr_events;
6087
6088 idle = 1;
6089 rcu_read_lock();
6090 rdev_for_each_rcu(rdev, mddev) {
6091 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
6092 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
6093 (int)part_stat_read(&disk->part0, sectors[1]) -
6094 atomic_read(&disk->sync_io);
6095
6096
6097
6098
6099
6100
6101
6102
6103
6104
6105
6106
6107
6108
6109
6110
6111
6112
6113
6114
6115
6116
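		/* sync I/O is added to sync_io when a request is issued but
		 * to disk_stats only on completion, so pure resync activity
		 * keeps curr_events at or below its previous value.  Normal
		 * I/O raises curr_events without touching sync_io; once it
		 * has grown by more than 64 sectors the array is considered
		 * busy and the resync is throttled.
		 */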
6117 if (init || curr_events - rdev->last_events > 64) {
6118 rdev->last_events = curr_events;
6119 idle = 0;
6120 }
6121 }
6122 rcu_read_unlock();
6123 return idle;
6124}
6125
6126void md_done_sync(mddev_t *mddev, int blocks, int ok)
6127{
6128
6129 atomic_sub(blocks, &mddev->recovery_active);
6130 wake_up(&mddev->recovery_wait);
6131 if (!ok) {
6132 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6133 md_wakeup_thread(mddev->thread);
6134
6135 }
6136}
6137
6138
6139
6140
6141
6142
6143
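/* md_write_start(mddev, bi)
 * If array metadata needs updating before this write can proceed (for
 * example clearing the 'clean' flag in the superblock), schedule the
 * update and wait for it to complete.
 */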
6144void md_write_start(mddev_t *mddev, struct bio *bi)
6145{
6146 int did_change = 0;
6147 if (bio_data_dir(bi) != WRITE)
6148 return;
6149
6150 BUG_ON(mddev->ro == 1);
6151 if (mddev->ro == 2) {
6152
6153 mddev->ro = 0;
6154 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6155 md_wakeup_thread(mddev->thread);
6156 md_wakeup_thread(mddev->sync_thread);
6157 did_change = 1;
6158 }
6159 atomic_inc(&mddev->writes_pending);
6160 if (mddev->safemode == 1)
6161 mddev->safemode = 0;
6162 if (mddev->in_sync) {
6163 spin_lock_irq(&mddev->write_lock);
6164 if (mddev->in_sync) {
6165 mddev->in_sync = 0;
6166 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6167 md_wakeup_thread(mddev->thread);
6168 did_change = 1;
6169 }
6170 spin_unlock_irq(&mddev->write_lock);
6171 }
6172 if (did_change)
6173 sysfs_notify_dirent(mddev->sysfs_state);
6174 wait_event(mddev->sb_wait,
6175 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
6176 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6177}
6178
6179void md_write_end(mddev_t *mddev)
6180{
6181 if (atomic_dec_and_test(&mddev->writes_pending)) {
6182 if (mddev->safemode == 2)
6183 md_wakeup_thread(mddev->thread);
6184 else if (mddev->safemode_delay)
6185 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
6186 }
6187}
6188
6189
6190
6191
6192
6193
6194
6195
6196
6197
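/* md_allow_write(mddev)
 * Make sure the array is marked active so that writes can proceed
 * without first blocking on a metadata update; useful before making
 * GFP_KERNEL allocations while holding the mddev lock.  When the
 * metadata is managed externally the transition cannot finish here, so
 * -EAGAIN is returned after notifying user space.
 */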
6198int md_allow_write(mddev_t *mddev)
6199{
6200 if (!mddev->pers)
6201 return 0;
6202 if (mddev->ro)
6203 return 0;
6204 if (!mddev->pers->sync_request)
6205 return 0;
6206
6207 spin_lock_irq(&mddev->write_lock);
6208 if (mddev->in_sync) {
6209 mddev->in_sync = 0;
6210 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6211 if (mddev->safemode_delay &&
6212 mddev->safemode == 0)
6213 mddev->safemode = 1;
6214 spin_unlock_irq(&mddev->write_lock);
6215 md_update_sb(mddev, 0);
6216 sysfs_notify_dirent(mddev->sysfs_state);
6217 } else
6218 spin_unlock_irq(&mddev->write_lock);
6219
6220 if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
6221 return -EAGAIN;
6222 else
6223 return 0;
6224}
6225EXPORT_SYMBOL_GPL(md_allow_write);
6226
6227#define SYNC_MARKS 10
6228#define SYNC_MARK_STEP (3*HZ)
6229void md_do_sync(mddev_t *mddev)
6230{
6231 mddev_t *mddev2;
6232 unsigned int currspeed = 0,
6233 window;
6234 sector_t max_sectors,j, io_sectors;
6235 unsigned long mark[SYNC_MARKS];
6236 sector_t mark_cnt[SYNC_MARKS];
6237 int last_mark,m;
6238 struct list_head *tmp;
6239 sector_t last_check;
6240 int skipped = 0;
6241 mdk_rdev_t *rdev;
6242 char *desc;
6243
6244
6245 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
6246 return;
6247 if (mddev->ro)
6248 return;
6249
6250 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
6251 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
6252 desc = "data-check";
6253 else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
6254 desc = "requested-resync";
6255 else
6256 desc = "resync";
6257 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6258 desc = "reshape";
6259 else
6260 desc = "recovery";
6261
6262
6263
6264
6265
6266
6267
6268
6269
6270
6271
6272
6273
6274
6275
6276
6277
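	/* curr_resync is overloaded while a sync is being started:
	 *   0     - no resync in progress
	 *   2     - checking whether another array on the same devices is
	 *           already syncing
	 *   1     - yielded to a conflicting resync
	 *   other - active resync, this many sectors completed
	 * We first set curr_resync to 2 and then wait on resync_wait until
	 * no array sharing physical devices is busy; the lower-addressed
	 * mddev arbitrarily yields so that two arrays starting at the same
	 * time cannot deadlock against each other.
	 */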
6278 do {
6279 mddev->curr_resync = 2;
6280
6281 try_again:
6282 if (kthread_should_stop()) {
6283 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6284 goto skip;
6285 }
6286 for_each_mddev(mddev2, tmp) {
6287 if (mddev2 == mddev)
6288 continue;
6289 if (!mddev->parallel_resync
6290 && mddev2->curr_resync
6291 && match_mddev_units(mddev, mddev2)) {
6292 DEFINE_WAIT(wq);
6293 if (mddev < mddev2 && mddev->curr_resync == 2) {
6294
6295 mddev->curr_resync = 1;
6296 wake_up(&resync_wait);
6297 }
6298			if (mddev > mddev2 && mddev->curr_resync == 1)
				/* no need to wait here; we can wait the next
				 * time round, when curr_resync == 2
				 */
6302				continue;
6303
6304
6305
6306
6307 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
6308 if (!kthread_should_stop() &&
6309 mddev2->curr_resync >= mddev->curr_resync) {
6310 printk(KERN_INFO "md: delaying %s of %s"
6311 " until %s has finished (they"
6312 " share one or more physical units)\n",
6313 desc, mdname(mddev), mdname(mddev2));
6314 mddev_put(mddev2);
6315 if (signal_pending(current))
6316 flush_signals(current);
6317 schedule();
6318 finish_wait(&resync_wait, &wq);
6319 goto try_again;
6320 }
6321 finish_wait(&resync_wait, &wq);
6322 }
6323 }
6324 } while (mddev->curr_resync < 2);
6325
6326 j = 0;
6327 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
6328
6329
6330
6331 max_sectors = mddev->resync_max_sectors;
6332 mddev->resync_mismatches = 0;
6333
6334 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
6335 j = mddev->resync_min;
6336 else if (!mddev->bitmap)
6337 j = mddev->recovery_cp;
6338
6339 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6340 max_sectors = mddev->dev_sectors;
6341 else {
6342
6343 max_sectors = mddev->dev_sectors;
6344 j = MaxSector;
6345 list_for_each_entry(rdev, &mddev->disks, same_set)
6346 if (rdev->raid_disk >= 0 &&
6347 !test_bit(Faulty, &rdev->flags) &&
6348 !test_bit(In_sync, &rdev->flags) &&
6349 rdev->recovery_offset < j)
6350 j = rdev->recovery_offset;
6351 }
6352
6353 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
6354 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
6355 " %d KB/sec/disk.\n", speed_min(mddev));
6356 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
6357 "(but not more than %d KB/sec) for %s.\n",
6358 speed_max(mddev), desc);
6359
6360 is_mddev_idle(mddev, 1);
6361
6362 io_sectors = 0;
6363 for (m = 0; m < SYNC_MARKS; m++) {
6364 mark[m] = jiffies;
6365 mark_cnt[m] = io_sectors;
6366 }
6367 last_mark = 0;
6368 mddev->resync_mark = mark[last_mark];
6369 mddev->resync_mark_cnt = mark_cnt[last_mark];
6370
6371
6372
6373
6374 window = 32*(PAGE_SIZE/512);
6375 printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n",
6376 window/2,(unsigned long long) max_sectors/2);
6377
6378 atomic_set(&mddev->recovery_active, 0);
6379 last_check = 0;
6380
6381 if (j>2) {
6382 printk(KERN_INFO
6383 "md: resuming %s of %s from checkpoint.\n",
6384 desc, mdname(mddev));
6385 mddev->curr_resync = j;
6386 }
6387
6388 while (j < max_sectors) {
6389 sector_t sectors;
6390
6391 skipped = 0;
6392
6393 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
6394 ((mddev->curr_resync > mddev->curr_resync_completed &&
6395 (mddev->curr_resync - mddev->curr_resync_completed)
6396 > (max_sectors >> 4)) ||
6397 (j - mddev->curr_resync_completed)*2
6398 >= mddev->resync_max - mddev->curr_resync_completed
6399 )) {
6400
6401 blk_unplug(mddev->queue);
6402 wait_event(mddev->recovery_wait,
6403 atomic_read(&mddev->recovery_active) == 0);
6404 mddev->curr_resync_completed =
6405 mddev->curr_resync;
6406 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6407 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6408 }
6409
6410 while (j >= mddev->resync_max && !kthread_should_stop()) {
6411
6412
6413
6414
6415 flush_signals(current);
6416 wait_event_interruptible(mddev->recovery_wait,
6417 mddev->resync_max > j
6418 || kthread_should_stop());
6419 }
6420
6421 if (kthread_should_stop())
6422 goto interrupted;
6423
6424 sectors = mddev->pers->sync_request(mddev, j, &skipped,
6425 currspeed < speed_min(mddev));
6426 if (sectors == 0) {
6427 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6428 goto out;
6429 }
6430
6431 if (!skipped) {
6432 io_sectors += sectors;
6433 atomic_add(sectors, &mddev->recovery_active);
6434 }
6435
6436 j += sectors;
6437 if (j>1) mddev->curr_resync = j;
6438 mddev->curr_mark_cnt = io_sectors;
6439		if (last_check == 0)
			/* this is the earliest that the rebuild will be
			 * visible in /proc/mdstat
			 */
6443			md_new_event(mddev);
6444
6445 if (last_check + window > io_sectors || j == max_sectors)
6446 continue;
6447
6448 last_check = io_sectors;
6449
6450 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6451 break;
6452
6453 repeat:
6454 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
6455
6456 int next = (last_mark+1) % SYNC_MARKS;
6457
6458 mddev->resync_mark = mark[next];
6459 mddev->resync_mark_cnt = mark_cnt[next];
6460 mark[next] = jiffies;
6461 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
6462 last_mark = next;
6463 }
6464
6465
6466 if (kthread_should_stop())
6467 goto interrupted;
6468
6469
6470
6471
6472
6473
6474
6475
6476
6477
6478 blk_unplug(mddev->queue);
6479 cond_resched();
6480
6481 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
6482 /((jiffies-mddev->resync_mark)/HZ +1) +1;
6483
6484 if (currspeed > speed_min(mddev)) {
6485 if ((currspeed > speed_max(mddev)) ||
6486 !is_mddev_idle(mddev, 0)) {
6487 msleep(500);
6488 goto repeat;
6489 }
6490 }
6491 }
6492 printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
6493
6494
6495
6496 out:
6497 blk_unplug(mddev->queue);
6498
6499 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
6500
6501
6502 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
6503
6504 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
6505 mddev->curr_resync > 2) {
6506 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
6507 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
6508 if (mddev->curr_resync >= mddev->recovery_cp) {
6509 printk(KERN_INFO
6510 "md: checkpointing %s of %s.\n",
6511 desc, mdname(mddev));
6512 mddev->recovery_cp = mddev->curr_resync;
6513 }
6514 } else
6515 mddev->recovery_cp = MaxSector;
6516 } else {
6517 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6518 mddev->curr_resync = MaxSector;
6519 list_for_each_entry(rdev, &mddev->disks, same_set)
6520 if (rdev->raid_disk >= 0 &&
6521 !test_bit(Faulty, &rdev->flags) &&
6522 !test_bit(In_sync, &rdev->flags) &&
6523 rdev->recovery_offset < mddev->curr_resync)
6524 rdev->recovery_offset = mddev->curr_resync;
6525 }
6526 }
6527 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6528
6529 skip:
6530 mddev->curr_resync = 0;
6531 mddev->curr_resync_completed = 0;
6532	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
		/* the sync completed, so any user-imposed resync_max can be
		 * forgotten
		 */
6534		mddev->resync_max = MaxSector;
6535 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6536 wake_up(&resync_wait);
6537 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
6538 md_wakeup_thread(mddev->thread);
6539 return;
6540
6541 interrupted:
6542
6543
6544
6545 printk(KERN_INFO
6546 "md: md_do_sync() got signal ... exiting\n");
6547 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6548 goto out;
6549
6550}
6551EXPORT_SYMBOL_GPL(md_do_sync);
6552
6553
6554static int remove_and_add_spares(mddev_t *mddev)
6555{
6556 mdk_rdev_t *rdev;
6557 int spares = 0;
6558
6559 mddev->curr_resync_completed = 0;
6560
6561 list_for_each_entry(rdev, &mddev->disks, same_set)
6562 if (rdev->raid_disk >= 0 &&
6563 !test_bit(Blocked, &rdev->flags) &&
6564 (test_bit(Faulty, &rdev->flags) ||
6565 ! test_bit(In_sync, &rdev->flags)) &&
6566 atomic_read(&rdev->nr_pending)==0) {
6567 if (mddev->pers->hot_remove_disk(
6568 mddev, rdev->raid_disk)==0) {
6569 char nm[20];
6570 sprintf(nm,"rd%d", rdev->raid_disk);
6571 sysfs_remove_link(&mddev->kobj, nm);
6572 rdev->raid_disk = -1;
6573 }
6574 }
6575
6576 if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
6577 list_for_each_entry(rdev, &mddev->disks, same_set) {
6578 if (rdev->raid_disk >= 0 &&
6579 !test_bit(In_sync, &rdev->flags) &&
6580 !test_bit(Blocked, &rdev->flags))
6581 spares++;
6582 if (rdev->raid_disk < 0
6583 && !test_bit(Faulty, &rdev->flags)) {
6584 rdev->recovery_offset = 0;
6585 if (mddev->pers->
6586 hot_add_disk(mddev, rdev) == 0) {
6587 char nm[20];
6588 sprintf(nm, "rd%d", rdev->raid_disk);
6589 if (sysfs_create_link(&mddev->kobj,
6590 &rdev->kobj, nm))
6591 printk(KERN_WARNING
6592 "md: cannot register "
6593 "%s for %s\n",
6594 nm, mdname(mddev));
6595 spares++;
6596 md_new_event(mddev);
6597 } else
6598 break;
6599 }
6600 }
6601 }
6602 return spares;
6603}
6604
6605
6606
6607
6608
6609
6610
6611
6612
6613
6614
6615
6616
6617
6618
6619
6620
6621
6622
6623
6624
6625
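/*
 * md_check_recovery is called regularly by every per-array thread to
 * handle housekeeping common to all personalities: superblock updates,
 * removing failed devices, adding spares, and starting or reaping the
 * resync/recovery thread.  It never resyncs itself; when a resync is
 * needed it sets MD_RECOVERY_RUNNING and forks md_do_sync in
 * ->sync_thread, then reaps that thread once MD_RECOVERY_DONE is set.
 */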
6626void md_check_recovery(mddev_t *mddev)
6627{
6628 mdk_rdev_t *rdev;
6629
6630
6631 if (mddev->bitmap)
6632 bitmap_daemon_work(mddev->bitmap);
6633
6634 if (mddev->ro)
6635 return;
6636
6637 if (signal_pending(current)) {
6638 if (mddev->pers->sync_request && !mddev->external) {
6639 printk(KERN_INFO "md: %s in immediate safe mode\n",
6640 mdname(mddev));
6641 mddev->safemode = 2;
6642 }
6643 flush_signals(current);
6644 }
6645
6646 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
6647 return;
6648 if ( ! (
6649 (mddev->flags && !mddev->external) ||
6650 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
6651 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
6652 (mddev->external == 0 && mddev->safemode == 1) ||
6653 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
6654 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
6655 ))
6656 return;
6657
6658 if (mddev_trylock(mddev)) {
6659 int spares = 0;
6660
6661 if (mddev->ro) {
6662
6663
6664
6665 remove_and_add_spares(mddev);
6666 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6667 goto unlock;
6668 }
6669
6670 if (!mddev->external) {
6671 int did_change = 0;
6672 spin_lock_irq(&mddev->write_lock);
6673 if (mddev->safemode &&
6674 !atomic_read(&mddev->writes_pending) &&
6675 !mddev->in_sync &&
6676 mddev->recovery_cp == MaxSector) {
6677 mddev->in_sync = 1;
6678 did_change = 1;
6679 if (mddev->persistent)
6680 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6681 }
6682 if (mddev->safemode == 1)
6683 mddev->safemode = 0;
6684 spin_unlock_irq(&mddev->write_lock);
6685 if (did_change)
6686 sysfs_notify_dirent(mddev->sysfs_state);
6687 }
6688
6689 if (mddev->flags)
6690 md_update_sb(mddev, 0);
6691
6692 list_for_each_entry(rdev, &mddev->disks, same_set)
6693 if (test_and_clear_bit(StateChanged, &rdev->flags))
6694 sysfs_notify_dirent(rdev->sysfs_state);
6695
6696
6697 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
6698 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
6699
6700 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6701 goto unlock;
6702 }
6703 if (mddev->sync_thread) {
6704
6705 md_unregister_thread(mddev->sync_thread);
6706 mddev->sync_thread = NULL;
6707 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
6708 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
6709
6710
6711 if (mddev->pers->spare_active(mddev))
6712 sysfs_notify(&mddev->kobj, NULL,
6713 "degraded");
6714 }
6715 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
6716 mddev->pers->finish_reshape)
6717 mddev->pers->finish_reshape(mddev);
6718 md_update_sb(mddev, 1);
6719
6720
6721
6722
6723 if (!mddev->degraded)
6724 list_for_each_entry(rdev, &mddev->disks, same_set)
6725 rdev->saved_raid_disk = -1;
6726
6727 mddev->recovery = 0;
6728
6729 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6730 sysfs_notify_dirent(mddev->sysfs_action);
6731 md_new_event(mddev);
6732 goto unlock;
6733 }
6734
6735
6736
6737 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
6738 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6739
6740
6741
6742 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
6743 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
6744
6745 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
6746 goto unlock;
6747
6748
6749
6750
6751
6752
6753
6754 if (mddev->reshape_position != MaxSector) {
6755 if (mddev->pers->check_reshape == NULL ||
6756			    mddev->pers->check_reshape(mddev) != 0)
				/* cannot continue the reshape */
6758				goto unlock;
6759 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
6760 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6761 } else if ((spares = remove_and_add_spares(mddev))) {
6762 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
6763 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
6764 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
6765 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6766 } else if (mddev->recovery_cp < MaxSector) {
6767 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
6768 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6769		} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing left to do */
6771			goto unlock;
6772
6773 if (mddev->pers->sync_request) {
6774 if (spares && mddev->bitmap && ! mddev->bitmap->file) {
6775
6776
6777
6778
6779 bitmap_write_all(mddev->bitmap);
6780 }
6781 mddev->sync_thread = md_register_thread(md_do_sync,
6782 mddev,
6783 "resync");
6784 if (!mddev->sync_thread) {
6785 printk(KERN_ERR "%s: could not start resync"
6786 " thread...\n",
6787 mdname(mddev));
6788
6789 mddev->recovery = 0;
6790 } else
6791 md_wakeup_thread(mddev->sync_thread);
6792 sysfs_notify_dirent(mddev->sysfs_action);
6793 md_new_event(mddev);
6794 }
6795 unlock:
6796 if (!mddev->sync_thread) {
6797 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
6798 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
6799 &mddev->recovery))
6800 if (mddev->sysfs_action)
6801 sysfs_notify_dirent(mddev->sysfs_action);
6802 }
6803 mddev_unlock(mddev);
6804 }
6805}
6806
6807void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
6808{
6809 sysfs_notify_dirent(rdev->sysfs_state);
6810 wait_event_timeout(rdev->blocked_wait,
6811 !test_bit(Blocked, &rdev->flags),
6812 msecs_to_jiffies(5000));
6813 rdev_dec_pending(rdev, mddev);
6814}
6815EXPORT_SYMBOL(md_wait_for_blocked_rdev);
6816
6817static int md_notify_reboot(struct notifier_block *this,
6818 unsigned long code, void *x)
6819{
6820 struct list_head *tmp;
6821 mddev_t *mddev;
6822
6823 if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
6824
6825 printk(KERN_INFO "md: stopping all md devices.\n");
6826
6827 for_each_mddev(mddev, tmp)
6828 if (mddev_trylock(mddev)) {
6829
6830
6831
6832
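				/* force the switch to read-only even if the
				 * array still appears to be in use; the '100'
				 * is a generous allowed-openers count
				 */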
6833 do_md_stop(mddev, 1, 100);
6834 mddev_unlock(mddev);
6835 }
6836
6837
6838
6839
6840
6841
6842 mdelay(1000*1);
6843 }
6844 return NOTIFY_DONE;
6845}
6846
6847static struct notifier_block md_notifier = {
6848 .notifier_call = md_notify_reboot,
6849 .next = NULL,
6850 .priority = INT_MAX,
6851};
6852
6853static void md_geninit(void)
6854{
6855 dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
6856
6857 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
6858}
6859
6860static int __init md_init(void)
6861{
6862 if (register_blkdev(MD_MAJOR, "md"))
6863 return -1;
6864 if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
6865 unregister_blkdev(MD_MAJOR, "md");
6866 return -1;
6867 }
6868 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
6869 md_probe, NULL, NULL);
6870 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
6871 md_probe, NULL, NULL);
6872
6873 register_reboot_notifier(&md_notifier);
6874 raid_table_header = register_sysctl_table(raid_root_table);
6875
6876 md_geninit();
6877 return 0;
6878}
6879
6880
6881#ifndef MODULE
6882
6883
6884
6885
6886
6887
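/*
 * When md is built in, devices flagged for autodetection are queued on
 * all_detected_devices during boot and assembled later by
 * autostart_arrays().
 */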
6888static LIST_HEAD(all_detected_devices);
6889struct detected_devices_node {
6890 struct list_head list;
6891 dev_t dev;
6892};
6893
6894void md_autodetect_dev(dev_t dev)
6895{
6896 struct detected_devices_node *node_detected_dev;
6897
6898 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
6899 if (node_detected_dev) {
6900 node_detected_dev->dev = dev;
6901 list_add_tail(&node_detected_dev->list, &all_detected_devices);
6902 } else {
6903 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
6904 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
6905 }
6906}
6907
6908
6909static void autostart_arrays(int part)
6910{
6911 mdk_rdev_t *rdev;
6912 struct detected_devices_node *node_detected_dev;
6913 dev_t dev;
6914 int i_scanned, i_passed;
6915
6916 i_scanned = 0;
6917 i_passed = 0;
6918
6919 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
6920
6921 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
6922 i_scanned++;
6923 node_detected_dev = list_entry(all_detected_devices.next,
6924 struct detected_devices_node, list);
6925 list_del(&node_detected_dev->list);
6926 dev = node_detected_dev->dev;
6927 kfree(node_detected_dev);
6928		rdev = md_import_device(dev, 0, 90);
6929 if (IS_ERR(rdev))
6930 continue;
6931
6932 if (test_bit(Faulty, &rdev->flags)) {
6933 MD_BUG();
6934 continue;
6935 }
6936 set_bit(AutoDetected, &rdev->flags);
6937 list_add(&rdev->same_set, &pending_raid_disks);
6938 i_passed++;
6939 }
6940
6941 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
6942 i_scanned, i_passed);
6943
6944 autorun_devices(part);
6945}
6946
6947#endif
6948
6949static __exit void md_exit(void)
6950{
6951 mddev_t *mddev;
6952 struct list_head *tmp;
6953
6954 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
6955 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
6956
6957 unregister_blkdev(MD_MAJOR,"md");
6958 unregister_blkdev(mdp_major, "mdp");
6959 unregister_reboot_notifier(&md_notifier);
6960 unregister_sysctl_table(raid_table_header);
6961 remove_proc_entry("mdstat", NULL);
6962 for_each_mddev(mddev, tmp) {
6963 export_array(mddev);
6964 mddev->hold_active = 0;
6965 }
6966}
6967
6968subsys_initcall(md_init);
6969module_exit(md_exit)
6970
6971static int get_ro(char *buffer, struct kernel_param *kp)
6972{
6973 return sprintf(buffer, "%d", start_readonly);
6974}
6975static int set_ro(const char *val, struct kernel_param *kp)
6976{
6977 char *e;
6978 int num = simple_strtoul(val, &e, 10);
6979 if (*val && (*e == '\0' || *e == '\n')) {
6980 start_readonly = num;
6981 return 0;
6982 }
6983 return -EINVAL;
6984}
6985
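/* module parameters: start_ro=1 makes newly started arrays come up in
 * auto-read-only mode until the first write; start_dirty_degraded
 * allows arrays that are both dirty and degraded to be started, at the
 * risk of undetected data corruption.
 */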
6986module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
6987module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
6988
6989module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
6990
6991EXPORT_SYMBOL(register_md_personality);
6992EXPORT_SYMBOL(unregister_md_personality);
6993EXPORT_SYMBOL(md_error);
6994EXPORT_SYMBOL(md_done_sync);
6995EXPORT_SYMBOL(md_write_start);
6996EXPORT_SYMBOL(md_write_end);
6997EXPORT_SYMBOL(md_register_thread);
6998EXPORT_SYMBOL(md_unregister_thread);
6999EXPORT_SYMBOL(md_wakeup_thread);
7000EXPORT_SYMBOL(md_check_recovery);
7001MODULE_LICENSE("GPL");
7002MODULE_ALIAS("md");
7003MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
7004