/*
 * md.c : Multiple Devices driver for Linux
 */
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/fs.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/string.h>
43#include <linux/hdreg.h>
44#include <linux/proc_fs.h>
45#include <linux/random.h>
46#include <linux/module.h>
47#include <linux/reboot.h>
48#include <linux/file.h>
49#include <linux/compat.h>
50#include <linux/delay.h>
51#include <linux/raid/md_p.h>
52#include <linux/raid/md_u.h>
53#include <linux/slab.h>
54#include "md.h"
55#include "bitmap.h"
56
57#ifndef MODULE
58static void autostart_arrays(int part);
59#endif
60
61
62
63
64
65
66static LIST_HEAD(pers_list);
67static DEFINE_SPINLOCK(pers_lock);
68
69static void md_print_devices(void);
70
71static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
72static struct workqueue_struct *md_wq;
73static struct workqueue_struct *md_misc_wq;
74
75static int remove_and_add_spares(struct mddev *mddev,
76 struct md_rdev *this);
77
78#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
79
80
81
82
83
84
85#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Resync/recovery rate limits, in KB/sec.  speed_limit_min is the
 * guaranteed minimum reserved for resync even when the array is busy;
 * speed_limit_max applies when the array is otherwise idle.  Both are
 * tunable via /proc/sys/dev/raid/ and are overridden per-array by
 * mddev->sync_speed_min / sync_speed_max when those are non-zero.
 */
99static int sysctl_speed_limit_min = 1000;
100static int sysctl_speed_limit_max = 200000;
101static inline int speed_min(struct mddev *mddev)
102{
103 return mddev->sync_speed_min ?
104 mddev->sync_speed_min : sysctl_speed_limit_min;
105}
106
107static inline int speed_max(struct mddev *mddev)
108{
109 return mddev->sync_speed_max ?
110 mddev->sync_speed_max : sysctl_speed_limit_max;
111}
112
113static struct ctl_table_header *raid_table_header;
114
115static struct ctl_table raid_table[] = {
116 {
117 .procname = "speed_limit_min",
118 .data = &sysctl_speed_limit_min,
119 .maxlen = sizeof(int),
120 .mode = S_IRUGO|S_IWUSR,
121 .proc_handler = proc_dointvec,
122 },
123 {
124 .procname = "speed_limit_max",
125 .data = &sysctl_speed_limit_max,
126 .maxlen = sizeof(int),
127 .mode = S_IRUGO|S_IWUSR,
128 .proc_handler = proc_dointvec,
129 },
130 { }
131};
132
133static struct ctl_table raid_dir_table[] = {
134 {
135 .procname = "raid",
136 .maxlen = 0,
137 .mode = S_IRUGO|S_IXUGO,
138 .child = raid_table,
139 },
140 { }
141};
142
143static struct ctl_table raid_root_table[] = {
144 {
145 .procname = "dev",
146 .maxlen = 0,
147 .mode = 0555,
148 .child = raid_dir_table,
149 },
150 { }
151};
152
153static const struct block_device_operations md_fops;
154
155static int start_readonly;
156
157
158
159
160
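/*
 * Allocate a bio for @mddev: use the per-array bio_set when one has been
 * created, otherwise fall back to the global bio pool.
 */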
161struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
162 struct mddev *mddev)
163{
164 struct bio *b;
165
166 if (!mddev || !mddev->bio_set)
167 return bio_alloc(gfp_mask, nr_iovecs);
168
169 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
170 if (!b)
171 return NULL;
172 return b;
173}
174EXPORT_SYMBOL_GPL(bio_alloc_mddev);
175
176struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
177 struct mddev *mddev)
178{
179 if (!mddev || !mddev->bio_set)
180 return bio_clone(bio, gfp_mask);
181
182 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
183}
184EXPORT_SYMBOL_GPL(bio_clone_mddev);
185
186
187
188
189
190
191
192
193
194
195
196static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
197static atomic_t md_event_count;
198void md_new_event(struct mddev *mddev)
199{
200 atomic_inc(&md_event_count);
201 wake_up(&md_event_waiters);
202}
203EXPORT_SYMBOL_GPL(md_new_event);
204
205
206
207
208static void md_new_event_inintr(struct mddev *mddev)
209{
210 atomic_inc(&md_event_count);
211 wake_up(&md_event_waiters);
212}
213
214
215
216
217
218static LIST_HEAD(all_mddevs);
219static DEFINE_SPINLOCK(all_mddevs_lock);
220
221
222
223
224
225
226
227
228
229#define for_each_mddev(_mddev,_tmp) \
230 \
231 for (({ spin_lock(&all_mddevs_lock); \
232 _tmp = all_mddevs.next; \
233 _mddev = NULL;}); \
234 ({ if (_tmp != &all_mddevs) \
235 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
236 spin_unlock(&all_mddevs_lock); \
237 if (_mddev) mddev_put(_mddev); \
238 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
239 _tmp != &all_mddevs;}); \
240 ({ spin_lock(&all_mddevs_lock); \
241 _tmp = _tmp->next;}) \
242 )
243
244
245
246
247
248
249
250
251
252static void md_make_request(struct request_queue *q, struct bio *bio)
253{
254 const int rw = bio_data_dir(bio);
255 struct mddev *mddev = q->queuedata;
256 int cpu;
257 unsigned int sectors;
258
259 if (mddev == NULL || mddev->pers == NULL
260 || !mddev->ready) {
261 bio_io_error(bio);
262 return;
263 }
264 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
265 bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
266 return;
267 }
268 smp_rmb();
269 rcu_read_lock();
270 if (mddev->suspended) {
271 DEFINE_WAIT(__wait);
272 for (;;) {
273 prepare_to_wait(&mddev->sb_wait, &__wait,
274 TASK_UNINTERRUPTIBLE);
275 if (!mddev->suspended)
276 break;
277 rcu_read_unlock();
278 schedule();
279 rcu_read_lock();
280 }
281 finish_wait(&mddev->sb_wait, &__wait);
282 }
283 atomic_inc(&mddev->active_io);
284 rcu_read_unlock();
285
286
287
288
289
290 sectors = bio_sectors(bio);
291 mddev->pers->make_request(mddev, bio);
292
293 cpu = part_stat_lock();
294 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
295 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
296 part_stat_unlock();
297
298 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
299 wake_up(&mddev->sb_wait);
300}
301
302
303
304
305
306
307
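/*
 * Stop the array accepting new requests: md_make_request() blocks while
 * ->suspended is set.  Wait for all in-flight I/O to drain (active_io
 * reaches zero), then quiesce the personality and stop the safemode timer.
 */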
308void mddev_suspend(struct mddev *mddev)
309{
310 BUG_ON(mddev->suspended);
311 mddev->suspended = 1;
312 synchronize_rcu();
313 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
314 mddev->pers->quiesce(mddev, 1);
315
316 del_timer_sync(&mddev->safemode_timer);
317}
318EXPORT_SYMBOL_GPL(mddev_suspend);
319
320void mddev_resume(struct mddev *mddev)
321{
322 mddev->suspended = 0;
323 wake_up(&mddev->sb_wait);
324 mddev->pers->quiesce(mddev, 0);
325
326 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
327 md_wakeup_thread(mddev->thread);
328 md_wakeup_thread(mddev->sync_thread);
329}
330EXPORT_SYMBOL_GPL(mddev_resume);
331
332int mddev_congested(struct mddev *mddev, int bits)
333{
334 return mddev->suspended;
335}
336EXPORT_SYMBOL(mddev_congested);
337
338
339
340
341
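/*
 * Generic flush handling for md.
 *
 * A flush request is handled in three steps: md_flush_request() records
 * the bio (only one flush may be active per array), submit_flushes()
 * sends an empty flush bio to every active member device, and
 * md_submit_flush_data() finally passes the data part of the original
 * bio (if any) to the personality.
 */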
342static void md_end_flush(struct bio *bio, int err)
343{
344 struct md_rdev *rdev = bio->bi_private;
345 struct mddev *mddev = rdev->mddev;
346
347 rdev_dec_pending(rdev, mddev);
348
349 if (atomic_dec_and_test(&mddev->flush_pending)) {
350
351 queue_work(md_wq, &mddev->flush_work);
352 }
353 bio_put(bio);
354}
355
356static void md_submit_flush_data(struct work_struct *ws);
357
358static void submit_flushes(struct work_struct *ws)
359{
360 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
361 struct md_rdev *rdev;
362
363 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
364 atomic_set(&mddev->flush_pending, 1);
365 rcu_read_lock();
366 rdev_for_each_rcu(rdev, mddev)
367 if (rdev->raid_disk >= 0 &&
368 !test_bit(Faulty, &rdev->flags)) {
			/* Take two references: one is dropped when the
			 * flush request completes, the other after we
			 * re-take rcu_read_lock below.
			 */
373 struct bio *bi;
374 atomic_inc(&rdev->nr_pending);
375 atomic_inc(&rdev->nr_pending);
376 rcu_read_unlock();
377 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
378 bi->bi_end_io = md_end_flush;
379 bi->bi_private = rdev;
380 bi->bi_bdev = rdev->bdev;
381 atomic_inc(&mddev->flush_pending);
382 submit_bio(WRITE_FLUSH, bi);
383 rcu_read_lock();
384 rdev_dec_pending(rdev, mddev);
385 }
386 rcu_read_unlock();
387 if (atomic_dec_and_test(&mddev->flush_pending))
388 queue_work(md_wq, &mddev->flush_work);
389}
390
391static void md_submit_flush_data(struct work_struct *ws)
392{
393 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
394 struct bio *bio = mddev->flush_bio;
395
396 if (bio->bi_iter.bi_size == 0)
		/* an empty flush - nothing left to do */
398 bio_endio(bio, 0);
399 else {
400 bio->bi_rw &= ~REQ_FLUSH;
401 mddev->pers->make_request(mddev, bio);
402 }
403
404 mddev->flush_bio = NULL;
405 wake_up(&mddev->sb_wait);
406}
407
408void md_flush_request(struct mddev *mddev, struct bio *bio)
409{
410 spin_lock_irq(&mddev->write_lock);
411 wait_event_lock_irq(mddev->sb_wait,
412 !mddev->flush_bio,
413 mddev->write_lock);
414 mddev->flush_bio = bio;
415 spin_unlock_irq(&mddev->write_lock);
416
417 INIT_WORK(&mddev->flush_work, submit_flushes);
418 queue_work(md_wq, &mddev->flush_work);
419}
420EXPORT_SYMBOL(md_flush_request);
421
422void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
423{
424 struct mddev *mddev = cb->data;
425 md_wakeup_thread(mddev->thread);
426 kfree(cb);
427}
428EXPORT_SYMBOL(md_unplug);
429
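/* Take a reference on an mddev; mddev_put() drops it and may free an
 * unused, inactive mddev. */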
430static inline struct mddev *mddev_get(struct mddev *mddev)
431{
432 atomic_inc(&mddev->active);
433 return mddev;
434}
435
436static void mddev_delayed_delete(struct work_struct *ws);
437
438static void mddev_put(struct mddev *mddev)
439{
440 struct bio_set *bs = NULL;
441
442 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
443 return;
444 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
445 mddev->ctime == 0 && !mddev->hold_active) {
446
447
448 list_del_init(&mddev->all_mddevs);
449 bs = mddev->bio_set;
450 mddev->bio_set = NULL;
451 if (mddev->gendisk) {
452
453
454
455
456
457 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
458 queue_work(md_misc_wq, &mddev->del_work);
459 } else
460 kfree(mddev);
461 }
462 spin_unlock(&all_mddevs_lock);
463 if (bs)
464 bioset_free(bs);
465}
466
467void mddev_init(struct mddev *mddev)
468{
469 mutex_init(&mddev->open_mutex);
470 mutex_init(&mddev->reconfig_mutex);
471 mutex_init(&mddev->bitmap_info.mutex);
472 INIT_LIST_HEAD(&mddev->disks);
473 INIT_LIST_HEAD(&mddev->all_mddevs);
474 init_timer(&mddev->safemode_timer);
475 atomic_set(&mddev->active, 1);
476 atomic_set(&mddev->openers, 0);
477 atomic_set(&mddev->active_io, 0);
478 spin_lock_init(&mddev->write_lock);
479 atomic_set(&mddev->flush_pending, 0);
480 init_waitqueue_head(&mddev->sb_wait);
481 init_waitqueue_head(&mddev->recovery_wait);
482 mddev->reshape_position = MaxSector;
483 mddev->reshape_backwards = 0;
484 mddev->last_sync_action = "none";
485 mddev->resync_min = 0;
486 mddev->resync_max = MaxSector;
487 mddev->level = LEVEL_NONE;
488}
489EXPORT_SYMBOL_GPL(mddev_init);
490
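/*
 * Find the mddev for a given unit number, creating it if necessary.
 * A unit of 0 requests a new, currently unused unit (minor numbers from
 * 512 upwards).  The caller receives a reference in either case.
 */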
491static struct mddev * mddev_find(dev_t unit)
492{
493 struct mddev *mddev, *new = NULL;
494
495 if (unit && MAJOR(unit) != MD_MAJOR)
496 unit &= ~((1<<MdpMinorShift)-1);
497
498 retry:
499 spin_lock(&all_mddevs_lock);
500
501 if (unit) {
502 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
503 if (mddev->unit == unit) {
504 mddev_get(mddev);
505 spin_unlock(&all_mddevs_lock);
506 kfree(new);
507 return mddev;
508 }
509
510 if (new) {
511 list_add(&new->all_mddevs, &all_mddevs);
512 spin_unlock(&all_mddevs_lock);
513 new->hold_active = UNTIL_IOCTL;
514 return new;
515 }
516 } else if (new) {
517
518 static int next_minor = 512;
519 int start = next_minor;
520 int is_free = 0;
521 int dev = 0;
522 while (!is_free) {
523 dev = MKDEV(MD_MAJOR, next_minor);
524 next_minor++;
525 if (next_minor > MINORMASK)
526 next_minor = 0;
527 if (next_minor == start) {
528
529 spin_unlock(&all_mddevs_lock);
530 kfree(new);
531 return NULL;
532 }
533
534 is_free = 1;
535 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
536 if (mddev->unit == dev) {
537 is_free = 0;
538 break;
539 }
540 }
541 new->unit = dev;
542 new->md_minor = MINOR(dev);
543 new->hold_active = UNTIL_STOP;
544 list_add(&new->all_mddevs, &all_mddevs);
545 spin_unlock(&all_mddevs_lock);
546 return new;
547 }
548 spin_unlock(&all_mddevs_lock);
549
550 new = kzalloc(sizeof(*new), GFP_KERNEL);
551 if (!new)
552 return NULL;
553
554 new->unit = unit;
555 if (MAJOR(unit) == MD_MAJOR)
556 new->md_minor = MINOR(unit);
557 else
558 new->md_minor = MINOR(unit) >> MdpMinorShift;
559
560 mddev_init(new);
561
562 goto retry;
563}
564
565static inline int __must_check mddev_lock(struct mddev * mddev)
566{
567 return mutex_lock_interruptible(&mddev->reconfig_mutex);
568}
569
570
571
572
573static inline void mddev_lock_nointr(struct mddev * mddev)
574{
575 mutex_lock(&mddev->reconfig_mutex);
576}
577
578static inline int mddev_is_locked(struct mddev *mddev)
579{
580 return mutex_is_locked(&mddev->reconfig_mutex);
581}
582
583static inline int mddev_trylock(struct mddev * mddev)
584{
585 return mutex_trylock(&mddev->reconfig_mutex);
586}
587
588static struct attribute_group md_redundancy_group;
589
590static void mddev_unlock(struct mddev * mddev)
591{
592 if (mddev->to_remove) {
		/* These sysfs groups cannot be removed while reconfig_mutex
		 * is held: an attribute handler may be blocked trying to
		 * take the mutex, and sysfs_remove_group() would wait for
		 * it, deadlocking.  So note what has to go, mark the mddev
		 * sysfs_active, drop the mutex and only then remove the
		 * groups.
		 */
605 struct attribute_group *to_remove = mddev->to_remove;
606 mddev->to_remove = NULL;
607 mddev->sysfs_active = 1;
608 mutex_unlock(&mddev->reconfig_mutex);
609
610 if (mddev->kobj.sd) {
611 if (to_remove != &md_redundancy_group)
612 sysfs_remove_group(&mddev->kobj, to_remove);
613 if (mddev->pers == NULL ||
614 mddev->pers->sync_request == NULL) {
615 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
616 if (mddev->sysfs_action)
617 sysfs_put(mddev->sysfs_action);
618 mddev->sysfs_action = NULL;
619 }
620 }
621 mddev->sysfs_active = 0;
622 } else
623 mutex_unlock(&mddev->reconfig_mutex);
624
625
626
627
628 spin_lock(&pers_lock);
629 md_wakeup_thread(mddev->thread);
630 spin_unlock(&pers_lock);
631}
632
633static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
634{
635 struct md_rdev *rdev;
636
637 rdev_for_each(rdev, mddev)
638 if (rdev->desc_nr == nr)
639 return rdev;
640
641 return NULL;
642}
643
644static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
645{
646 struct md_rdev *rdev;
647
648 rdev_for_each_rcu(rdev, mddev)
649 if (rdev->desc_nr == nr)
650 return rdev;
651
652 return NULL;
653}
654
655static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
656{
657 struct md_rdev *rdev;
658
659 rdev_for_each(rdev, mddev)
660 if (rdev->bdev->bd_dev == dev)
661 return rdev;
662
663 return NULL;
664}
665
666static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
667{
668 struct md_rdev *rdev;
669
670 rdev_for_each_rcu(rdev, mddev)
671 if (rdev->bdev->bd_dev == dev)
672 return rdev;
673
674 return NULL;
675}
676
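/* Look up a registered personality by numeric level or by name. */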
677static struct md_personality *find_pers(int level, char *clevel)
678{
679 struct md_personality *pers;
680 list_for_each_entry(pers, &pers_list, list) {
681 if (level != LEVEL_NONE && pers->level == level)
682 return pers;
683 if (strcmp(pers->name, clevel)==0)
684 return pers;
685 }
686 return NULL;
687}
688
689
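/* Sector offset of a 0.90 superblock: it lives near the end of the
 * device (see MD_NEW_SIZE_SECTORS). */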
690static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
691{
692 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
693 return MD_NEW_SIZE_SECTORS(num_sectors);
694}
695
696static int alloc_disk_sb(struct md_rdev * rdev)
697{
698 if (rdev->sb_page)
699 MD_BUG();
700
701 rdev->sb_page = alloc_page(GFP_KERNEL);
702 if (!rdev->sb_page) {
703 printk(KERN_ALERT "md: out of memory.\n");
704 return -ENOMEM;
705 }
706
707 return 0;
708}
709
710void md_rdev_clear(struct md_rdev *rdev)
711{
712 if (rdev->sb_page) {
713 put_page(rdev->sb_page);
714 rdev->sb_loaded = 0;
715 rdev->sb_page = NULL;
716 rdev->sb_start = 0;
717 rdev->sectors = 0;
718 }
719 if (rdev->bb_page) {
720 put_page(rdev->bb_page);
721 rdev->bb_page = NULL;
722 }
723 kfree(rdev->badblocks.page);
724 rdev->badblocks.page = NULL;
725}
726EXPORT_SYMBOL_GPL(md_rdev_clear);
727
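/* Completion handler for superblock writes issued by md_super_write():
 * report errors via md_error() and wake md_super_wait() once all pending
 * writes have finished. */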
728static void super_written(struct bio *bio, int error)
729{
730 struct md_rdev *rdev = bio->bi_private;
731 struct mddev *mddev = rdev->mddev;
732
733 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
734 printk("md: super_written gets error=%d, uptodate=%d\n",
735 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
736 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
737 md_error(mddev, rdev);
738 }
739
740 if (atomic_dec_and_test(&mddev->pending_writes))
741 wake_up(&mddev->sb_wait);
742 bio_put(bio);
743}
744
745void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
746 sector_t sector, int size, struct page *page)
747{
	/* Write the first 'size' bytes of 'page' to 'sector' of rdev.
	 * Increment mddev->pending_writes before returning and decrement it
	 * on completion, waking up sb_wait if it reaches zero.
	 * If an error occurs, md_error() is called.
	 */
754 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
755
756 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
757 bio->bi_iter.bi_sector = sector;
758 bio_add_page(bio, page, size, 0);
759 bio->bi_private = rdev;
760 bio->bi_end_io = super_written;
761
762 atomic_inc(&mddev->pending_writes);
763 submit_bio(WRITE_FLUSH_FUA, bio);
764}
765
766void md_super_wait(struct mddev *mddev)
767{
768
769 DEFINE_WAIT(wq);
770 for(;;) {
771 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
772 if (atomic_read(&mddev->pending_writes)==0)
773 break;
774 schedule();
775 }
776 finish_wait(&mddev->sb_wait, &wq);
777}
778
779int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
780 struct page *page, int rw, bool metadata_op)
781{
782 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
783 int ret;
784
785 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
786 rdev->meta_bdev : rdev->bdev;
787 if (metadata_op)
788 bio->bi_iter.bi_sector = sector + rdev->sb_start;
789 else if (rdev->mddev->reshape_position != MaxSector &&
790 (rdev->mddev->reshape_backwards ==
791 (sector >= rdev->mddev->reshape_position)))
792 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
793 else
794 bio->bi_iter.bi_sector = sector + rdev->data_offset;
795 bio_add_page(bio, page, size, 0);
796 submit_bio_wait(rw, bio);
797
798 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
799 bio_put(bio);
800 return ret;
801}
802EXPORT_SYMBOL_GPL(sync_page_io);
803
804static int read_disk_sb(struct md_rdev * rdev, int size)
805{
806 char b[BDEVNAME_SIZE];
807 if (!rdev->sb_page) {
808 MD_BUG();
809 return -EINVAL;
810 }
811 if (rdev->sb_loaded)
812 return 0;
813
814
815 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
816 goto fail;
817 rdev->sb_loaded = 1;
818 return 0;
819
820fail:
821 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
822 bdevname(rdev->bdev,b));
823 return -EINVAL;
824}
825
826static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
827{
828 return sb1->set_uuid0 == sb2->set_uuid0 &&
829 sb1->set_uuid1 == sb2->set_uuid1 &&
830 sb1->set_uuid2 == sb2->set_uuid2 &&
831 sb1->set_uuid3 == sb2->set_uuid3;
832}
833
834static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
835{
836 int ret;
837 mdp_super_t *tmp1, *tmp2;
838
839 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
840 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
841
842 if (!tmp1 || !tmp2) {
843 ret = 0;
844 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
845 goto abort;
846 }
847
848 *tmp1 = *sb1;
849 *tmp2 = *sb2;
850
851
852
853
854 tmp1->nr_disks = 0;
855 tmp2->nr_disks = 0;
856
857 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
858abort:
859 kfree(tmp1);
860 kfree(tmp2);
861 return ret;
862}
863
864
865static u32 md_csum_fold(u32 csum)
866{
867 csum = (csum & 0xffff) + (csum >> 16);
868 return (csum & 0xffff) + (csum >> 16);
869}
870
871static unsigned int calc_sb_csum(mdp_super_t * sb)
872{
873 u64 newcsum = 0;
874 u32 *sb32 = (u32*)sb;
875 int i;
876 unsigned int disk_csum, csum;
877
878 disk_csum = sb->sb_csum;
879 sb->sb_csum = 0;
880
881 for (i = 0; i < MD_SB_BYTES/4 ; i++)
882 newcsum += sb32[i];
883 csum = (newcsum & 0xffffffff) + (newcsum>>32);
884
885
886#ifdef CONFIG_ALPHA
	/* The old alpha code stored a folded checksum.  The value written
	 * to disk for an existing superblock must never change, so keep
	 * that behaviour on alpha; other architectures store disk_csum
	 * unchanged.
	 */
895 sb->sb_csum = md_csum_fold(disk_csum);
896#else
897 sb->sb_csum = disk_csum;
898#endif
899 return csum;
900}
901

/*
 * Handlers for the different on-disk metadata formats.
 *
 * load_super reads and checks the superblock on a device; given a
 * reference device it returns 1 if this superblock is compatible and
 * newer, 0 if compatible but not newer, and -EINVAL if incompatible.
 * validate_super fills in the mddev from the superblock (when
 * mddev->raid_disks is still 0) or checks the rdev against the array and
 * works out its role.
 * sync_super refreshes the in-memory superblock image from current mddev
 * state before it is written out.
 * rdev_size_change resizes the data area, relocating and rewriting the
 * superblock as needed, and returns the new size (0 if not possible).
 * allow_new_offset reports whether the data offset may be moved to
 * new_offset during a reshape.
 */
933struct super_type {
934 char *name;
935 struct module *owner;
936 int (*load_super)(struct md_rdev *rdev,
937 struct md_rdev *refdev,
938 int minor_version);
939 int (*validate_super)(struct mddev *mddev,
940 struct md_rdev *rdev);
941 void (*sync_super)(struct mddev *mddev,
942 struct md_rdev *rdev);
943 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
944 sector_t num_sectors);
945 int (*allow_new_offset)(struct md_rdev *rdev,
946 unsigned long long new_offset);
947};
948
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that
 * do not support bitmaps.  It prints an error message and returns
 * non-zero if a bitmap is configured.
 */
957int md_check_no_bitmap(struct mddev *mddev)
958{
959 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
960 return 0;
961 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
962 mdname(mddev), mddev->pers->name);
963 return 1;
964}
965EXPORT_SYMBOL(md_check_no_bitmap);
966
967
968
969
970static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
971{
972 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
973 mdp_super_t *sb;
974 int ret;
975
	/*
	 * Calculate the position of the superblock (512 byte sectors):
	 * it's at the end of the device, and happens to be a multiple of 4K.
	 */
982 rdev->sb_start = calc_dev_sboffset(rdev);
983
984 ret = read_disk_sb(rdev, MD_SB_BYTES);
985 if (ret) return ret;
986
987 ret = -EINVAL;
988
989 bdevname(rdev->bdev, b);
990 sb = page_address(rdev->sb_page);
991
992 if (sb->md_magic != MD_SB_MAGIC) {
993 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
994 b);
995 goto abort;
996 }
997
998 if (sb->major_version != 0 ||
999 sb->minor_version < 90 ||
1000 sb->minor_version > 91) {
1001 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
1002 sb->major_version, sb->minor_version,
1003 b);
1004 goto abort;
1005 }
1006
1007 if (sb->raid_disks <= 0)
1008 goto abort;
1009
1010 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1011 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
1012 b);
1013 goto abort;
1014 }
1015
1016 rdev->preferred_minor = sb->md_minor;
1017 rdev->data_offset = 0;
1018 rdev->new_data_offset = 0;
1019 rdev->sb_size = MD_SB_BYTES;
1020 rdev->badblocks.shift = -1;
1021
1022 if (sb->level == LEVEL_MULTIPATH)
1023 rdev->desc_nr = -1;
1024 else
1025 rdev->desc_nr = sb->this_disk.number;
1026
1027 if (!refdev) {
1028 ret = 1;
1029 } else {
1030 __u64 ev1, ev2;
1031 mdp_super_t *refsb = page_address(refdev->sb_page);
1032 if (!uuid_equal(refsb, sb)) {
1033 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1034 b, bdevname(refdev->bdev,b2));
1035 goto abort;
1036 }
1037 if (!sb_equal(refsb, sb)) {
1038 printk(KERN_WARNING "md: %s has same UUID"
1039 " but different superblock to %s\n",
1040 b, bdevname(refdev->bdev, b2));
1041 goto abort;
1042 }
1043 ev1 = md_event(sb);
1044 ev2 = md_event(refsb);
1045 if (ev1 > ev2)
1046 ret = 1;
1047 else
1048 ret = 0;
1049 }
1050 rdev->sectors = rdev->sb_start;
1051
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
1055 if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1056 rdev->sectors = (2ULL << 32) - 2;
1057
1058 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1059
1060 ret = -EINVAL;
1061
1062 abort:
1063 return ret;
1064}
1065
1066
1067
1068
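/*
 * validate_super for 0.90.0: fill in the mddev from the superblock
 * (first device only) and work out the role of this particular device.
 */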
1069static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1070{
1071 mdp_disk_t *desc;
1072 mdp_super_t *sb = page_address(rdev->sb_page);
1073 __u64 ev1 = md_event(sb);
1074
1075 rdev->raid_disk = -1;
1076 clear_bit(Faulty, &rdev->flags);
1077 clear_bit(In_sync, &rdev->flags);
1078 clear_bit(Bitmap_sync, &rdev->flags);
1079 clear_bit(WriteMostly, &rdev->flags);
1080
1081 if (mddev->raid_disks == 0) {
1082 mddev->major_version = 0;
1083 mddev->minor_version = sb->minor_version;
1084 mddev->patch_version = sb->patch_version;
1085 mddev->external = 0;
1086 mddev->chunk_sectors = sb->chunk_size >> 9;
1087 mddev->ctime = sb->ctime;
1088 mddev->utime = sb->utime;
1089 mddev->level = sb->level;
1090 mddev->clevel[0] = 0;
1091 mddev->layout = sb->layout;
1092 mddev->raid_disks = sb->raid_disks;
1093 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1094 mddev->events = ev1;
1095 mddev->bitmap_info.offset = 0;
1096 mddev->bitmap_info.space = 0;
1097
1098 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1099 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1100 mddev->reshape_backwards = 0;
1101
1102 if (mddev->minor_version >= 91) {
1103 mddev->reshape_position = sb->reshape_position;
1104 mddev->delta_disks = sb->delta_disks;
1105 mddev->new_level = sb->new_level;
1106 mddev->new_layout = sb->new_layout;
1107 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1108 if (mddev->delta_disks < 0)
1109 mddev->reshape_backwards = 1;
1110 } else {
1111 mddev->reshape_position = MaxSector;
1112 mddev->delta_disks = 0;
1113 mddev->new_level = mddev->level;
1114 mddev->new_layout = mddev->layout;
1115 mddev->new_chunk_sectors = mddev->chunk_sectors;
1116 }
1117
1118 if (sb->state & (1<<MD_SB_CLEAN))
1119 mddev->recovery_cp = MaxSector;
1120 else {
1121 if (sb->events_hi == sb->cp_events_hi &&
1122 sb->events_lo == sb->cp_events_lo) {
1123 mddev->recovery_cp = sb->recovery_cp;
1124 } else
1125 mddev->recovery_cp = 0;
1126 }
1127
1128 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1129 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1130 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1131 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1132
1133 mddev->max_disks = MD_SB_DISKS;
1134
1135 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1136 mddev->bitmap_info.file == NULL) {
1137 mddev->bitmap_info.offset =
1138 mddev->bitmap_info.default_offset;
1139 mddev->bitmap_info.space =
1140 mddev->bitmap_info.default_space;
1141 }
1142
1143 } else if (mddev->pers == NULL) {
1144
1145
1146 ++ev1;
1147 if (sb->disks[rdev->desc_nr].state & (
1148 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1149 if (ev1 < mddev->events)
1150 return -EINVAL;
1151 } else if (mddev->bitmap) {
1152
1153
1154
1155 if (ev1 < mddev->bitmap->events_cleared)
1156 return 0;
1157 if (ev1 < mddev->events)
1158 set_bit(Bitmap_sync, &rdev->flags);
1159 } else {
1160 if (ev1 < mddev->events)
1161
1162 return 0;
1163 }
1164
1165 if (mddev->level != LEVEL_MULTIPATH) {
1166 desc = sb->disks + rdev->desc_nr;
1167
1168 if (desc->state & (1<<MD_DISK_FAULTY))
1169 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
1172 set_bit(In_sync, &rdev->flags);
1173 rdev->raid_disk = desc->raid_disk;
1174 rdev->saved_raid_disk = desc->raid_disk;
1175 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1176
1177
1178
1179 if (mddev->minor_version >= 91) {
1180 rdev->recovery_offset = 0;
1181 rdev->raid_disk = desc->raid_disk;
1182 }
1183 }
1184 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1185 set_bit(WriteMostly, &rdev->flags);
1186 } else
1187 set_bit(In_sync, &rdev->flags);
1188 return 0;
1189}
1190
1191
1192
1193
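/*
 * sync_super for 0.90.0: refresh the on-disk superblock image from the
 * current mddev state.
 */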
1194static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1195{
1196 mdp_super_t *sb;
1197 struct md_rdev *rdev2;
1198 int next_spare = mddev->raid_disks;
1199
	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of the highest desc_nr
	 *    (next_spare)
	 * 3/ any empty slot below raid_disks becomes removed/faulty
	 *
	 * disks[0] gets initialised to REMOVED because we cannot be sure
	 * from the other fields whether it has been initialised or not.
	 */
1211 int i;
1212 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1213
1214 rdev->sb_size = MD_SB_BYTES;
1215
1216 sb = page_address(rdev->sb_page);
1217
1218 memset(sb, 0, sizeof(*sb));
1219
1220 sb->md_magic = MD_SB_MAGIC;
1221 sb->major_version = mddev->major_version;
1222 sb->patch_version = mddev->patch_version;
1223 sb->gvalid_words = 0;
1224 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1225 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1226 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1227 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1228
1229 sb->ctime = mddev->ctime;
1230 sb->level = mddev->level;
1231 sb->size = mddev->dev_sectors / 2;
1232 sb->raid_disks = mddev->raid_disks;
1233 sb->md_minor = mddev->md_minor;
1234 sb->not_persistent = 0;
1235 sb->utime = mddev->utime;
1236 sb->state = 0;
1237 sb->events_hi = (mddev->events>>32);
1238 sb->events_lo = (u32)mddev->events;
1239
1240 if (mddev->reshape_position == MaxSector)
1241 sb->minor_version = 90;
1242 else {
1243 sb->minor_version = 91;
1244 sb->reshape_position = mddev->reshape_position;
1245 sb->new_level = mddev->new_level;
1246 sb->delta_disks = mddev->delta_disks;
1247 sb->new_layout = mddev->new_layout;
1248 sb->new_chunk = mddev->new_chunk_sectors << 9;
1249 }
1250 mddev->minor_version = sb->minor_version;
1251 if (mddev->in_sync)
1252 {
1253 sb->recovery_cp = mddev->recovery_cp;
1254 sb->cp_events_hi = (mddev->events>>32);
1255 sb->cp_events_lo = (u32)mddev->events;
1256 if (mddev->recovery_cp == MaxSector)
1257 sb->state = (1<< MD_SB_CLEAN);
1258 } else
1259 sb->recovery_cp = 0;
1260
1261 sb->layout = mddev->layout;
1262 sb->chunk_size = mddev->chunk_sectors << 9;
1263
1264 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1265 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1266
1267 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1268 rdev_for_each(rdev2, mddev) {
1269 mdp_disk_t *d;
1270 int desc_nr;
1271 int is_active = test_bit(In_sync, &rdev2->flags);
1272
1273 if (rdev2->raid_disk >= 0 &&
1274 sb->minor_version >= 91)
1275
1276
1277
1278
1279 is_active = 1;
1280 if (rdev2->raid_disk < 0 ||
1281 test_bit(Faulty, &rdev2->flags))
1282 is_active = 0;
1283 if (is_active)
1284 desc_nr = rdev2->raid_disk;
1285 else
1286 desc_nr = next_spare++;
1287 rdev2->desc_nr = desc_nr;
1288 d = &sb->disks[rdev2->desc_nr];
1289 nr_disks++;
1290 d->number = rdev2->desc_nr;
1291 d->major = MAJOR(rdev2->bdev->bd_dev);
1292 d->minor = MINOR(rdev2->bdev->bd_dev);
1293 if (is_active)
1294 d->raid_disk = rdev2->raid_disk;
1295 else
1296 d->raid_disk = rdev2->desc_nr;
1297 if (test_bit(Faulty, &rdev2->flags))
1298 d->state = (1<<MD_DISK_FAULTY);
1299 else if (is_active) {
1300 d->state = (1<<MD_DISK_ACTIVE);
1301 if (test_bit(In_sync, &rdev2->flags))
1302 d->state |= (1<<MD_DISK_SYNC);
1303 active++;
1304 working++;
1305 } else {
1306 d->state = 0;
1307 spare++;
1308 working++;
1309 }
1310 if (test_bit(WriteMostly, &rdev2->flags))
1311 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1312 }
1313
1314 for (i=0 ; i < mddev->raid_disks ; i++) {
1315 mdp_disk_t *d = &sb->disks[i];
1316 if (d->state == 0 && d->number == 0) {
1317 d->number = i;
1318 d->raid_disk = i;
1319 d->state = (1<<MD_DISK_REMOVED);
1320 d->state |= (1<<MD_DISK_FAULTY);
1321 failed++;
1322 }
1323 }
1324 sb->nr_disks = nr_disks;
1325 sb->active_disks = active;
1326 sb->working_disks = working;
1327 sb->failed_disks = failed;
1328 sb->spare_disks = spare;
1329
1330 sb->this_disk = sb->disks[rdev->desc_nr];
1331 sb->sb_csum = calc_sb_csum(sb);
1332}
1333
1334
1335
1336
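/*
 * rdev_size_change for 0.90.0: the superblock always sits near the end
 * of the device, so it has to be relocated and rewritten when the data
 * area grows.
 */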
1337static unsigned long long
1338super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1339{
1340 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1341 return 0;
1342 if (rdev->mddev->bitmap_info.offset)
1343 return 0;
1344 rdev->sb_start = calc_dev_sboffset(rdev);
1345 if (!num_sectors || num_sectors > rdev->sb_start)
1346 num_sectors = rdev->sb_start;
1347
	/* Limit to 4TB as metadata cannot record more than that. */
1350 if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1351 num_sectors = (2ULL << 32) - 2;
1352 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1353 rdev->sb_page);
1354 md_super_wait(rdev->mddev);
1355 return num_sectors;
1356}
1357
1358static int
1359super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1360{
1361
1362 return new_offset == 0;
1363}
1364
1365
1366
1367
1368
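/*
 * version 1 superblock
 */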
1369static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1370{
1371 __le32 disk_csum;
1372 u32 csum;
1373 unsigned long long newcsum;
1374 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1375 __le32 *isuper = (__le32*)sb;
1376
1377 disk_csum = sb->sb_csum;
1378 sb->sb_csum = 0;
1379 newcsum = 0;
1380 for (; size >= 4; size -= 4)
1381 newcsum += le32_to_cpu(*isuper++);
1382
1383 if (size == 2)
1384 newcsum += le16_to_cpu(*(__le16*) isuper);
1385
1386 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1387 sb->sb_csum = disk_csum;
1388 return cpu_to_le32(csum);
1389}
1390
1391static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
1392 int acknowledged);
1393static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1394{
1395 struct mdp_superblock_1 *sb;
1396 int ret;
1397 sector_t sb_start;
1398 sector_t sectors;
1399 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1400 int bmask;
1401
	/*
	 * Calculate the position of the superblock in 512 byte sectors.
	 * It is always aligned to a 4K boundary and, depending on
	 * minor_version, it can be:
	 * 0: at least 8K, but less than 12K, from the end of the device
	 * 1: at the start of the device
	 * 2: 4K from the start of the device
	 */
1410 switch(minor_version) {
1411 case 0:
1412 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1413 sb_start -= 8*2;
1414 sb_start &= ~(sector_t)(4*2-1);
1415 break;
1416 case 1:
1417 sb_start = 0;
1418 break;
1419 case 2:
1420 sb_start = 8;
1421 break;
1422 default:
1423 return -EINVAL;
1424 }
1425 rdev->sb_start = sb_start;
1426
1427
1428
1429
1430 ret = read_disk_sb(rdev, 4096);
1431 if (ret) return ret;
1432
1433
1434 sb = page_address(rdev->sb_page);
1435
1436 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1437 sb->major_version != cpu_to_le32(1) ||
1438 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1439 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1440 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1441 return -EINVAL;
1442
1443 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1444 printk("md: invalid superblock checksum on %s\n",
1445 bdevname(rdev->bdev,b));
1446 return -EINVAL;
1447 }
1448 if (le64_to_cpu(sb->data_size) < 10) {
1449 printk("md: data_size too small on %s\n",
1450 bdevname(rdev->bdev,b));
1451 return -EINVAL;
1452 }
1453 if (sb->pad0 ||
1454 sb->pad3[0] ||
1455 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1456
1457 return -EINVAL;
1458
1459 rdev->preferred_minor = 0xffff;
1460 rdev->data_offset = le64_to_cpu(sb->data_offset);
1461 rdev->new_data_offset = rdev->data_offset;
1462 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1463 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1464 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1465 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1466
1467 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1468 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1469 if (rdev->sb_size & bmask)
1470 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1471
1472 if (minor_version
1473 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1474 return -EINVAL;
1475 if (minor_version
1476 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1477 return -EINVAL;
1478
1479 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1480 rdev->desc_nr = -1;
1481 else
1482 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1483
1484 if (!rdev->bb_page) {
1485 rdev->bb_page = alloc_page(GFP_KERNEL);
1486 if (!rdev->bb_page)
1487 return -ENOMEM;
1488 }
1489 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1490 rdev->badblocks.count == 0) {
1491
1492
1493
1494 s32 offset;
1495 sector_t bb_sector;
1496 u64 *bbp;
1497 int i;
1498 int sectors = le16_to_cpu(sb->bblog_size);
1499 if (sectors > (PAGE_SIZE / 512))
1500 return -EINVAL;
1501 offset = le32_to_cpu(sb->bblog_offset);
1502 if (offset == 0)
1503 return -EINVAL;
1504 bb_sector = (long long)offset;
1505 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1506 rdev->bb_page, READ, true))
1507 return -EIO;
1508 bbp = (u64 *)page_address(rdev->bb_page);
1509 rdev->badblocks.shift = sb->bblog_shift;
1510 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1511 u64 bb = le64_to_cpu(*bbp);
1512 int count = bb & (0x3ff);
1513 u64 sector = bb >> 10;
1514 sector <<= sb->bblog_shift;
1515 count <<= sb->bblog_shift;
1516 if (bb + 1 == 0)
1517 break;
1518 if (md_set_badblocks(&rdev->badblocks,
1519 sector, count, 1) == 0)
1520 return -EINVAL;
1521 }
1522 } else if (sb->bblog_offset != 0)
1523 rdev->badblocks.shift = 0;
1524
1525 if (!refdev) {
1526 ret = 1;
1527 } else {
1528 __u64 ev1, ev2;
1529 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1530
1531 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1532 sb->level != refsb->level ||
1533 sb->layout != refsb->layout ||
1534 sb->chunksize != refsb->chunksize) {
1535 printk(KERN_WARNING "md: %s has strangely different"
1536 " superblock to %s\n",
1537 bdevname(rdev->bdev,b),
1538 bdevname(refdev->bdev,b2));
1539 return -EINVAL;
1540 }
1541 ev1 = le64_to_cpu(sb->events);
1542 ev2 = le64_to_cpu(refsb->events);
1543
1544 if (ev1 > ev2)
1545 ret = 1;
1546 else
1547 ret = 0;
1548 }
1549 if (minor_version) {
1550 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1551 sectors -= rdev->data_offset;
1552 } else
1553 sectors = rdev->sb_start;
1554 if (sectors < le64_to_cpu(sb->data_size))
1555 return -EINVAL;
1556 rdev->sectors = le64_to_cpu(sb->data_size);
1557 return ret;
1558}
1559
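/*
 * validate_super for the version-1 format: fill in the mddev from the
 * superblock (first device only) and determine this device's role from
 * sb->dev_roles[].
 */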
1560static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1561{
1562 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1563 __u64 ev1 = le64_to_cpu(sb->events);
1564
1565 rdev->raid_disk = -1;
1566 clear_bit(Faulty, &rdev->flags);
1567 clear_bit(In_sync, &rdev->flags);
1568 clear_bit(Bitmap_sync, &rdev->flags);
1569 clear_bit(WriteMostly, &rdev->flags);
1570
1571 if (mddev->raid_disks == 0) {
1572 mddev->major_version = 1;
1573 mddev->patch_version = 0;
1574 mddev->external = 0;
1575 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1576 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1577 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1578 mddev->level = le32_to_cpu(sb->level);
1579 mddev->clevel[0] = 0;
1580 mddev->layout = le32_to_cpu(sb->layout);
1581 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1582 mddev->dev_sectors = le64_to_cpu(sb->size);
1583 mddev->events = ev1;
1584 mddev->bitmap_info.offset = 0;
1585 mddev->bitmap_info.space = 0;
1586
1587
1588
1589 mddev->bitmap_info.default_offset = 1024 >> 9;
1590 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1591 mddev->reshape_backwards = 0;
1592
1593 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1594 memcpy(mddev->uuid, sb->set_uuid, 16);
1595
1596 mddev->max_disks = (4096-256)/2;
1597
1598 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1599 mddev->bitmap_info.file == NULL) {
1600 mddev->bitmap_info.offset =
1601 (__s32)le32_to_cpu(sb->bitmap_offset);
1602
1603
1604
1605
1606
1607 if (mddev->minor_version > 0)
1608 mddev->bitmap_info.space = 0;
1609 else if (mddev->bitmap_info.offset > 0)
1610 mddev->bitmap_info.space =
1611 8 - mddev->bitmap_info.offset;
1612 else
1613 mddev->bitmap_info.space =
1614 -mddev->bitmap_info.offset;
1615 }
1616
1617 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1618 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1619 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1620 mddev->new_level = le32_to_cpu(sb->new_level);
1621 mddev->new_layout = le32_to_cpu(sb->new_layout);
1622 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1623 if (mddev->delta_disks < 0 ||
1624 (mddev->delta_disks == 0 &&
1625 (le32_to_cpu(sb->feature_map)
1626 & MD_FEATURE_RESHAPE_BACKWARDS)))
1627 mddev->reshape_backwards = 1;
1628 } else {
1629 mddev->reshape_position = MaxSector;
1630 mddev->delta_disks = 0;
1631 mddev->new_level = mddev->level;
1632 mddev->new_layout = mddev->layout;
1633 mddev->new_chunk_sectors = mddev->chunk_sectors;
1634 }
1635
1636 } else if (mddev->pers == NULL) {
1637
1638
1639 ++ev1;
1640 if (rdev->desc_nr >= 0 &&
1641 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1642 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
1643 if (ev1 < mddev->events)
1644 return -EINVAL;
1645 } else if (mddev->bitmap) {
1646
1647
1648
1649 if (ev1 < mddev->bitmap->events_cleared)
1650 return 0;
1651 if (ev1 < mddev->events)
1652 set_bit(Bitmap_sync, &rdev->flags);
1653 } else {
1654 if (ev1 < mddev->events)
1655
1656 return 0;
1657 }
1658 if (mddev->level != LEVEL_MULTIPATH) {
1659 int role;
1660 if (rdev->desc_nr < 0 ||
1661 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1662 role = 0xffff;
1663 rdev->desc_nr = -1;
1664 } else
1665 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1666 switch(role) {
1667 case 0xffff:
1668 break;
1669 case 0xfffe:
1670 set_bit(Faulty, &rdev->flags);
1671 break;
1672 default:
1673 rdev->saved_raid_disk = role;
1674 if ((le32_to_cpu(sb->feature_map) &
1675 MD_FEATURE_RECOVERY_OFFSET)) {
1676 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1677 if (!(le32_to_cpu(sb->feature_map) &
1678 MD_FEATURE_RECOVERY_BITMAP))
1679 rdev->saved_raid_disk = -1;
1680 } else
1681 set_bit(In_sync, &rdev->flags);
1682 rdev->raid_disk = role;
1683 break;
1684 }
1685 if (sb->devflags & WriteMostly1)
1686 set_bit(WriteMostly, &rdev->flags);
1687 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1688 set_bit(Replacement, &rdev->flags);
1689 } else
1690 set_bit(In_sync, &rdev->flags);
1691
1692 return 0;
1693}
1694
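/*
 * sync_super for the version-1 format: rebuild the superblock image,
 * including the bad-block log and the dev_roles[] table.
 */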
1695static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1696{
1697 struct mdp_superblock_1 *sb;
1698 struct md_rdev *rdev2;
1699 int max_dev, i;
1700
1701
1702 sb = page_address(rdev->sb_page);
1703
1704 sb->feature_map = 0;
1705 sb->pad0 = 0;
1706 sb->recovery_offset = cpu_to_le64(0);
1707 memset(sb->pad3, 0, sizeof(sb->pad3));
1708
1709 sb->utime = cpu_to_le64((__u64)mddev->utime);
1710 sb->events = cpu_to_le64(mddev->events);
1711 if (mddev->in_sync)
1712 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1713 else
1714 sb->resync_offset = cpu_to_le64(0);
1715
1716 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1717
1718 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1719 sb->size = cpu_to_le64(mddev->dev_sectors);
1720 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1721 sb->level = cpu_to_le32(mddev->level);
1722 sb->layout = cpu_to_le32(mddev->layout);
1723
1724 if (test_bit(WriteMostly, &rdev->flags))
1725 sb->devflags |= WriteMostly1;
1726 else
1727 sb->devflags &= ~WriteMostly1;
1728 sb->data_offset = cpu_to_le64(rdev->data_offset);
1729 sb->data_size = cpu_to_le64(rdev->sectors);
1730
1731 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1732 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1733 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1734 }
1735
1736 if (rdev->raid_disk >= 0 &&
1737 !test_bit(In_sync, &rdev->flags)) {
1738 sb->feature_map |=
1739 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1740 sb->recovery_offset =
1741 cpu_to_le64(rdev->recovery_offset);
1742 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
1743 sb->feature_map |=
1744 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
1745 }
1746 if (test_bit(Replacement, &rdev->flags))
1747 sb->feature_map |=
1748 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1749
1750 if (mddev->reshape_position != MaxSector) {
1751 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1752 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1753 sb->new_layout = cpu_to_le32(mddev->new_layout);
1754 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1755 sb->new_level = cpu_to_le32(mddev->new_level);
1756 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1757 if (mddev->delta_disks == 0 &&
1758 mddev->reshape_backwards)
1759 sb->feature_map
1760 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1761 if (rdev->new_data_offset != rdev->data_offset) {
1762 sb->feature_map
1763 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1764 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1765 - rdev->data_offset));
1766 }
1767 }
1768
	if (rdev->badblocks.count == 0)
		/* Nothing to do for bad blocks */ ;
	else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
		md_error(mddev, rdev);
1774 else {
1775 struct badblocks *bb = &rdev->badblocks;
1776 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1777 u64 *p = bb->page;
1778 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1779 if (bb->changed) {
1780 unsigned seq;
1781
1782retry:
1783 seq = read_seqbegin(&bb->lock);
1784
1785 memset(bbp, 0xff, PAGE_SIZE);
1786
1787 for (i = 0 ; i < bb->count ; i++) {
1788 u64 internal_bb = p[i];
1789 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1790 | BB_LEN(internal_bb));
1791 bbp[i] = cpu_to_le64(store_bb);
1792 }
1793 bb->changed = 0;
1794 if (read_seqretry(&bb->lock, seq))
1795 goto retry;
1796
1797 bb->sector = (rdev->sb_start +
1798 (int)le32_to_cpu(sb->bblog_offset));
1799 bb->size = le16_to_cpu(sb->bblog_size);
1800 }
1801 }
1802
1803 max_dev = 0;
1804 rdev_for_each(rdev2, mddev)
1805 if (rdev2->desc_nr+1 > max_dev)
1806 max_dev = rdev2->desc_nr+1;
1807
1808 if (max_dev > le32_to_cpu(sb->max_dev)) {
1809 int bmask;
1810 sb->max_dev = cpu_to_le32(max_dev);
1811 rdev->sb_size = max_dev * 2 + 256;
1812 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1813 if (rdev->sb_size & bmask)
1814 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1815 } else
1816 max_dev = le32_to_cpu(sb->max_dev);
1817
1818 for (i=0; i<max_dev;i++)
1819 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1820
1821 rdev_for_each(rdev2, mddev) {
1822 i = rdev2->desc_nr;
1823 if (test_bit(Faulty, &rdev2->flags))
1824 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1825 else if (test_bit(In_sync, &rdev2->flags))
1826 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1827 else if (rdev2->raid_disk >= 0)
1828 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1829 else
1830 sb->dev_roles[i] = cpu_to_le16(0xffff);
1831 }
1832
1833 sb->sb_csum = calc_sb_1_csum(sb);
1834}
1835
1836static unsigned long long
1837super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1838{
1839 struct mdp_superblock_1 *sb;
1840 sector_t max_sectors;
1841 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1842 return 0;
1843 if (rdev->data_offset != rdev->new_data_offset)
1844 return 0;
1845 if (rdev->sb_start < rdev->data_offset) {
1846
1847 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1848 max_sectors -= rdev->data_offset;
1849 if (!num_sectors || num_sectors > max_sectors)
1850 num_sectors = max_sectors;
1851 } else if (rdev->mddev->bitmap_info.offset) {
1852
1853 return 0;
1854 } else {
1855
1856 sector_t sb_start;
1857 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1858 sb_start &= ~(sector_t)(4*2 - 1);
1859 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1860 if (!num_sectors || num_sectors > max_sectors)
1861 num_sectors = max_sectors;
1862 rdev->sb_start = sb_start;
1863 }
1864 sb = page_address(rdev->sb_page);
1865 sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
1867 sb->sb_csum = calc_sb_1_csum(sb);
1868 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1869 rdev->sb_page);
1870 md_super_wait(rdev->mddev);
1871 return num_sectors;
1872
1873}
1874
1875static int
1876super_1_allow_new_offset(struct md_rdev *rdev,
1877 unsigned long long new_offset)
1878{
1879
1880 struct bitmap *bitmap;
1881 if (new_offset >= rdev->data_offset)
1882 return 1;
1883
1884
1885
1886 if (rdev->mddev->minor_version == 0)
1887 return 1;
1888
1889
1890
1891
1892
1893
1894
1895 if (rdev->sb_start + (32+4)*2 > new_offset)
1896 return 0;
1897 bitmap = rdev->mddev->bitmap;
1898 if (bitmap && !rdev->mddev->bitmap_info.file &&
1899 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1900 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1901 return 0;
1902 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1903 return 0;
1904
1905 return 1;
1906}
1907
1908static struct super_type super_types[] = {
1909 [0] = {
1910 .name = "0.90.0",
1911 .owner = THIS_MODULE,
1912 .load_super = super_90_load,
1913 .validate_super = super_90_validate,
1914 .sync_super = super_90_sync,
1915 .rdev_size_change = super_90_rdev_size_change,
1916 .allow_new_offset = super_90_allow_new_offset,
1917 },
1918 [1] = {
1919 .name = "md-1",
1920 .owner = THIS_MODULE,
1921 .load_super = super_1_load,
1922 .validate_super = super_1_validate,
1923 .sync_super = super_1_sync,
1924 .rdev_size_change = super_1_rdev_size_change,
1925 .allow_new_offset = super_1_allow_new_offset,
1926 },
1927};
1928
1929static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1930{
1931 if (mddev->sync_super) {
1932 mddev->sync_super(mddev, rdev);
1933 return;
1934 }
1935
1936 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1937
1938 super_types[mddev->major_version].sync_super(mddev, rdev);
1939}
1940
1941static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1942{
1943 struct md_rdev *rdev, *rdev2;
1944
1945 rcu_read_lock();
1946 rdev_for_each_rcu(rdev, mddev1)
1947 rdev_for_each_rcu(rdev2, mddev2)
1948 if (rdev->bdev->bd_contains ==
1949 rdev2->bdev->bd_contains) {
1950 rcu_read_unlock();
1951 return 1;
1952 }
1953 rcu_read_unlock();
1954 return 0;
1955}
1956
1957static LIST_HEAD(pending_raid_disks);
1958
/*
 * Try to register a data integrity profile for an mddev.
 *
 * This is only for the gendisk of the array: it succeeds only when every
 * active member device advertises a matching integrity profile.
 */
1966int md_integrity_register(struct mddev *mddev)
1967{
1968 struct md_rdev *rdev, *reference = NULL;
1969
1970 if (list_empty(&mddev->disks))
1971 return 0;
1972 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
1973 return 0;
1974 rdev_for_each(rdev, mddev) {
1975
1976 if (test_bit(Faulty, &rdev->flags))
1977 continue;
1978 if (rdev->raid_disk < 0)
1979 continue;
1980 if (!reference) {
1981
1982 reference = rdev;
1983 continue;
1984 }
1985
1986 if (blk_integrity_compare(reference->bdev->bd_disk,
1987 rdev->bdev->bd_disk) < 0)
1988 return -EINVAL;
1989 }
1990 if (!reference || !bdev_get_integrity(reference->bdev))
1991 return 0;
1992
1993
1994
1995
1996 if (blk_integrity_register(mddev->gendisk,
1997 bdev_get_integrity(reference->bdev)) != 0) {
1998 printk(KERN_ERR "md: failed to register integrity for %s\n",
1999 mdname(mddev));
2000 return -EINVAL;
2001 }
2002 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2003 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2004 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2005 mdname(mddev));
2006 return -EINVAL;
2007 }
2008 return 0;
2009}
2010EXPORT_SYMBOL(md_integrity_register);
2011
2012
2013void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2014{
2015 struct blk_integrity *bi_rdev;
2016 struct blk_integrity *bi_mddev;
2017
2018 if (!mddev->gendisk)
2019 return;
2020
2021 bi_rdev = bdev_get_integrity(rdev->bdev);
2022 bi_mddev = blk_get_integrity(mddev->gendisk);
2023
2024 if (!bi_mddev)
2025 return;
2026 if (rdev->raid_disk < 0)
2027 return;
2028 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
2029 rdev->bdev->bd_disk) >= 0)
2030 return;
2031 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
2032 blk_integrity_unregister(mddev->gendisk);
2033}
2034EXPORT_SYMBOL(md_integrity_add_rdev);
2035
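/*
 * Attach an rdev to an array: verify it is not already bound, pick a
 * desc_nr, create the sysfs links and add it to mddev->disks.
 */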
2036static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
2037{
2038 char b[BDEVNAME_SIZE];
2039 struct kobject *ko;
2040 char *s;
2041 int err;
2042
2043 if (rdev->mddev) {
2044 MD_BUG();
2045 return -EINVAL;
2046 }
2047
2048
2049 if (find_rdev(mddev, rdev->bdev->bd_dev))
2050 return -EEXIST;
2051
2052
2053 if (rdev->sectors && (mddev->dev_sectors == 0 ||
2054 rdev->sectors < mddev->dev_sectors)) {
2055 if (mddev->pers) {
2056
2057
2058
2059
2060 if (mddev->level > 0)
2061 return -ENOSPC;
2062 } else
2063 mddev->dev_sectors = rdev->sectors;
2064 }
2065
2066
2067
2068
2069
2070 if (rdev->desc_nr < 0) {
2071 int choice = 0;
2072 if (mddev->pers) choice = mddev->raid_disks;
2073 while (find_rdev_nr(mddev, choice))
2074 choice++;
2075 rdev->desc_nr = choice;
2076 } else {
2077 if (find_rdev_nr(mddev, rdev->desc_nr))
2078 return -EBUSY;
2079 }
2080 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2081 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2082 mdname(mddev), mddev->max_disks);
2083 return -EBUSY;
2084 }
2085 bdevname(rdev->bdev,b);
2086 while ( (s=strchr(b, '/')) != NULL)
2087 *s = '!';
2088
2089 rdev->mddev = mddev;
2090 printk(KERN_INFO "md: bind<%s>\n", b);
2091
2092 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2093 goto fail;
2094
2095 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2096 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2097 ;
2098 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2099
2100 list_add_rcu(&rdev->same_set, &mddev->disks);
2101 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2102
2103
2104 mddev->recovery_disabled++;
2105
2106 return 0;
2107
2108 fail:
2109 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2110 b, mdname(mddev));
2111 return err;
2112}
2113
2114static void md_delayed_delete(struct work_struct *ws)
2115{
2116 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2117 kobject_del(&rdev->kobj);
2118 kobject_put(&rdev->kobj);
2119}
2120
2121static void unbind_rdev_from_array(struct md_rdev * rdev)
2122{
2123 char b[BDEVNAME_SIZE];
2124 if (!rdev->mddev) {
2125 MD_BUG();
2126 return;
2127 }
2128 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2129 list_del_rcu(&rdev->same_set);
2130 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2131 rdev->mddev = NULL;
2132 sysfs_remove_link(&rdev->kobj, "block");
2133 sysfs_put(rdev->sysfs_state);
2134 rdev->sysfs_state = NULL;
2135 rdev->badblocks.count = 0;
2136
2137
2138
2139
2140 synchronize_rcu();
2141 INIT_WORK(&rdev->del_work, md_delayed_delete);
2142 kobject_get(&rdev->kobj);
2143 queue_work(md_misc_wq, &rdev->del_work);
2144}
2145
2146
/*
 * Claim the block device exclusively so that nothing else (another array
 * or a filesystem mount) can use it while md owns the rdev.
 */
2151static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2152{
2153 int err = 0;
2154 struct block_device *bdev;
2155 char b[BDEVNAME_SIZE];
2156
2157 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2158 shared ? (struct md_rdev *)lock_rdev : rdev);
2159 if (IS_ERR(bdev)) {
2160 printk(KERN_ERR "md: could not open %s.\n",
2161 __bdevname(dev, b));
2162 return PTR_ERR(bdev);
2163 }
2164 rdev->bdev = bdev;
2165 return err;
2166}
2167
2168static void unlock_rdev(struct md_rdev *rdev)
2169{
2170 struct block_device *bdev = rdev->bdev;
2171 rdev->bdev = NULL;
2172 if (!bdev)
2173 MD_BUG();
2174 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2175}
2176
2177void md_autodetect_dev(dev_t dev);
2178
2179static void export_rdev(struct md_rdev * rdev)
2180{
2181 char b[BDEVNAME_SIZE];
2182 printk(KERN_INFO "md: export_rdev(%s)\n",
2183 bdevname(rdev->bdev,b));
2184 if (rdev->mddev)
2185 MD_BUG();
2186 md_rdev_clear(rdev);
2187#ifndef MODULE
2188 if (test_bit(AutoDetected, &rdev->flags))
2189 md_autodetect_dev(rdev->bdev->bd_dev);
2190#endif
2191 unlock_rdev(rdev);
2192 kobject_put(&rdev->kobj);
2193}
2194
2195static void kick_rdev_from_array(struct md_rdev * rdev)
2196{
2197 unbind_rdev_from_array(rdev);
2198 export_rdev(rdev);
2199}
2200
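/* Detach and release every rdev in the array, leaving it empty. */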
2201static void export_array(struct mddev *mddev)
2202{
2203 struct md_rdev *rdev, *tmp;
2204
2205 rdev_for_each_safe(rdev, tmp, mddev) {
2206 if (!rdev->mddev) {
2207 MD_BUG();
2208 continue;
2209 }
2210 kick_rdev_from_array(rdev);
2211 }
2212 if (!list_empty(&mddev->disks))
2213 MD_BUG();
2214 mddev->raid_disks = 0;
2215 mddev->major_version = 0;
2216}
2217
2218static void print_desc(mdp_disk_t *desc)
2219{
2220 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
2221 desc->major,desc->minor,desc->raid_disk,desc->state);
2222}
2223
2224static void print_sb_90(mdp_super_t *sb)
2225{
2226 int i;
2227
2228 printk(KERN_INFO
2229 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
2230 sb->major_version, sb->minor_version, sb->patch_version,
2231 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
2232 sb->ctime);
2233 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
2234 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
2235 sb->md_minor, sb->layout, sb->chunk_size);
2236 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
2237 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
2238 sb->utime, sb->state, sb->active_disks, sb->working_disks,
2239 sb->failed_disks, sb->spare_disks,
2240 sb->sb_csum, (unsigned long)sb->events_lo);
2241
2242 printk(KERN_INFO);
2243 for (i = 0; i < MD_SB_DISKS; i++) {
2244 mdp_disk_t *desc;
2245
2246 desc = sb->disks + i;
2247 if (desc->number || desc->major || desc->minor ||
2248 desc->raid_disk || (desc->state && (desc->state != 4))) {
2249 printk(" D %2d: ", i);
2250 print_desc(desc);
2251 }
2252 }
2253 printk(KERN_INFO "md: THIS: ");
2254 print_desc(&sb->this_disk);
2255}
2256
2257static void print_sb_1(struct mdp_superblock_1 *sb)
2258{
2259 __u8 *uuid;
2260
2261 uuid = sb->set_uuid;
2262 printk(KERN_INFO
2263 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
2264 "md: Name: \"%s\" CT:%llu\n",
2265 le32_to_cpu(sb->major_version),
2266 le32_to_cpu(sb->feature_map),
2267 uuid,
2268 sb->set_name,
2269 (unsigned long long)le64_to_cpu(sb->ctime)
2270 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
2271
2272 uuid = sb->device_uuid;
2273 printk(KERN_INFO
2274 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
2275 " RO:%llu\n"
2276 "md: Dev:%08x UUID: %pU\n"
2277 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
2278 "md: (MaxDev:%u) \n",
2279 le32_to_cpu(sb->level),
2280 (unsigned long long)le64_to_cpu(sb->size),
2281 le32_to_cpu(sb->raid_disks),
2282 le32_to_cpu(sb->layout),
2283 le32_to_cpu(sb->chunksize),
2284 (unsigned long long)le64_to_cpu(sb->data_offset),
2285 (unsigned long long)le64_to_cpu(sb->data_size),
2286 (unsigned long long)le64_to_cpu(sb->super_offset),
2287 (unsigned long long)le64_to_cpu(sb->recovery_offset),
2288 le32_to_cpu(sb->dev_number),
2289 uuid,
2290 sb->devflags,
2291 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
2292 (unsigned long long)le64_to_cpu(sb->events),
2293 (unsigned long long)le64_to_cpu(sb->resync_offset),
2294 le32_to_cpu(sb->sb_csum),
2295 le32_to_cpu(sb->max_dev)
2296 );
2297}
2298
2299static void print_rdev(struct md_rdev *rdev, int major_version)
2300{
2301 char b[BDEVNAME_SIZE];
2302 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
2303 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
2304 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
2305 rdev->desc_nr);
2306 if (rdev->sb_loaded) {
2307 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
2308 switch (major_version) {
2309 case 0:
2310 print_sb_90(page_address(rdev->sb_page));
2311 break;
2312 case 1:
2313 print_sb_1(page_address(rdev->sb_page));
2314 break;
2315 }
2316 } else
2317 printk(KERN_INFO "md: no rdev superblock!\n");
2318}
2319
2320static void md_print_devices(void)
2321{
2322 struct list_head *tmp;
2323 struct md_rdev *rdev;
2324 struct mddev *mddev;
2325 char b[BDEVNAME_SIZE];
2326
2327 printk("\n");
2328 printk("md: **********************************\n");
2329 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
2330 printk("md: **********************************\n");
2331 for_each_mddev(mddev, tmp) {
2332
2333 if (mddev->bitmap)
2334 bitmap_print_sb(mddev->bitmap);
2335 else
2336 printk("%s: ", mdname(mddev));
2337 rdev_for_each(rdev, mddev)
2338 printk("<%s>", bdevname(rdev->bdev,b));
2339 printk("\n");
2340
2341 rdev_for_each(rdev, mddev)
2342 print_rdev(rdev, mddev->major_version);
2343 }
2344 printk("md: **********************************\n");
2345 printk("\n");
2346}
2347
2348
2349static void sync_sbs(struct mddev * mddev, int nospares)
2350{
2351 /* Update each superblock (in-memory image), but
2352 * if we are allowed to, skip spares which already
2353 * have the right event counter, or have one earlier
2354 * (which would mean they aren't being marked as dirty
2355 * with the rest of the array)
2356 */
2357 struct md_rdev *rdev;
2358 rdev_for_each(rdev, mddev) {
2359 if (rdev->sb_events == mddev->events ||
2360 (nospares &&
2361 rdev->raid_disk < 0 &&
2362 rdev->sb_events+1 == mddev->events)) {
2363 /* Don't update this superblock */
2364 rdev->sb_loaded = 2;
2365 } else {
2366 sync_super(mddev, rdev);
2367 rdev->sb_loaded = 1;
2368 }
2369 }
2370}
2371
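/*
 * md_update_sb() synchronises the array's superblocks with the in-memory
 * state: it bumps (or, for a clean<->dirty transition, possibly rolls back)
 * the event count, rewrites the superblock on every non-faulty member via
 * md_super_write(), flushes any recorded bad-block changes, and repeats the
 * whole cycle if the array changed while the writes were in flight.
 */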
2372static void md_update_sb(struct mddev * mddev, int force_change)
2373{
2374 struct md_rdev *rdev;
2375 int sync_req;
2376 int nospares = 0;
2377 int any_badblocks_changed = 0;
2378
2379 if (mddev->ro) {
2380 if (force_change)
2381 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2382 return;
2383 }
2384repeat:
2385 /* First make sure individual recovery_offsets are correct */
2386 rdev_for_each(rdev, mddev) {
2387 if (rdev->raid_disk >= 0 &&
2388 mddev->delta_disks >= 0 &&
2389 !test_bit(In_sync, &rdev->flags) &&
2390 mddev->curr_resync_completed > rdev->recovery_offset)
2391 rdev->recovery_offset = mddev->curr_resync_completed;
2392
2393 }
2394 if (!mddev->persistent) {
2395 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2396 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2397 if (!mddev->external) {
2398 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2399 rdev_for_each(rdev, mddev) {
2400 if (rdev->badblocks.changed) {
2401 rdev->badblocks.changed = 0;
2402 md_ack_all_badblocks(&rdev->badblocks);
2403 md_error(mddev, rdev);
2404 }
2405 clear_bit(Blocked, &rdev->flags);
2406 clear_bit(BlockedBadBlocks, &rdev->flags);
2407 wake_up(&rdev->blocked_wait);
2408 }
2409 }
2410 wake_up(&mddev->sb_wait);
2411 return;
2412 }
2413
2414 spin_lock_irq(&mddev->write_lock);
2415
2416 mddev->utime = get_seconds();
2417
2418 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2419 force_change = 1;
2420 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
2421 /* just a clean<->dirty transition, possibly leave spares alone,
2422 * though if events isn't the right even/odd, we will have to do
2423 * spares after all
2424 */
2425 nospares = 1;
2426 if (force_change)
2427 nospares = 0;
2428 if (mddev->degraded)
2429 /* If the array is degraded, then skipping spares is both
2430 * dangerous and fairly pointless.
2431 * Dangerous because a device that was removed from the array
2432 * might have an event count that still looks up-to-date,
2433 * so it can be re-added without a resync.
2434 * Pointless because if there are any spares to skip,
2435 * then a recovery will happen and soon that array won't
2436 * be degraded any more and the spare can go back to sleep then.
2437 */
2438 nospares = 0;
2439
2440 sync_req = mddev->in_sync;
2441
2442 /* If this is just a dirty<->clean transition, and the array is clean
2443 * and 'events' is odd, we can roll back to the previous clean state */
2444 if (nospares
2445 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2446 && mddev->can_decrease_events
2447 && mddev->events != 1) {
2448 mddev->events--;
2449 mddev->can_decrease_events = 0;
2450 } else {
2451
2452 mddev->events ++;
2453 mddev->can_decrease_events = nospares;
2454 }
2455
2456 if (!mddev->events) {
2457 /*
2458 * oops, this 64-bit counter should never wrap.
2459 * Either we are in around ~1 trillion A.C., assuming
2460 * 1 reboot per second, or we have a bug:
2461 */
2462 MD_BUG();
2463 mddev->events --;
2464 }
2465
2466 rdev_for_each(rdev, mddev) {
2467 if (rdev->badblocks.changed)
2468 any_badblocks_changed++;
2469 if (test_bit(Faulty, &rdev->flags))
2470 set_bit(FaultRecorded, &rdev->flags);
2471 }
2472
2473 sync_sbs(mddev, nospares);
2474 spin_unlock_irq(&mddev->write_lock);
2475
2476 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2477 mdname(mddev), mddev->in_sync);
2478
2479 bitmap_update_sb(mddev->bitmap);
2480 rdev_for_each(rdev, mddev) {
2481 char b[BDEVNAME_SIZE];
2482
2483 if (rdev->sb_loaded != 1)
2484 continue;
2485
2486 if (!test_bit(Faulty, &rdev->flags)) {
2487 md_super_write(mddev,rdev,
2488 rdev->sb_start, rdev->sb_size,
2489 rdev->sb_page);
2490 pr_debug("md: (write) %s's sb offset: %llu\n",
2491 bdevname(rdev->bdev, b),
2492 (unsigned long long)rdev->sb_start);
2493 rdev->sb_events = mddev->events;
2494 if (rdev->badblocks.size) {
2495 md_super_write(mddev, rdev,
2496 rdev->badblocks.sector,
2497 rdev->badblocks.size << 9,
2498 rdev->bb_page);
2499 rdev->badblocks.size = 0;
2500 }
2501
2502 } else
2503 pr_debug("md: %s (skipping faulty)\n",
2504 bdevname(rdev->bdev, b));
2505
2506 if (mddev->level == LEVEL_MULTIPATH)
2507 /* only need to write one superblock... */
2508 break;
2509 }
2510 md_super_wait(mddev);
2511 /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
2512
2513 spin_lock_irq(&mddev->write_lock);
2514 if (mddev->in_sync != sync_req ||
2515 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2516 /* have to write it out again */
2517 spin_unlock_irq(&mddev->write_lock);
2518 goto repeat;
2519 }
2520 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2521 spin_unlock_irq(&mddev->write_lock);
2522 wake_up(&mddev->sb_wait);
2523 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2524 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2525
2526 rdev_for_each(rdev, mddev) {
2527 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2528 clear_bit(Blocked, &rdev->flags);
2529
2530 if (any_badblocks_changed)
2531 md_ack_all_badblocks(&rdev->badblocks);
2532 clear_bit(BlockedBadBlocks, &rdev->flags);
2533 wake_up(&rdev->blocked_wait);
2534 }
2535}
2536
2537
2538
2539
2540static int cmd_match(const char *cmd, const char *str)
2541{
2542 /* See if cmd, written into a sysfs file, matches
2543 * str.  They must either be the same, or cmd can
2544 * have a trailing newline
2545 */
2546 while (*cmd && *str && *cmd == *str) {
2547 cmd++;
2548 str++;
2549 }
2550 if (*cmd == '\n')
2551 cmd++;
2552 if (*str || *cmd)
2553 return 0;
2554 return 1;
2555}
2556
2557struct rdev_sysfs_entry {
2558 struct attribute attr;
2559 ssize_t (*show)(struct md_rdev *, char *);
2560 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2561};
2562
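/*
 * Per-device sysfs attribute "state": reads back a comma-separated list of
 * flags (faulty, in_sync, write_mostly, blocked, spare, write_error,
 * want_replacement, replacement) and accepts the commands handled in
 * state_store() below, e.g. (path shown is the usual sysfs layout):
 *   echo -blocked > /sys/block/md0/md/dev-sda1/state
 */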
2563static ssize_t
2564state_show(struct md_rdev *rdev, char *page)
2565{
2566 char *sep = "";
2567 size_t len = 0;
2568
2569 if (test_bit(Faulty, &rdev->flags) ||
2570 rdev->badblocks.unacked_exist) {
2571 len+= sprintf(page+len, "%sfaulty",sep);
2572 sep = ",";
2573 }
2574 if (test_bit(In_sync, &rdev->flags)) {
2575 len += sprintf(page+len, "%sin_sync",sep);
2576 sep = ",";
2577 }
2578 if (test_bit(WriteMostly, &rdev->flags)) {
2579 len += sprintf(page+len, "%swrite_mostly",sep);
2580 sep = ",";
2581 }
2582 if (test_bit(Blocked, &rdev->flags) ||
2583 (rdev->badblocks.unacked_exist
2584 && !test_bit(Faulty, &rdev->flags))) {
2585 len += sprintf(page+len, "%sblocked", sep);
2586 sep = ",";
2587 }
2588 if (!test_bit(Faulty, &rdev->flags) &&
2589 !test_bit(In_sync, &rdev->flags)) {
2590 len += sprintf(page+len, "%sspare", sep);
2591 sep = ",";
2592 }
2593 if (test_bit(WriteErrorSeen, &rdev->flags)) {
2594 len += sprintf(page+len, "%swrite_error", sep);
2595 sep = ",";
2596 }
2597 if (test_bit(WantReplacement, &rdev->flags)) {
2598 len += sprintf(page+len, "%swant_replacement", sep);
2599 sep = ",";
2600 }
2601 if (test_bit(Replacement, &rdev->flags)) {
2602 len += sprintf(page+len, "%sreplacement", sep);
2603 sep = ",";
2604 }
2605
2606 return len+sprintf(page+len, "\n");
2607}
2608
2609static ssize_t
2610state_store(struct md_rdev *rdev, const char *buf, size_t len)
2611{
2612 /* can write
2613 *  faulty  - simulates an error on the device
2614 *  remove  - disconnects the device
2615 *  writemostly - sets write_mostly
2616 *  -writemostly - clears write_mostly
2617 *  blocked - sets the Blocked flags
2618 *  -blocked - clears the Blocked and possibly simulates an error
2619 *  insync - sets Insync providing device isn't active
2620 *  -insync - clear Insync for a device with a slot assigned,
2621 *            so that it gets rebuilt based on bitmap
2622 *  write_error - sets WriteErrorSeen
2623 *  -write_error - clears WriteErrorSeen
2624 */
2625 int err = -EINVAL;
2626 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2627 md_error(rdev->mddev, rdev);
2628 if (test_bit(Faulty, &rdev->flags))
2629 err = 0;
2630 else
2631 err = -EBUSY;
2632 } else if (cmd_match(buf, "remove")) {
2633 if (rdev->raid_disk >= 0)
2634 err = -EBUSY;
2635 else {
2636 struct mddev *mddev = rdev->mddev;
2637 kick_rdev_from_array(rdev);
2638 if (mddev->pers)
2639 md_update_sb(mddev, 1);
2640 md_new_event(mddev);
2641 err = 0;
2642 }
2643 } else if (cmd_match(buf, "writemostly")) {
2644 set_bit(WriteMostly, &rdev->flags);
2645 err = 0;
2646 } else if (cmd_match(buf, "-writemostly")) {
2647 clear_bit(WriteMostly, &rdev->flags);
2648 err = 0;
2649 } else if (cmd_match(buf, "blocked")) {
2650 set_bit(Blocked, &rdev->flags);
2651 err = 0;
2652 } else if (cmd_match(buf, "-blocked")) {
2653 if (!test_bit(Faulty, &rdev->flags) &&
2654 rdev->badblocks.unacked_exist) {
2655 /* metadata handler doesn't understand badblocks,
2656 * so we need to fail the device
2657 */
2658 md_error(rdev->mddev, rdev);
2659 }
2660 clear_bit(Blocked, &rdev->flags);
2661 clear_bit(BlockedBadBlocks, &rdev->flags);
2662 wake_up(&rdev->blocked_wait);
2663 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2664 md_wakeup_thread(rdev->mddev->thread);
2665
2666 err = 0;
2667 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2668 set_bit(In_sync, &rdev->flags);
2669 err = 0;
2670 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0) {
2671 clear_bit(In_sync, &rdev->flags);
2672 rdev->saved_raid_disk = rdev->raid_disk;
2673 rdev->raid_disk = -1;
2674 err = 0;
2675 } else if (cmd_match(buf, "write_error")) {
2676 set_bit(WriteErrorSeen, &rdev->flags);
2677 err = 0;
2678 } else if (cmd_match(buf, "-write_error")) {
2679 clear_bit(WriteErrorSeen, &rdev->flags);
2680 err = 0;
2681 } else if (cmd_match(buf, "want_replacement")) {
2682 /* Any non-spare device that is not a replacement can
2683 * become want_replacement at any time, but we then need to
2684 * check if recovery is needed.
2685 */
2686 if (rdev->raid_disk >= 0 &&
2687 !test_bit(Replacement, &rdev->flags))
2688 set_bit(WantReplacement, &rdev->flags);
2689 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2690 md_wakeup_thread(rdev->mddev->thread);
2691 err = 0;
2692 } else if (cmd_match(buf, "-want_replacement")) {
2693 /* Clearing 'want_replacement' is always allowed.
2694 * Once replacements starts it is too late though.
2695 */
2696 err = 0;
2697 clear_bit(WantReplacement, &rdev->flags);
2698 } else if (cmd_match(buf, "replacement")) {
2699 /* Can only set a device as a replacement when array has not
2700 * yet been started.  Once running, replacement is automatic
2701 * from spares, or by assigning 'slot'.
2702 */
2703 if (rdev->mddev->pers)
2704 err = -EBUSY;
2705 else {
2706 set_bit(Replacement, &rdev->flags);
2707 err = 0;
2708 }
2709 } else if (cmd_match(buf, "-replacement")) {
2710 /* Similarly, can only clear Replacement before start */
2711 if (rdev->mddev->pers)
2712 err = -EBUSY;
2713 else {
2714 clear_bit(Replacement, &rdev->flags);
2715 err = 0;
2716 }
2717 }
2718 if (!err)
2719 sysfs_notify_dirent_safe(rdev->sysfs_state);
2720 return err ? err : len;
2721}
2722static struct rdev_sysfs_entry rdev_state =
2723__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2724
2725static ssize_t
2726errors_show(struct md_rdev *rdev, char *page)
2727{
2728 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2729}
2730
2731static ssize_t
2732errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2733{
2734 char *e;
2735 unsigned long n = simple_strtoul(buf, &e, 10);
2736 if (*buf && (*e == 0 || *e == '\n')) {
2737 atomic_set(&rdev->corrected_errors, n);
2738 return len;
2739 }
2740 return -EINVAL;
2741}
2742static struct rdev_sysfs_entry rdev_errors =
2743__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2744
2745static ssize_t
2746slot_show(struct md_rdev *rdev, char *page)
2747{
2748 if (rdev->raid_disk < 0)
2749 return sprintf(page, "none\n");
2750 else
2751 return sprintf(page, "%d\n", rdev->raid_disk);
2752}
2753
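/*
 * Writing "slot" assigns this device to a role in the array, or "none" to
 * remove it from its role.  On an active array this goes through the
 * personality's hot_add_disk()/hot_remove_disk() methods; on an inactive
 * array it simply records the slot (mainly for externally managed metadata).
 */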
2754static ssize_t
2755slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2756{
2757 char *e;
2758 int err;
2759 int slot = simple_strtoul(buf, &e, 10);
2760 if (strncmp(buf, "none", 4)==0)
2761 slot = -1;
2762 else if (e==buf || (*e && *e!= '\n'))
2763 return -EINVAL;
2764 if (rdev->mddev->pers && slot == -1) {
2765 /* Setting 'slot' on an active array requires also
2766 * updating the 'rd%d' link, and communicating
2767 * with the personality with ->hot_*_disk.
2768 * For now we only support removing
2769 * failed/spare devices.  This normally happens automatically,
2770 * but not when the metadata is externally managed.
2771 */
2772 if (rdev->raid_disk == -1)
2773 return -EEXIST;
2774 /* personality does all needed checks */
2775 if (rdev->mddev->pers->hot_remove_disk == NULL)
2776 return -EINVAL;
2777 clear_bit(Blocked, &rdev->flags);
2778 remove_and_add_spares(rdev->mddev, rdev);
2779 if (rdev->raid_disk >= 0)
2780 return -EBUSY;
2781 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2782 md_wakeup_thread(rdev->mddev->thread);
2783 } else if (rdev->mddev->pers) {
2784 /* Activating a spare .. or possibly reactivating
2785 * if we ever get bitmaps working here.
2786 */
2787
2788 if (rdev->raid_disk != -1)
2789 return -EBUSY;
2790
2791 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2792 return -EBUSY;
2793
2794 if (rdev->mddev->pers->hot_add_disk == NULL)
2795 return -EINVAL;
2796
2797 if (slot >= rdev->mddev->raid_disks &&
2798 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2799 return -ENOSPC;
2800
2801 rdev->raid_disk = slot;
2802 if (test_bit(In_sync, &rdev->flags))
2803 rdev->saved_raid_disk = slot;
2804 else
2805 rdev->saved_raid_disk = -1;
2806 clear_bit(In_sync, &rdev->flags);
2807 clear_bit(Bitmap_sync, &rdev->flags);
2808 err = rdev->mddev->pers->
2809 hot_add_disk(rdev->mddev, rdev);
2810 if (err) {
2811 rdev->raid_disk = -1;
2812 return err;
2813 } else
2814 sysfs_notify_dirent_safe(rdev->sysfs_state);
2815 if (sysfs_link_rdev(rdev->mddev, rdev))
2816 /* failure here is OK */;
2817 /* don't wakeup anyone, leave that to userspace. */
2818 } else {
2819 if (slot >= rdev->mddev->raid_disks &&
2820 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2821 return -ENOSPC;
2822 rdev->raid_disk = slot;
2823 /* assume it is working */
2824 clear_bit(Faulty, &rdev->flags);
2825 clear_bit(WriteMostly, &rdev->flags);
2826 set_bit(In_sync, &rdev->flags);
2827 sysfs_notify_dirent_safe(rdev->sysfs_state);
2828 }
2829 return len;
2830}
2831
2832
2833static struct rdev_sysfs_entry rdev_slot =
2834__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2835
2836static ssize_t
2837offset_show(struct md_rdev *rdev, char *page)
2838{
2839 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2840}
2841
2842static ssize_t
2843offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2844{
2845 unsigned long long offset;
2846 if (kstrtoull(buf, 10, &offset) < 0)
2847 return -EINVAL;
2848 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2849 return -EBUSY;
2850 if (rdev->sectors && rdev->mddev->external)
2851 /* Must set offset before size, so overlap checks
2852 * can be sane */
2853 return -EBUSY;
2854 rdev->data_offset = offset;
2855 rdev->new_data_offset = offset;
2856 return len;
2857}
2858
2859static struct rdev_sysfs_entry rdev_offset =
2860__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2861
2862static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2863{
2864 return sprintf(page, "%llu\n",
2865 (unsigned long long)rdev->new_data_offset);
2866}
2867
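/*
 * "new_offset" is the data offset that will be used after a reshape.  The
 * change must agree with reshape_backwards: the offset may only increase
 * when reshape_backwards is set, and only decrease when it is not; writing
 * a new value also updates reshape_backwards to match, and the metadata
 * handler must confirm the new value via allow_new_offset().
 */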
2868static ssize_t new_offset_store(struct md_rdev *rdev,
2869 const char *buf, size_t len)
2870{
2871 unsigned long long new_offset;
2872 struct mddev *mddev = rdev->mddev;
2873
2874 if (kstrtoull(buf, 10, &new_offset) < 0)
2875 return -EINVAL;
2876
2877 if (mddev->sync_thread)
2878 return -EBUSY;
2879 if (new_offset == rdev->data_offset)
2880 /* reset is always permitted */
2881 ;
2882 else if (new_offset > rdev->data_offset) {
2883 /* must not push array size beyond rdev_sectors */
2884 if (new_offset - rdev->data_offset
2885 + mddev->dev_sectors > rdev->sectors)
2886 return -E2BIG;
2887 }
2888 /* Metadata worries about other space details. */
2889
2890 /* decreasing the offset is inconsistent with a backwards
2891 * reshape.
2892 */
2893 if (new_offset < rdev->data_offset &&
2894 mddev->reshape_backwards)
2895 return -EINVAL;
2896
2897 /* Increasing the offset is inconsistent with a forwards
2898 * reshape; reshape_direction should be set to 'backwards' first.
2899 */
2900 if (new_offset > rdev->data_offset &&
2901 !mddev->reshape_backwards)
2902 return -EINVAL;
2903
2904 if (mddev->pers && mddev->persistent &&
2905 !super_types[mddev->major_version]
2906 .allow_new_offset(rdev, new_offset))
2907 return -E2BIG;
2908 rdev->new_data_offset = new_offset;
2909 if (new_offset > rdev->data_offset)
2910 mddev->reshape_backwards = 1;
2911 else if (new_offset < rdev->data_offset)
2912 mddev->reshape_backwards = 0;
2913
2914 return len;
2915}
2916static struct rdev_sysfs_entry rdev_new_offset =
2917__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2918
2919static ssize_t
2920rdev_size_show(struct md_rdev *rdev, char *page)
2921{
2922 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2923}
2924
2925static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2926{
2927 /* check if two start/length pairs overlap */
2928 if (s1+l1 <= s2)
2929 return 0;
2930 if (s2+l2 <= s1)
2931 return 0;
2932 return 1;
2933}
2934
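/*
 * strict_blocks_to_sectors() parses a sysfs size given in 1K blocks and
 * converts it to 512-byte sectors, rejecting values that are negative or
 * that would overflow a sector_t when doubled.
 */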
2935static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2936{
2937 unsigned long long blocks;
2938 sector_t new;
2939
2940 if (kstrtoull(buf, 10, &blocks) < 0)
2941 return -EINVAL;
2942
2943 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2944 return -EINVAL;
2945
2946 new = blocks * 2;
2947 if (new != blocks * 2)
2948 return -EINVAL;
2949
2950 *sectors = new;
2951 return 0;
2952}
2953
2954static ssize_t
2955rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2956{
2957 struct mddev *my_mddev = rdev->mddev;
2958 sector_t oldsectors = rdev->sectors;
2959 sector_t sectors;
2960
2961 if (strict_blocks_to_sectors(buf, &sectors) < 0)
2962 return -EINVAL;
2963 if (rdev->data_offset != rdev->new_data_offset)
2964 return -EINVAL;
2965 if (my_mddev->pers && rdev->raid_disk >= 0) {
2966 if (my_mddev->persistent) {
2967 sectors = super_types[my_mddev->major_version].
2968 rdev_size_change(rdev, sectors);
2969 if (!sectors)
2970 return -EBUSY;
2971 } else if (!sectors)
2972 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2973 rdev->data_offset;
2974 if (!my_mddev->pers->resize)
2975
2976 return -EINVAL;
2977 }
2978 if (sectors < my_mddev->dev_sectors)
2979 return -EINVAL;
2980
2981 rdev->sectors = sectors;
2982 if (sectors > oldsectors && my_mddev->external) {
2983 /* Need to check that all other rdevs with the same bdev
2984 * do not overlap.  'rcu' is sufficient to walk the rdev
2985 * list, but rdev->sectors might change under us, so we
2986 * need proper locking while we scan all arrays.
2987 */
2988 struct mddev *mddev;
2989 int overlap = 0;
2990 struct list_head *tmp;
2991
2992 mddev_unlock(my_mddev);
2993 for_each_mddev(mddev, tmp) {
2994 struct md_rdev *rdev2;
2995
2996 mddev_lock_nointr(mddev);
2997 rdev_for_each(rdev2, mddev)
2998 if (rdev->bdev == rdev2->bdev &&
2999 rdev != rdev2 &&
3000 overlaps(rdev->data_offset, rdev->sectors,
3001 rdev2->data_offset,
3002 rdev2->sectors)) {
3003 overlap = 1;
3004 break;
3005 }
3006 mddev_unlock(mddev);
3007 if (overlap) {
3008 mddev_put(mddev);
3009 break;
3010 }
3011 }
3012 mddev_lock_nointr(my_mddev);
3013 if (overlap) {
3014 /* Someone else could have slipped in a size
3015 * change here, but doing so is just silly.
3016 * We put oldsectors back because we *know* it is
3017 * safe, and trust userspace not to race with
3018 * itself
3019 */
3020 rdev->sectors = oldsectors;
3021 return -EBUSY;
3022 }
3023 }
3024 return len;
3025}
3026
3027static struct rdev_sysfs_entry rdev_size =
3028__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3029
3030
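/*
 * "recovery_start" reports how far recovery of this device has progressed,
 * or "none" if the device is fully in-sync.  It may only be written while
 * the device is not active in an array, and writing it adjusts the In_sync
 * flag to match.
 */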
3031static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3032{
3033 unsigned long long recovery_start = rdev->recovery_offset;
3034
3035 if (test_bit(In_sync, &rdev->flags) ||
3036 recovery_start == MaxSector)
3037 return sprintf(page, "none\n");
3038
3039 return sprintf(page, "%llu\n", recovery_start);
3040}
3041
3042static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3043{
3044 unsigned long long recovery_start;
3045
3046 if (cmd_match(buf, "none"))
3047 recovery_start = MaxSector;
3048 else if (kstrtoull(buf, 10, &recovery_start))
3049 return -EINVAL;
3050
3051 if (rdev->mddev->pers &&
3052 rdev->raid_disk >= 0)
3053 return -EBUSY;
3054
3055 rdev->recovery_offset = recovery_start;
3056 if (recovery_start == MaxSector)
3057 set_bit(In_sync, &rdev->flags);
3058 else
3059 clear_bit(In_sync, &rdev->flags);
3060 return len;
3061}
3062
3063static struct rdev_sysfs_entry rdev_recovery_start =
3064__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3065
3066
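/*
 * "bad_blocks" and "unacknowledged_bad_blocks" expose the per-device
 * bad-block list; the latter only shows entries that have not yet been
 * acknowledged by the metadata handler.  badblocks_show() and
 * badblocks_store() themselves are defined later in this file.
 */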
3067static ssize_t
3068badblocks_show(struct badblocks *bb, char *page, int unack);
3069static ssize_t
3070badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
3071
3072static ssize_t bb_show(struct md_rdev *rdev, char *page)
3073{
3074 return badblocks_show(&rdev->badblocks, page, 0);
3075}
3076static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3077{
3078 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3079
3080 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3081 wake_up(&rdev->blocked_wait);
3082 return rv;
3083}
3084static struct rdev_sysfs_entry rdev_bad_blocks =
3085__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3086
3087
3088static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3089{
3090 return badblocks_show(&rdev->badblocks, page, 1);
3091}
3092static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3093{
3094 return badblocks_store(&rdev->badblocks, page, len, 1);
3095}
3096static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3097__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3098
3099static struct attribute *rdev_default_attrs[] = {
3100 &rdev_state.attr,
3101 &rdev_errors.attr,
3102 &rdev_slot.attr,
3103 &rdev_offset.attr,
3104 &rdev_new_offset.attr,
3105 &rdev_size.attr,
3106 &rdev_recovery_start.attr,
3107 &rdev_bad_blocks.attr,
3108 &rdev_unack_bad_blocks.attr,
3109 NULL,
3110};
3111static ssize_t
3112rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3113{
3114 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3115 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3116 struct mddev *mddev = rdev->mddev;
3117 ssize_t rv;
3118
3119 if (!entry->show)
3120 return -EIO;
3121
3122 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3123 if (!rv) {
3124 if (rdev->mddev == NULL)
3125 rv = -EBUSY;
3126 else
3127 rv = entry->show(rdev, page);
3128 mddev_unlock(mddev);
3129 }
3130 return rv;
3131}
3132
3133static ssize_t
3134rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3135 const char *page, size_t length)
3136{
3137 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3138 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3139 ssize_t rv;
3140 struct mddev *mddev = rdev->mddev;
3141
3142 if (!entry->store)
3143 return -EIO;
3144 if (!capable(CAP_SYS_ADMIN))
3145 return -EACCES;
3146 rv = mddev ? mddev_lock(mddev): -EBUSY;
3147 if (!rv) {
3148 if (rdev->mddev == NULL)
3149 rv = -EBUSY;
3150 else
3151 rv = entry->store(rdev, page, length);
3152 mddev_unlock(mddev);
3153 }
3154 return rv;
3155}
3156
3157static void rdev_free(struct kobject *ko)
3158{
3159 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3160 kfree(rdev);
3161}
3162static const struct sysfs_ops rdev_sysfs_ops = {
3163 .show = rdev_attr_show,
3164 .store = rdev_attr_store,
3165};
3166static struct kobj_type rdev_ktype = {
3167 .release = rdev_free,
3168 .sysfs_ops = &rdev_sysfs_ops,
3169 .default_attrs = rdev_default_attrs,
3170};
3171
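/*
 * md_rdev_init() resets a freshly allocated md_rdev to a sane initial state
 * and allocates the first page of its bad-block list; it returns -ENOMEM if
 * that allocation fails.
 */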
3172int md_rdev_init(struct md_rdev *rdev)
3173{
3174 rdev->desc_nr = -1;
3175 rdev->saved_raid_disk = -1;
3176 rdev->raid_disk = -1;
3177 rdev->flags = 0;
3178 rdev->data_offset = 0;
3179 rdev->new_data_offset = 0;
3180 rdev->sb_events = 0;
3181 rdev->last_read_error.tv_sec = 0;
3182 rdev->last_read_error.tv_nsec = 0;
3183 rdev->sb_loaded = 0;
3184 rdev->bb_page = NULL;
3185 atomic_set(&rdev->nr_pending, 0);
3186 atomic_set(&rdev->read_errors, 0);
3187 atomic_set(&rdev->corrected_errors, 0);
3188
3189 INIT_LIST_HEAD(&rdev->same_set);
3190 init_waitqueue_head(&rdev->blocked_wait);
3191
3192 /* Add space to store bad block list.
3193 * This reserves the space even on arrays where it cannot
3194 * be used - I wonder if that matters
3195 */
3196 rdev->badblocks.count = 0;
3197 rdev->badblocks.shift = -1;
3198 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
3199 seqlock_init(&rdev->badblocks.lock);
3200 if (rdev->badblocks.page == NULL)
3201 return -ENOMEM;
3202
3203 return 0;
3204}
3205EXPORT_SYMBOL_GPL(md_rdev_init);
3206
3207 /*
3208 * Import a device. If 'super_format' >= 0, then sanity check the
3209 * superblock.  Mark the device faulty if:
3210 *
3211 *   - the device is nonexistent (zero size)
3212 *   - the device has no valid superblock
3213 *
3214 * a faulty rdev _never_ has rdev->sb set.
3215 */
3216static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3217{
3218 char b[BDEVNAME_SIZE];
3219 int err;
3220 struct md_rdev *rdev;
3221 sector_t size;
3222
3223 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3224 if (!rdev) {
3225 printk(KERN_ERR "md: could not alloc mem for new device!\n");
3226 return ERR_PTR(-ENOMEM);
3227 }
3228
3229 err = md_rdev_init(rdev);
3230 if (err)
3231 goto abort_free;
3232 err = alloc_disk_sb(rdev);
3233 if (err)
3234 goto abort_free;
3235
3236 err = lock_rdev(rdev, newdev, super_format == -2);
3237 if (err)
3238 goto abort_free;
3239
3240 kobject_init(&rdev->kobj, &rdev_ktype);
3241
3242 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3243 if (!size) {
3244 printk(KERN_WARNING
3245 "md: %s has zero or unknown size, marking faulty!\n",
3246 bdevname(rdev->bdev,b));
3247 err = -EINVAL;
3248 goto abort_free;
3249 }
3250
3251 if (super_format >= 0) {
3252 err = super_types[super_format].
3253 load_super(rdev, NULL, super_minor);
3254 if (err == -EINVAL) {
3255 printk(KERN_WARNING
3256 "md: %s does not have a valid v%d.%d "
3257 "superblock, not importing!\n",
3258 bdevname(rdev->bdev,b),
3259 super_format, super_minor);
3260 goto abort_free;
3261 }
3262 if (err < 0) {
3263 printk(KERN_WARNING
3264 "md: could not read %s's sb, not importing!\n",
3265 bdevname(rdev->bdev,b));
3266 goto abort_free;
3267 }
3268 }
3269
3270 return rdev;
3271
3272abort_free:
3273 if (rdev->bdev)
3274 unlock_rdev(rdev);
3275 md_rdev_clear(rdev);
3276 kfree(rdev);
3277 return ERR_PTR(err);
3278}
3279
3280 /*
3281 * Check a full RAID array for plausibility: load every member's
3282 * superblock, pick the freshest one, and kick out any device
3283 * whose superblock is stale or inconsistent.
3284 */
3285static void analyze_sbs(struct mddev * mddev)
3286{
3287 int i;
3288 struct md_rdev *rdev, *freshest, *tmp;
3289 char b[BDEVNAME_SIZE];
3290
3291 freshest = NULL;
3292 rdev_for_each_safe(rdev, tmp, mddev)
3293 switch (super_types[mddev->major_version].
3294 load_super(rdev, freshest, mddev->minor_version)) {
3295 case 1:
3296 freshest = rdev;
3297 break;
3298 case 0:
3299 break;
3300 default:
3301 printk( KERN_ERR \
3302 "md: fatal superblock inconsistency in %s"
3303 " -- removing from array\n",
3304 bdevname(rdev->bdev,b));
3305 kick_rdev_from_array(rdev);
3306 }
3307
3308
3309 super_types[mddev->major_version].
3310 validate_super(mddev, freshest);
3311
3312 i = 0;
3313 rdev_for_each_safe(rdev, tmp, mddev) {
3314 if (mddev->max_disks &&
3315 (rdev->desc_nr >= mddev->max_disks ||
3316 i > mddev->max_disks)) {
3317 printk(KERN_WARNING
3318 "md: %s: %s: only %d devices permitted\n",
3319 mdname(mddev), bdevname(rdev->bdev, b),
3320 mddev->max_disks);
3321 kick_rdev_from_array(rdev);
3322 continue;
3323 }
3324 if (rdev != freshest)
3325 if (super_types[mddev->major_version].
3326 validate_super(mddev, rdev)) {
3327 printk(KERN_WARNING "md: kicking non-fresh %s"
3328 " from array!\n",
3329 bdevname(rdev->bdev,b));
3330 kick_rdev_from_array(rdev);
3331 continue;
3332 }
3333 if (mddev->level == LEVEL_MULTIPATH) {
3334 rdev->desc_nr = i++;
3335 rdev->raid_disk = rdev->desc_nr;
3336 set_bit(In_sync, &rdev->flags);
3337 } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
3338 rdev->raid_disk = -1;
3339 clear_bit(In_sync, &rdev->flags);
3340 }
3341 }
3342}
3343
3344 /* Read a fixed-point number.
3345 * Numbers in sysfs attributes should be in "standard" units where
3346 * possible, so time should be in seconds.
3347 * However we internally use a much smaller unit such as
3348 * milliseconds or jiffies.
3349 * This function takes a decimal number with a possible fractional
3350 * component, and produces an integer which is the result of
3351 * multiplying that number by 10^'scale'.
3352 * all without any floating-point arithmetic.
3353 */
3354int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3355{
3356 unsigned long result = 0;
3357 long decimals = -1;
3358 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3359 if (*cp == '.')
3360 decimals = 0;
3361 else if (decimals < scale) {
3362 unsigned int value;
3363 value = *cp - '0';
3364 result = result * 10 + value;
3365 if (decimals >= 0)
3366 decimals++;
3367 }
3368 cp++;
3369 }
3370 if (*cp == '\n')
3371 cp++;
3372 if (*cp)
3373 return -EINVAL;
3374 if (decimals < 0)
3375 decimals = 0;
3376 while (decimals < scale) {
3377 result *= 10;
3378 decimals ++;
3379 }
3380 *res = result;
3381 return 0;
3382}
3383
3384
3385static void md_safemode_timeout(unsigned long data);
3386
3387static ssize_t
3388safe_delay_show(struct mddev *mddev, char *page)
3389{
3390 int msec = (mddev->safemode_delay*1000)/HZ;
3391 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3392}
3393static ssize_t
3394safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3395{
3396 unsigned long msec;
3397
3398 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3399 return -EINVAL;
3400 if (msec == 0)
3401 mddev->safemode_delay = 0;
3402 else {
3403 unsigned long old_delay = mddev->safemode_delay;
3404 mddev->safemode_delay = (msec*HZ)/1000;
3405 if (mddev->safemode_delay == 0)
3406 mddev->safemode_delay = 1;
3407 if (mddev->safemode_delay < old_delay || old_delay == 0)
3408 md_safemode_timeout((unsigned long)mddev);
3409 }
3410 return len;
3411}
3412static struct md_sysfs_entry md_safe_delay =
3413__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3414
3415static ssize_t
3416level_show(struct mddev *mddev, char *page)
3417{
3418 struct md_personality *p = mddev->pers;
3419 if (p)
3420 return sprintf(page, "%s\n", p->name);
3421 else if (mddev->clevel[0])
3422 return sprintf(page, "%s\n", mddev->clevel);
3423 else if (mddev->level != LEVEL_NONE)
3424 return sprintf(page, "%d\n", mddev->level);
3425 else
3426 return 0;
3427}
3428
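/*
 * Writing "level" on an inactive array just records the requested level.
 * On an active array it attempts an online conversion: the new personality
 * must be loadable (request_module) and must provide a takeover method, and
 * the array must not be resyncing or reshaping at the time.
 */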
3429static ssize_t
3430level_store(struct mddev *mddev, const char *buf, size_t len)
3431{
3432 char clevel[16];
3433 ssize_t rv = len;
3434 struct md_personality *pers;
3435 long level;
3436 void *priv;
3437 struct md_rdev *rdev;
3438
3439 if (mddev->pers == NULL) {
3440 if (len == 0)
3441 return 0;
3442 if (len >= sizeof(mddev->clevel))
3443 return -ENOSPC;
3444 strncpy(mddev->clevel, buf, len);
3445 if (mddev->clevel[len-1] == '\n')
3446 len--;
3447 mddev->clevel[len] = 0;
3448 mddev->level = LEVEL_NONE;
3449 return rv;
3450 }
3451
3452 /* request to change the personality.  Need to ensure:
3453 *  - array is not engaged in resync/recovery/reshape
3454 *  - old personality can be suspended
3455 *  - new personality will access other array.
3456 */
3457
3458 if (mddev->sync_thread ||
3459 mddev->reshape_position != MaxSector ||
3460 mddev->sysfs_active)
3461 return -EBUSY;
3462
3463 if (!mddev->pers->quiesce) {
3464 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3465 mdname(mddev), mddev->pers->name);
3466 return -EINVAL;
3467 }
3468
3469 /* Now find the new personality */
3470 if (len == 0 || len >= sizeof(clevel))
3471 return -EINVAL;
3472 strncpy(clevel, buf, len);
3473 if (clevel[len-1] == '\n')
3474 len--;
3475 clevel[len] = 0;
3476 if (kstrtol(clevel, 10, &level))
3477 level = LEVEL_NONE;
3478
3479 if (request_module("md-%s", clevel) != 0)
3480 request_module("md-level-%s", clevel);
3481 spin_lock(&pers_lock);
3482 pers = find_pers(level, clevel);
3483 if (!pers || !try_module_get(pers->owner)) {
3484 spin_unlock(&pers_lock);
3485 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3486 return -EINVAL;
3487 }
3488 spin_unlock(&pers_lock);
3489
3490 if (pers == mddev->pers) {
3491
3492 module_put(pers->owner);
3493 return rv;
3494 }
3495 if (!pers->takeover) {
3496 module_put(pers->owner);
3497 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3498 mdname(mddev), clevel);
3499 return -EINVAL;
3500 }
3501
3502 rdev_for_each(rdev, mddev)
3503 rdev->new_raid_disk = rdev->raid_disk;
3504
3505 /* ->takeover must set new_* and/or delta_disks
3506 * if it succeeds, and may set them when it fails.
3507 */
3508 priv = pers->takeover(mddev);
3509 if (IS_ERR(priv)) {
3510 mddev->new_level = mddev->level;
3511 mddev->new_layout = mddev->layout;
3512 mddev->new_chunk_sectors = mddev->chunk_sectors;
3513 mddev->raid_disks -= mddev->delta_disks;
3514 mddev->delta_disks = 0;
3515 mddev->reshape_backwards = 0;
3516 module_put(pers->owner);
3517 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3518 mdname(mddev), clevel);
3519 return PTR_ERR(priv);
3520 }
3521
3522
3523 mddev_suspend(mddev);
3524 mddev->pers->stop(mddev);
3525
3526 if (mddev->pers->sync_request == NULL &&
3527 pers->sync_request != NULL) {
3528 /* need to add the md_redundancy_group */
3529 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3530 printk(KERN_WARNING
3531 "md: cannot register extra attributes for %s\n",
3532 mdname(mddev));
3533 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3534 }
3535 if (mddev->pers->sync_request != NULL &&
3536 pers->sync_request == NULL) {
3537 /* need to remove the md_redundancy_group */
3538 if (mddev->to_remove == NULL)
3539 mddev->to_remove = &md_redundancy_group;
3540 }
3541
3542 if (mddev->pers->sync_request == NULL &&
3543 mddev->external) {
3544 /* We are converting from a no-redundancy array
3545 * to a redundancy array and metadata is managed
3546 * externally so we need to be sure that writes
3547 * won't block due to a need to update the metadata
3548 * outside the kernel.
3549 */
3550
3551 mddev->in_sync = 0;
3552 mddev->safemode_delay = 0;
3553 mddev->safemode = 0;
3554 }
3555
3556 rdev_for_each(rdev, mddev) {
3557 if (rdev->raid_disk < 0)
3558 continue;
3559 if (rdev->new_raid_disk >= mddev->raid_disks)
3560 rdev->new_raid_disk = -1;
3561 if (rdev->new_raid_disk == rdev->raid_disk)
3562 continue;
3563 sysfs_unlink_rdev(mddev, rdev);
3564 }
3565 rdev_for_each(rdev, mddev) {
3566 if (rdev->raid_disk < 0)
3567 continue;
3568 if (rdev->new_raid_disk == rdev->raid_disk)
3569 continue;
3570 rdev->raid_disk = rdev->new_raid_disk;
3571 if (rdev->raid_disk < 0)
3572 clear_bit(In_sync, &rdev->flags);
3573 else {
3574 if (sysfs_link_rdev(mddev, rdev))
3575 printk(KERN_WARNING "md: cannot register rd%d"
3576 " for %s after level change\n",
3577 rdev->raid_disk, mdname(mddev));
3578 }
3579 }
3580
3581 module_put(mddev->pers->owner);
3582 mddev->pers = pers;
3583 mddev->private = priv;
3584 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3585 mddev->level = mddev->new_level;
3586 mddev->layout = mddev->new_layout;
3587 mddev->chunk_sectors = mddev->new_chunk_sectors;
3588 mddev->delta_disks = 0;
3589 mddev->reshape_backwards = 0;
3590 mddev->degraded = 0;
3591 if (mddev->pers->sync_request == NULL) {
3592 /* this is now an array without redundancy, so
3593 * it must always be in_sync
3594 */
3595 mddev->in_sync = 1;
3596 del_timer_sync(&mddev->safemode_timer);
3597 }
3598 blk_set_stacking_limits(&mddev->queue->limits);
3599 pers->run(mddev);
3600 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3601 mddev_resume(mddev);
3602 if (!mddev->thread)
3603 md_update_sb(mddev, 1);
3604 sysfs_notify(&mddev->kobj, NULL, "level");
3605 md_new_event(mddev);
3606 return rv;
3607}
3608
3609static struct md_sysfs_entry md_level =
3610__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3611
3612
3613static ssize_t
3614layout_show(struct mddev *mddev, char *page)
3615{
3616 /* just a number, not meaningful for all levels */
3617 if (mddev->reshape_position != MaxSector &&
3618 mddev->layout != mddev->new_layout)
3619 return sprintf(page, "%d (%d)\n",
3620 mddev->new_layout, mddev->layout);
3621 return sprintf(page, "%d\n", mddev->layout);
3622}
3623
3624static ssize_t
3625layout_store(struct mddev *mddev, const char *buf, size_t len)
3626{
3627 char *e;
3628 unsigned long n = simple_strtoul(buf, &e, 10);
3629
3630 if (!*buf || (*e && *e != '\n'))
3631 return -EINVAL;
3632
3633 if (mddev->pers) {
3634 int err;
3635 if (mddev->pers->check_reshape == NULL)
3636 return -EBUSY;
3637 mddev->new_layout = n;
3638 err = mddev->pers->check_reshape(mddev);
3639 if (err) {
3640 mddev->new_layout = mddev->layout;
3641 return err;
3642 }
3643 } else {
3644 mddev->new_layout = n;
3645 if (mddev->reshape_position == MaxSector)
3646 mddev->layout = n;
3647 }
3648 return len;
3649}
3650static struct md_sysfs_entry md_layout =
3651__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3652
3653
3654static ssize_t
3655raid_disks_show(struct mddev *mddev, char *page)
3656{
3657 if (mddev->raid_disks == 0)
3658 return 0;
3659 if (mddev->reshape_position != MaxSector &&
3660 mddev->delta_disks != 0)
3661 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3662 mddev->raid_disks - mddev->delta_disks);
3663 return sprintf(page, "%d\n", mddev->raid_disks);
3664}
3665
3666static int update_raid_disks(struct mddev *mddev, int raid_disks);
3667
3668static ssize_t
3669raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3670{
3671 char *e;
3672 int rv = 0;
3673 unsigned long n = simple_strtoul(buf, &e, 10);
3674
3675 if (!*buf || (*e && *e != '\n'))
3676 return -EINVAL;
3677
3678 if (mddev->pers)
3679 rv = update_raid_disks(mddev, n);
3680 else if (mddev->reshape_position != MaxSector) {
3681 struct md_rdev *rdev;
3682 int olddisks = mddev->raid_disks - mddev->delta_disks;
3683
3684 rdev_for_each(rdev, mddev) {
3685 if (olddisks < n &&
3686 rdev->data_offset < rdev->new_data_offset)
3687 return -EINVAL;
3688 if (olddisks > n &&
3689 rdev->data_offset > rdev->new_data_offset)
3690 return -EINVAL;
3691 }
3692 mddev->delta_disks = n - olddisks;
3693 mddev->raid_disks = n;
3694 mddev->reshape_backwards = (mddev->delta_disks < 0);
3695 } else
3696 mddev->raid_disks = n;
3697 return rv ? rv : len;
3698}
3699static struct md_sysfs_entry md_raid_disks =
3700__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3701
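/*
 * "chunk_size" is presented in bytes in sysfs but kept internally in
 * 512-byte sectors (hence the << 9 / >> 9 conversions).  During a reshape
 * the new value is shown with the old one in parentheses.
 */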
3702static ssize_t
3703chunk_size_show(struct mddev *mddev, char *page)
3704{
3705 if (mddev->reshape_position != MaxSector &&
3706 mddev->chunk_sectors != mddev->new_chunk_sectors)
3707 return sprintf(page, "%d (%d)\n",
3708 mddev->new_chunk_sectors << 9,
3709 mddev->chunk_sectors << 9);
3710 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3711}
3712
3713static ssize_t
3714chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3715{
3716 char *e;
3717 unsigned long n = simple_strtoul(buf, &e, 10);
3718
3719 if (!*buf || (*e && *e != '\n'))
3720 return -EINVAL;
3721
3722 if (mddev->pers) {
3723 int err;
3724 if (mddev->pers->check_reshape == NULL)
3725 return -EBUSY;
3726 mddev->new_chunk_sectors = n >> 9;
3727 err = mddev->pers->check_reshape(mddev);
3728 if (err) {
3729 mddev->new_chunk_sectors = mddev->chunk_sectors;
3730 return err;
3731 }
3732 } else {
3733 mddev->new_chunk_sectors = n >> 9;
3734 if (mddev->reshape_position == MaxSector)
3735 mddev->chunk_sectors = n >> 9;
3736 }
3737 return len;
3738}
3739static struct md_sysfs_entry md_chunk_size =
3740__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3741
3742static ssize_t
3743resync_start_show(struct mddev *mddev, char *page)
3744{
3745 if (mddev->recovery_cp == MaxSector)
3746 return sprintf(page, "none\n");
3747 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3748}
3749
3750static ssize_t
3751resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3752{
3753 char *e;
3754 unsigned long long n = simple_strtoull(buf, &e, 10);
3755
3756 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3757 return -EBUSY;
3758 if (cmd_match(buf, "none"))
3759 n = MaxSector;
3760 else if (!*buf || (*e && *e != '\n'))
3761 return -EINVAL;
3762
3763 mddev->recovery_cp = n;
3764 if (mddev->pers)
3765 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3766 return len;
3767}
3768static struct md_sysfs_entry md_resync_start =
3769__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3770
3771 /*
3772 * The array state can be:
3773 *
3774 *     clear
3775 *         No devices, no size, no level.
3776 *         Writing this is equivalent to the STOP_ARRAY ioctl.
3777 *     inactive
3778 *         May have some settings and devices, but the array is not
3779 *         active.  Writing this stops an active array.
3780 *     suspended (not supported yet)
3781 *         All IO requests will block.
3782 *     readonly
3783 *         No resync can happen and no superblocks get written.
3784 *         Writing this switches an active array to read-only, or
3785 *         starts an inactive one read-only.
3786 *     read-auto
3787 *         Like readonly, but the array switches to read-write on the
3788 *         first write request.
3789 *     clean
3790 *         No pending writes, but otherwise active.  Writing this marks
3791 *         an active array in_sync; it fails with -EBUSY if writes are
3792 *         still pending.
3793 *     active
3794 *         Fully active: IO and resync may be happening.  Writing this
3795 *         clears write-pending, or starts an inactive array read-write.
3796 *     write-pending
3797 *         'clean', but a write has been requested and the superblock
3798 *         has not yet been marked dirty.
3799 *     active-idle
3800 *         Like active, but no writes have been seen for a while
3801 *         (the safemode delay has expired).
3802 */
3807enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3808 write_pending, active_idle, bad_word};
3809static char *array_states[] = {
3810 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3811 "write-pending", "active-idle", NULL };
3812
3813static int match_word(const char *word, char **list)
3814{
3815 int n;
3816 for (n=0; list[n]; n++)
3817 if (cmd_match(word, list[n]))
3818 break;
3819 return n;
3820}
3821
3822static ssize_t
3823array_state_show(struct mddev *mddev, char *page)
3824{
3825 enum array_state st = inactive;
3826
3827 if (mddev->pers)
3828 switch(mddev->ro) {
3829 case 1:
3830 st = readonly;
3831 break;
3832 case 2:
3833 st = read_auto;
3834 break;
3835 case 0:
3836 if (mddev->in_sync)
3837 st = clean;
3838 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3839 st = write_pending;
3840 else if (mddev->safemode)
3841 st = active_idle;
3842 else
3843 st = active;
3844 }
3845 else {
3846 if (list_empty(&mddev->disks) &&
3847 mddev->raid_disks == 0 &&
3848 mddev->dev_sectors == 0)
3849 st = clear;
3850 else
3851 st = inactive;
3852 }
3853 return sprintf(page, "%s\n", array_states[st]);
3854}
3855
3856static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
3857static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
3858static int do_md_run(struct mddev * mddev);
3859static int restart_array(struct mddev *mddev);
3860
3861static ssize_t
3862array_state_store(struct mddev *mddev, const char *buf, size_t len)
3863{
3864 int err = -EINVAL;
3865 enum array_state st = match_word(buf, array_states);
3866 switch(st) {
3867 case bad_word:
3868 break;
3869 case clear:
3870
3871 err = do_md_stop(mddev, 0, NULL);
3872 break;
3873 case inactive:
3874
3875 if (mddev->pers)
3876 err = do_md_stop(mddev, 2, NULL);
3877 else
3878 err = 0;
3879 break;
3880 case suspended:
3881 break;
3882 case readonly:
3883 if (mddev->pers)
3884 err = md_set_readonly(mddev, NULL);
3885 else {
3886 mddev->ro = 1;
3887 set_disk_ro(mddev->gendisk, 1);
3888 err = do_md_run(mddev);
3889 }
3890 break;
3891 case read_auto:
3892 if (mddev->pers) {
3893 if (mddev->ro == 0)
3894 err = md_set_readonly(mddev, NULL);
3895 else if (mddev->ro == 1)
3896 err = restart_array(mddev);
3897 if (err == 0) {
3898 mddev->ro = 2;
3899 set_disk_ro(mddev->gendisk, 0);
3900 }
3901 } else {
3902 mddev->ro = 2;
3903 err = do_md_run(mddev);
3904 }
3905 break;
3906 case clean:
3907 if (mddev->pers) {
3908 restart_array(mddev);
3909 spin_lock_irq(&mddev->write_lock);
3910 if (atomic_read(&mddev->writes_pending) == 0) {
3911 if (mddev->in_sync == 0) {
3912 mddev->in_sync = 1;
3913 if (mddev->safemode == 1)
3914 mddev->safemode = 0;
3915 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3916 }
3917 err = 0;
3918 } else
3919 err = -EBUSY;
3920 spin_unlock_irq(&mddev->write_lock);
3921 } else
3922 err = -EINVAL;
3923 break;
3924 case active:
3925 if (mddev->pers) {
3926 restart_array(mddev);
3927 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3928 wake_up(&mddev->sb_wait);
3929 err = 0;
3930 } else {
3931 mddev->ro = 0;
3932 set_disk_ro(mddev->gendisk, 0);
3933 err = do_md_run(mddev);
3934 }
3935 break;
3936 case write_pending:
3937 case active_idle:
3938
3939 break;
3940 }
3941 if (err)
3942 return err;
3943 else {
3944 if (mddev->hold_active == UNTIL_IOCTL)
3945 mddev->hold_active = 0;
3946 sysfs_notify_dirent_safe(mddev->sysfs_state);
3947 return len;
3948 }
3949}
3950static struct md_sysfs_entry md_array_state =
3951__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3952
3953static ssize_t
3954max_corrected_read_errors_show(struct mddev *mddev, char *page) {
3955 return sprintf(page, "%d\n",
3956 atomic_read(&mddev->max_corr_read_errors));
3957}
3958
3959static ssize_t
3960max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
3961{
3962 char *e;
3963 unsigned long n = simple_strtoul(buf, &e, 10);
3964
3965 if (*buf && (*e == 0 || *e == '\n')) {
3966 atomic_set(&mddev->max_corr_read_errors, n);
3967 return len;
3968 }
3969 return -EINVAL;
3970}
3971
3972static struct md_sysfs_entry max_corr_read_errors =
3973__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3974 max_corrected_read_errors_store);
3975
3976static ssize_t
3977null_show(struct mddev *mddev, char *page)
3978{
3979 return -EINVAL;
3980}
3981
3982static ssize_t
3983new_dev_store(struct mddev *mddev, const char *buf, size_t len)
3984{
3985 /* buf must be %d:%d\n? giving major and minor numbers */
3986 /* The new device is added to the array.
3987 * If the array has a persistent superblock, we read the
3988 * superblock to initialise info and check validity.
3989 * Otherwise, only checking done is that in bind_rdev_to_array,
3990 * which mainly checks size.
3991 */
3992 char *e;
3993 int major = simple_strtoul(buf, &e, 10);
3994 int minor;
3995 dev_t dev;
3996 struct md_rdev *rdev;
3997 int err;
3998
3999 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4000 return -EINVAL;
4001 minor = simple_strtoul(e+1, &e, 10);
4002 if (*e && *e != '\n')
4003 return -EINVAL;
4004 dev = MKDEV(major, minor);
4005 if (major != MAJOR(dev) ||
4006 minor != MINOR(dev))
4007 return -EOVERFLOW;
4008
4009
4010 if (mddev->persistent) {
4011 rdev = md_import_device(dev, mddev->major_version,
4012 mddev->minor_version);
4013 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4014 struct md_rdev *rdev0
4015 = list_entry(mddev->disks.next,
4016 struct md_rdev, same_set);
4017 err = super_types[mddev->major_version]
4018 .load_super(rdev, rdev0, mddev->minor_version);
4019 if (err < 0)
4020 goto out;
4021 }
4022 } else if (mddev->external)
4023 rdev = md_import_device(dev, -2, -1);
4024 else
4025 rdev = md_import_device(dev, -1, -1);
4026
4027 if (IS_ERR(rdev))
4028 return PTR_ERR(rdev);
4029 err = bind_rdev_to_array(rdev, mddev);
4030 out:
4031 if (err)
4032 export_rdev(rdev);
4033 return err ? err : len;
4034}
4035
4036static struct md_sysfs_entry md_new_device =
4037__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4038
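/*
 * "bitmap_set_bits" lets userspace dirty ranges of the write-intent bitmap.
 * It accepts chunk numbers, or ranges written as "start-end", separated by
 * whitespace.
 */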
4039static ssize_t
4040bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4041{
4042 char *end;
4043 unsigned long chunk, end_chunk;
4044
4045 if (!mddev->bitmap)
4046 goto out;
4047
4048 while (*buf) {
4049 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4050 if (buf == end) break;
4051 if (*end == '-') {
4052 buf = end + 1;
4053 end_chunk = simple_strtoul(buf, &end, 0);
4054 if (buf == end) break;
4055 }
4056 if (*end && !isspace(*end)) break;
4057 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4058 buf = skip_spaces(end);
4059 }
4060 bitmap_unplug(mddev->bitmap);
4061out:
4062 return len;
4063}
4064
4065static struct md_sysfs_entry md_bitmap =
4066__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4067
4068static ssize_t
4069size_show(struct mddev *mddev, char *page)
4070{
4071 return sprintf(page, "%llu\n",
4072 (unsigned long long)mddev->dev_sectors / 2);
4073}
4074
4075static int update_size(struct mddev *mddev, sector_t num_sectors);
4076
4077static ssize_t
4078size_store(struct mddev *mddev, const char *buf, size_t len)
4079{
4080 /* If array is inactive, we can reduce the component size, but
4081 * not increase it (except from 0).
4082 * If array is active, we can try an on-line resize
4083 */
4084 sector_t sectors;
4085 int err = strict_blocks_to_sectors(buf, &sectors);
4086
4087 if (err < 0)
4088 return err;
4089 if (mddev->pers) {
4090 err = update_size(mddev, sectors);
4091 md_update_sb(mddev, 1);
4092 } else {
4093 if (mddev->dev_sectors == 0 ||
4094 mddev->dev_sectors > sectors)
4095 mddev->dev_sectors = sectors;
4096 else
4097 err = -ENOSPC;
4098 }
4099 return err ? err : len;
4100}
4101
4102static struct md_sysfs_entry md_size =
4103__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4104
4105 /* Metadata version.
4106 * This is one of
4107 *   'none' for arrays with no metadata (good luck...)
4108 *   'external' for arrays with externally managed metadata,
4109 * or N.M for internally known formats
4110 */
4111
4112static ssize_t
4113metadata_show(struct mddev *mddev, char *page)
4114{
4115 if (mddev->persistent)
4116 return sprintf(page, "%d.%d\n",
4117 mddev->major_version, mddev->minor_version);
4118 else if (mddev->external)
4119 return sprintf(page, "external:%s\n", mddev->metadata_type);
4120 else
4121 return sprintf(page, "none\n");
4122}
4123
4124static ssize_t
4125metadata_store(struct mddev *mddev, const char *buf, size_t len)
4126{
4127 int major, minor;
4128 char *e;
4129
4130 /* Changing the details of 'external' metadata is
4131 * always permitted.  Otherwise there must be
4132 * no devices attached to the array. */
4133 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4134 ;
4135 else if (!list_empty(&mddev->disks))
4136 return -EBUSY;
4137
4138 if (cmd_match(buf, "none")) {
4139 mddev->persistent = 0;
4140 mddev->external = 0;
4141 mddev->major_version = 0;
4142 mddev->minor_version = 90;
4143 return len;
4144 }
4145 if (strncmp(buf, "external:", 9) == 0) {
4146 size_t namelen = len-9;
4147 if (namelen >= sizeof(mddev->metadata_type))
4148 namelen = sizeof(mddev->metadata_type)-1;
4149 strncpy(mddev->metadata_type, buf+9, namelen);
4150 mddev->metadata_type[namelen] = 0;
4151 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4152 mddev->metadata_type[--namelen] = 0;
4153 mddev->persistent = 0;
4154 mddev->external = 1;
4155 mddev->major_version = 0;
4156 mddev->minor_version = 90;
4157 return len;
4158 }
4159 major = simple_strtoul(buf, &e, 10);
4160 if (e==buf || *e != '.')
4161 return -EINVAL;
4162 buf = e+1;
4163 minor = simple_strtoul(buf, &e, 10);
4164 if (e==buf || (*e && *e != '\n') )
4165 return -EINVAL;
4166 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4167 return -ENOENT;
4168 mddev->major_version = major;
4169 mddev->minor_version = minor;
4170 mddev->persistent = 1;
4171 mddev->external = 0;
4172 return len;
4173}
4174
4175static struct md_sysfs_entry md_metadata =
4176__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4177
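/*
 * "sync_action" shows which of idle, frozen, resync, recover, check, repair
 * or reshape the array is currently doing, and accepts the same words to
 * request that action (or "idle"/"frozen" to interrupt one), e.g.:
 *   echo check > /sys/block/md0/md/sync_action
 */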
4178static ssize_t
4179action_show(struct mddev *mddev, char *page)
4180{
4181 char *type = "idle";
4182 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4183 type = "frozen";
4184 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4185 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
4186 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4187 type = "reshape";
4188 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4189 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4190 type = "resync";
4191 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
4192 type = "check";
4193 else
4194 type = "repair";
4195 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
4196 type = "recover";
4197 }
4198 return sprintf(page, "%s\n", type);
4199}
4200
4201static ssize_t
4202action_store(struct mddev *mddev, const char *page, size_t len)
4203{
4204 if (!mddev->pers || !mddev->pers->sync_request)
4205 return -EINVAL;
4206
4207 if (cmd_match(page, "frozen"))
4208 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4209 else
4210 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4211
4212 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4213 if (mddev->sync_thread) {
4214 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4215 md_reap_sync_thread(mddev);
4216 }
4217 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4218 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
4219 return -EBUSY;
4220 else if (cmd_match(page, "resync"))
4221 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4222 else if (cmd_match(page, "recover")) {
4223 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4224 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4225 } else if (cmd_match(page, "reshape")) {
4226 int err;
4227 if (mddev->pers->start_reshape == NULL)
4228 return -EINVAL;
4229 err = mddev->pers->start_reshape(mddev);
4230 if (err)
4231 return err;
4232 sysfs_notify(&mddev->kobj, NULL, "degraded");
4233 } else {
4234 if (cmd_match(page, "check"))
4235 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4236 else if (!cmd_match(page, "repair"))
4237 return -EINVAL;
4238 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4239 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4240 }
4241 if (mddev->ro == 2) {
4242 /* A write to sync_action is enough to justify
4243 * canceling read-auto mode
4244 */
4245 mddev->ro = 0;
4246 md_wakeup_thread(mddev->sync_thread);
4247 }
4248 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4249 md_wakeup_thread(mddev->thread);
4250 sysfs_notify_dirent_safe(mddev->sysfs_action);
4251 return len;
4252}
4253
4254static struct md_sysfs_entry md_scan_mode =
4255__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4256
4257static ssize_t
4258last_sync_action_show(struct mddev *mddev, char *page)
4259{
4260 return sprintf(page, "%s\n", mddev->last_sync_action);
4261}
4262
4263static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4264
4265static ssize_t
4266mismatch_cnt_show(struct mddev *mddev, char *page)
4267{
4268 return sprintf(page, "%llu\n",
4269 (unsigned long long)
4270 atomic64_read(&mddev->resync_mismatches));
4271}
4272
4273static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4274
4275static ssize_t
4276sync_min_show(struct mddev *mddev, char *page)
4277{
4278 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4279 mddev->sync_speed_min ? "local": "system");
4280}
4281
4282static ssize_t
4283sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4284{
4285 int min;
4286 char *e;
4287 if (strncmp(buf, "system", 6)==0) {
4288 mddev->sync_speed_min = 0;
4289 return len;
4290 }
4291 min = simple_strtoul(buf, &e, 10);
4292 if (buf == e || (*e && *e != '\n') || min <= 0)
4293 return -EINVAL;
4294 mddev->sync_speed_min = min;
4295 return len;
4296}
4297
4298static struct md_sysfs_entry md_sync_min =
4299__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4300
4301static ssize_t
4302sync_max_show(struct mddev *mddev, char *page)
4303{
4304 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4305 mddev->sync_speed_max ? "local": "system");
4306}
4307
4308static ssize_t
4309sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4310{
4311 int max;
4312 char *e;
4313 if (strncmp(buf, "system", 6)==0) {
4314 mddev->sync_speed_max = 0;
4315 return len;
4316 }
4317 max = simple_strtoul(buf, &e, 10);
4318 if (buf == e || (*e && *e != '\n') || max <= 0)
4319 return -EINVAL;
4320 mddev->sync_speed_max = max;
4321 return len;
4322}
4323
4324static struct md_sysfs_entry md_sync_max =
4325__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4326
4327static ssize_t
4328degraded_show(struct mddev *mddev, char *page)
4329{
4330 return sprintf(page, "%d\n", mddev->degraded);
4331}
4332static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4333
4334static ssize_t
4335sync_force_parallel_show(struct mddev *mddev, char *page)
4336{
4337 return sprintf(page, "%d\n", mddev->parallel_resync);
4338}
4339
4340static ssize_t
4341sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4342{
4343 long n;
4344
4345 if (kstrtol(buf, 10, &n))
4346 return -EINVAL;
4347
4348 if (n != 0 && n != 1)
4349 return -EINVAL;
4350
4351 mddev->parallel_resync = n;
4352
4353 if (mddev->sync_thread)
4354 wake_up(&resync_wait);
4355
4356 return len;
4357}
4358
4359
4360static struct md_sysfs_entry md_sync_force_parallel =
4361__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4362 sync_force_parallel_show, sync_force_parallel_store);
4363
4364static ssize_t
4365sync_speed_show(struct mddev *mddev, char *page)
4366{
4367 unsigned long resync, dt, db;
4368 if (mddev->curr_resync == 0)
4369 return sprintf(page, "none\n");
4370 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4371 dt = (jiffies - mddev->resync_mark) / HZ;
4372 if (!dt) dt++;
4373 db = resync - mddev->resync_mark_cnt;
4374 return sprintf(page, "%lu\n", db/dt/2);
4375}
4376
4377static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4378
4379static ssize_t
4380sync_completed_show(struct mddev *mddev, char *page)
4381{
4382 unsigned long long max_sectors, resync;
4383
4384 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4385 return sprintf(page, "none\n");
4386
4387 if (mddev->curr_resync == 1 ||
4388 mddev->curr_resync == 2)
4389 return sprintf(page, "delayed\n");
4390
4391 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4392 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4393 max_sectors = mddev->resync_max_sectors;
4394 else
4395 max_sectors = mddev->dev_sectors;
4396
4397 resync = mddev->curr_resync_completed;
4398 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4399}
4400
4401static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4402
4403static ssize_t
4404min_sync_show(struct mddev *mddev, char *page)
4405{
4406 return sprintf(page, "%llu\n",
4407 (unsigned long long)mddev->resync_min);
4408}
4409static ssize_t
4410min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4411{
4412 unsigned long long min;
4413 if (kstrtoull(buf, 10, &min))
4414 return -EINVAL;
4415 if (min > mddev->resync_max)
4416 return -EINVAL;
4417 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4418 return -EBUSY;
4419
4420 /* Must be a multiple of chunk_size */
4421 if (mddev->chunk_sectors) {
4422 sector_t temp = min;
4423 if (sector_div(temp, mddev->chunk_sectors))
4424 return -EINVAL;
4425 }
4426 mddev->resync_min = min;
4427
4428 return len;
4429}
4430
4431static struct md_sysfs_entry md_min_sync =
4432__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4433
4434static ssize_t
4435max_sync_show(struct mddev *mddev, char *page)
4436{
4437 if (mddev->resync_max == MaxSector)
4438 return sprintf(page, "max\n");
4439 else
4440 return sprintf(page, "%llu\n",
4441 (unsigned long long)mddev->resync_max);
4442}
4443static ssize_t
4444max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4445{
4446 if (strncmp(buf, "max", 3) == 0)
4447 mddev->resync_max = MaxSector;
4448 else {
4449 unsigned long long max;
4450 if (kstrtoull(buf, 10, &max))
4451 return -EINVAL;
4452 if (max < mddev->resync_min)
4453 return -EINVAL;
4454 if (max < mddev->resync_max &&
4455 mddev->ro == 0 &&
4456 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4457 return -EBUSY;
4458
4459 /* Must be a multiple of chunk_size */
4460 if (mddev->chunk_sectors) {
4461 sector_t temp = max;
4462 if (sector_div(temp, mddev->chunk_sectors))
4463 return -EINVAL;
4464 }
4465 mddev->resync_max = max;
4466 }
4467 wake_up(&mddev->recovery_wait);
4468 return len;
4469}
4470
4471static struct md_sysfs_entry md_max_sync =
4472__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4473
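/*
 * "suspend_lo" and "suspend_hi" bound a region of the array (in sectors) in
 * which IO is suspended, by quiescing the personality whenever the region
 * grows.  Both require a personality that implements ->quiesce().
 */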
4474static ssize_t
4475suspend_lo_show(struct mddev *mddev, char *page)
4476{
4477 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4478}
4479
4480static ssize_t
4481suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4482{
4483 char *e;
4484 unsigned long long new = simple_strtoull(buf, &e, 10);
4485 unsigned long long old = mddev->suspend_lo;
4486
4487 if (mddev->pers == NULL ||
4488 mddev->pers->quiesce == NULL)
4489 return -EINVAL;
4490 if (buf == e || (*e && *e != '\n'))
4491 return -EINVAL;
4492
4493 mddev->suspend_lo = new;
4494 if (new >= old)
4495
4496 mddev->pers->quiesce(mddev, 2);
4497 else {
4498
4499 mddev->pers->quiesce(mddev, 1);
4500 mddev->pers->quiesce(mddev, 0);
4501 }
4502 return len;
4503}
4504static struct md_sysfs_entry md_suspend_lo =
4505__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4506
4507
4508static ssize_t
4509suspend_hi_show(struct mddev *mddev, char *page)
4510{
4511 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4512}
4513
4514static ssize_t
4515suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4516{
4517 char *e;
4518 unsigned long long new = simple_strtoull(buf, &e, 10);
4519 unsigned long long old = mddev->suspend_hi;
4520
4521 if (mddev->pers == NULL ||
4522 mddev->pers->quiesce == NULL)
4523 return -EINVAL;
4524 if (buf == e || (*e && *e != '\n'))
4525 return -EINVAL;
4526
4527 mddev->suspend_hi = new;
4528 if (new <= old)
4529
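/* lowering suspend_hi shrinks the suspended region */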
4530 mddev->pers->quiesce(mddev, 2);
4531 else {
4532
4533 mddev->pers->quiesce(mddev, 1);
4534 mddev->pers->quiesce(mddev, 0);
4535 }
4536 return len;
4537}
4538static struct md_sysfs_entry md_suspend_hi =
4539__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4540
4541static ssize_t
4542reshape_position_show(struct mddev *mddev, char *page)
4543{
4544 if (mddev->reshape_position != MaxSector)
4545 return sprintf(page, "%llu\n",
4546 (unsigned long long)mddev->reshape_position);
4547 strcpy(page, "none\n");
4548 return 5;
4549}
4550
4551static ssize_t
4552reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4553{
4554 struct md_rdev *rdev;
4555 char *e;
4556 unsigned long long new = simple_strtoull(buf, &e, 10);
4557 if (mddev->pers)
4558 return -EBUSY;
4559 if (buf == e || (*e && *e != '\n'))
4560 return -EINVAL;
4561 mddev->reshape_position = new;
4562 mddev->delta_disks = 0;
4563 mddev->reshape_backwards = 0;
4564 mddev->new_level = mddev->level;
4565 mddev->new_layout = mddev->layout;
4566 mddev->new_chunk_sectors = mddev->chunk_sectors;
4567 rdev_for_each(rdev, mddev)
4568 rdev->new_data_offset = rdev->data_offset;
4569 return len;
4570}
4571
4572static struct md_sysfs_entry md_reshape_position =
4573__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4574 reshape_position_store);
4575
4576static ssize_t
4577reshape_direction_show(struct mddev *mddev, char *page)
4578{
4579 return sprintf(page, "%s\n",
4580 mddev->reshape_backwards ? "backwards" : "forwards");
4581}
4582
4583static ssize_t
4584reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4585{
4586 int backwards = 0;
4587 if (cmd_match(buf, "forwards"))
4588 backwards = 0;
4589 else if (cmd_match(buf, "backwards"))
4590 backwards = 1;
4591 else
4592 return -EINVAL;
4593 if (mddev->reshape_backwards == backwards)
4594 return len;
4595
4596
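/* a pending reshape (non-zero delta_disks) pins the current direction */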
4597 if (mddev->delta_disks)
4598 return -EBUSY;
4599
4600 if (mddev->persistent &&
4601 mddev->major_version == 0)
4602 return -EINVAL;
4603
4604 mddev->reshape_backwards = backwards;
4605 return len;
4606}
4607
4608static struct md_sysfs_entry md_reshape_direction =
4609__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4610 reshape_direction_store);
4611
4612static ssize_t
4613array_size_show(struct mddev *mddev, char *page)
4614{
4615 if (mddev->external_size)
4616 return sprintf(page, "%llu\n",
4617 (unsigned long long)mddev->array_sectors/2);
4618 else
4619 return sprintf(page, "default\n");
4620}
4621
4622static ssize_t
4623array_size_store(struct mddev *mddev, const char *buf, size_t len)
4624{
4625 sector_t sectors;
4626
4627 if (strncmp(buf, "default", 7) == 0) {
4628 if (mddev->pers)
4629 sectors = mddev->pers->size(mddev, 0, 0);
4630 else
4631 sectors = mddev->array_sectors;
4632
4633 mddev->external_size = 0;
4634 } else {
4635 if (strict_blocks_to_sectors(buf, &sectors) < 0)
4636 return -EINVAL;
4637 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4638 return -E2BIG;
4639
4640 mddev->external_size = 1;
4641 }
4642
4643 mddev->array_sectors = sectors;
4644 if (mddev->pers) {
4645 set_capacity(mddev->gendisk, mddev->array_sectors);
4646 revalidate_disk(mddev->gendisk);
4647 }
4648 return len;
4649}
4650
4651static struct md_sysfs_entry md_array_size =
4652__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4653 array_size_store);
4654
4655static struct attribute *md_default_attrs[] = {
4656 &md_level.attr,
4657 &md_layout.attr,
4658 &md_raid_disks.attr,
4659 &md_chunk_size.attr,
4660 &md_size.attr,
4661 &md_resync_start.attr,
4662 &md_metadata.attr,
4663 &md_new_device.attr,
4664 &md_safe_delay.attr,
4665 &md_array_state.attr,
4666 &md_reshape_position.attr,
4667 &md_reshape_direction.attr,
4668 &md_array_size.attr,
4669 &max_corr_read_errors.attr,
4670 NULL,
4671};
4672
4673static struct attribute *md_redundancy_attrs[] = {
4674 &md_scan_mode.attr,
4675 &md_last_scan_mode.attr,
4676 &md_mismatches.attr,
4677 &md_sync_min.attr,
4678 &md_sync_max.attr,
4679 &md_sync_speed.attr,
4680 &md_sync_force_parallel.attr,
4681 &md_sync_completed.attr,
4682 &md_min_sync.attr,
4683 &md_max_sync.attr,
4684 &md_suspend_lo.attr,
4685 &md_suspend_hi.attr,
4686 &md_bitmap.attr,
4687 &md_degraded.attr,
4688 NULL,
4689};
4690static struct attribute_group md_redundancy_group = {
4691 .name = NULL,
4692 .attrs = md_redundancy_attrs,
4693};
4694
4695
4696static ssize_t
4697md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4698{
4699 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4700 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4701 ssize_t rv;
4702
4703 if (!entry->show)
4704 return -EIO;
4705 spin_lock(&all_mddevs_lock);
4706 if (list_empty(&mddev->all_mddevs)) {
4707 spin_unlock(&all_mddevs_lock);
4708 return -EBUSY;
4709 }
4710 mddev_get(mddev);
4711 spin_unlock(&all_mddevs_lock);
4712
4713 rv = mddev_lock(mddev);
4714 if (!rv) {
4715 rv = entry->show(mddev, page);
4716 mddev_unlock(mddev);
4717 }
4718 mddev_put(mddev);
4719 return rv;
4720}
4721
4722static ssize_t
4723md_attr_store(struct kobject *kobj, struct attribute *attr,
4724 const char *page, size_t length)
4725{
4726 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4727 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4728 ssize_t rv;
4729
4730 if (!entry->store)
4731 return -EIO;
4732 if (!capable(CAP_SYS_ADMIN))
4733 return -EACCES;
4734 spin_lock(&all_mddevs_lock);
4735 if (list_empty(&mddev->all_mddevs)) {
4736 spin_unlock(&all_mddevs_lock);
4737 return -EBUSY;
4738 }
4739 mddev_get(mddev);
4740 spin_unlock(&all_mddevs_lock);
4741 if (entry->store == new_dev_store)
4742 flush_workqueue(md_misc_wq);
4743 rv = mddev_lock(mddev);
4744 if (!rv) {
4745 rv = entry->store(mddev, page, length);
4746 mddev_unlock(mddev);
4747 }
4748 mddev_put(mddev);
4749 return rv;
4750}
4751
4752static void md_free(struct kobject *ko)
4753{
4754 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4755
4756 if (mddev->sysfs_state)
4757 sysfs_put(mddev->sysfs_state);
4758
4759 if (mddev->gendisk) {
4760 del_gendisk(mddev->gendisk);
4761 put_disk(mddev->gendisk);
4762 }
4763 if (mddev->queue)
4764 blk_cleanup_queue(mddev->queue);
4765
4766 kfree(mddev);
4767}
4768
4769static const struct sysfs_ops md_sysfs_ops = {
4770 .show = md_attr_show,
4771 .store = md_attr_store,
4772};
4773static struct kobj_type md_ktype = {
4774 .release = md_free,
4775 .sysfs_ops = &md_sysfs_ops,
4776 .default_attrs = md_default_attrs,
4777};
4778
4779int mdp_major = 0;
4780
4781static void mddev_delayed_delete(struct work_struct *ws)
4782{
4783 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4784
4785 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4786 kobject_del(&mddev->kobj);
4787 kobject_put(&mddev->kobj);
4788}
4789
4790static int md_alloc(dev_t dev, char *name)
4791{
4792 static DEFINE_MUTEX(disks_mutex);
4793 struct mddev *mddev = mddev_find(dev);
4794 struct gendisk *disk;
4795 int partitioned;
4796 int shift;
4797 int unit;
4798 int error;
4799
4800 if (!mddev)
4801 return -ENODEV;
4802
4803 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
4804 shift = partitioned ? MdpMinorShift : 0;
4805 unit = MINOR(mddev->unit) >> shift;
4806
4807
4808
4809
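/* wait for any previously scheduled removal of this device
 * (mddev_delayed_delete) to complete */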
4810 flush_workqueue(md_misc_wq);
4811
4812 mutex_lock(&disks_mutex);
4813 error = -EEXIST;
4814 if (mddev->gendisk)
4815 goto abort;
4816
4817 if (name) {
4818
4819
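/* make sure the requested name is not already used by another array */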
4820 struct mddev *mddev2;
4821 spin_lock(&all_mddevs_lock);
4822
4823 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
4824 if (mddev2->gendisk &&
4825 strcmp(mddev2->gendisk->disk_name, name) == 0) {
4826 spin_unlock(&all_mddevs_lock);
4827 goto abort;
4828 }
4829 spin_unlock(&all_mddevs_lock);
4830 }
4831
4832 error = -ENOMEM;
4833 mddev->queue = blk_alloc_queue(GFP_KERNEL);
4834 if (!mddev->queue)
4835 goto abort;
4836 mddev->queue->queuedata = mddev;
4837
4838 blk_queue_make_request(mddev->queue, md_make_request);
4839 blk_set_stacking_limits(&mddev->queue->limits);
4840
4841 disk = alloc_disk(1 << shift);
4842 if (!disk) {
4843 blk_cleanup_queue(mddev->queue);
4844 mddev->queue = NULL;
4845 goto abort;
4846 }
4847 disk->major = MAJOR(mddev->unit);
4848 disk->first_minor = unit << shift;
4849 if (name)
4850 strcpy(disk->disk_name, name);
4851 else if (partitioned)
4852 sprintf(disk->disk_name, "md_d%d", unit);
4853 else
4854 sprintf(disk->disk_name, "md%d", unit);
4855 disk->fops = &md_fops;
4856 disk->private_data = mddev;
4857 disk->queue = mddev->queue;
4858 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4859
4860
4861
4862
4863 disk->flags |= GENHD_FL_EXT_DEVT;
4864 mddev->gendisk = disk;
4865
4866
4867
4868 mutex_lock(&mddev->open_mutex);
4869 add_disk(disk);
4870
4871 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
4872 &disk_to_dev(disk)->kobj, "%s", "md");
4873 if (error) {
4874
4875
4876
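/* registration failed, most likely because the name is already in use;
 * warn and carry on */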
4877 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
4878 disk->disk_name);
4879 error = 0;
4880 }
4881 if (mddev->kobj.sd &&
4882 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4883 printk(KERN_DEBUG "pointless warning\n");
4884 mutex_unlock(&mddev->open_mutex);
4885 abort:
4886 mutex_unlock(&disks_mutex);
4887 if (!error && mddev->kobj.sd) {
4888 kobject_uevent(&mddev->kobj, KOBJ_ADD);
4889 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4890 }
4891 mddev_put(mddev);
4892 return error;
4893}
4894
4895static struct kobject *md_probe(dev_t dev, int *part, void *data)
4896{
4897 md_alloc(dev, NULL);
4898 return NULL;
4899}
4900
4901static int add_named_array(const char *val, struct kernel_param *kp)
4902{
4903
4904
4905
4906
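/*
 * Called when a name is written to the "new_array" module parameter:
 * val must name an array as "md_<something>"; allocate it here.
 */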
4907 int len = strlen(val);
4908 char buf[DISK_NAME_LEN];
4909
4910 while (len && val[len-1] == '\n')
4911 len--;
4912 if (len >= DISK_NAME_LEN)
4913 return -E2BIG;
4914 strlcpy(buf, val, len+1);
4915 if (strncmp(buf, "md_", 3) != 0)
4916 return -EINVAL;
4917 return md_alloc(0, buf);
4918}
4919
4920static void md_safemode_timeout(unsigned long data)
4921{
4922 struct mddev *mddev = (struct mddev *) data;
4923
4924 if (!atomic_read(&mddev->writes_pending)) {
4925 mddev->safemode = 1;
4926 if (mddev->external)
4927 sysfs_notify_dirent_safe(mddev->sysfs_state);
4928 }
4929 md_wakeup_thread(mddev->thread);
4930}
4931
4932static int start_dirty_degraded;
4933
4934int md_run(struct mddev *mddev)
4935{
4936 int err;
4937 struct md_rdev *rdev;
4938 struct md_personality *pers;
4939
4940 if (list_empty(&mddev->disks))
4941
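/* cannot run an array with no devices */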
4942 return -EINVAL;
4943
4944 if (mddev->pers)
4945 return -EBUSY;
4946
4947 if (mddev->sysfs_active)
4948 return -EBUSY;
4949
4950
4951
4952
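/* not yet configured: read the configuration from the member superblocks */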
4953 if (!mddev->raid_disks) {
4954 if (!mddev->persistent)
4955 return -EINVAL;
4956 analyze_sbs(mddev);
4957 }
4958
4959 if (mddev->level != LEVEL_NONE)
4960 request_module("md-level-%d", mddev->level);
4961 else if (mddev->clevel[0])
4962 request_module("md-%s", mddev->clevel);
4963
4964
4965
4966
4967
4968
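/*
 * Drop all cached buffers for the member devices; from now on they are
 * only accessed through the md device.
 */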
4969 rdev_for_each(rdev, mddev) {
4970 if (test_bit(Faulty, &rdev->flags))
4971 continue;
4972 sync_blockdev(rdev->bdev);
4973 invalidate_bdev(rdev->bdev);
4974
4975
4976
4977
4978
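/* sanity check: data and metadata regions on the device must not overlap */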
4979 if (rdev->meta_bdev) {
4980 ;
4981 } else if (rdev->data_offset < rdev->sb_start) {
4982 if (mddev->dev_sectors &&
4983 rdev->data_offset + mddev->dev_sectors
4984 > rdev->sb_start) {
4985 printk("md: %s: data overlaps metadata\n",
4986 mdname(mddev));
4987 return -EINVAL;
4988 }
4989 } else {
4990 if (rdev->sb_start + rdev->sb_size/512
4991 > rdev->data_offset) {
4992 printk("md: %s: metadata overlaps data\n",
4993 mdname(mddev));
4994 return -EINVAL;
4995 }
4996 }
4997 sysfs_notify_dirent_safe(rdev->sysfs_state);
4998 }
4999
5000 if (mddev->bio_set == NULL)
5001 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5002
5003 spin_lock(&pers_lock);
5004 pers = find_pers(mddev->level, mddev->clevel);
5005 if (!pers || !try_module_get(pers->owner)) {
5006 spin_unlock(&pers_lock);
5007 if (mddev->level != LEVEL_NONE)
5008 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5009 mddev->level);
5010 else
5011 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5012 mddev->clevel);
5013 return -EINVAL;
5014 }
5015 mddev->pers = pers;
5016 spin_unlock(&pers_lock);
5017 if (mddev->level != pers->level) {
5018 mddev->level = pers->level;
5019 mddev->new_level = pers->level;
5020 }
5021 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5022
5023 if (mddev->reshape_position != MaxSector &&
5024 pers->start_reshape == NULL) {
5025
5026 mddev->pers = NULL;
5027 module_put(pers->owner);
5028 return -EINVAL;
5029 }
5030
5031 if (pers->sync_request) {
5032
5033
5034
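/* warn if two members sit on the same underlying physical disk */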
5035 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5036 struct md_rdev *rdev2;
5037 int warned = 0;
5038
5039 rdev_for_each(rdev, mddev)
5040 rdev_for_each(rdev2, mddev) {
5041 if (rdev < rdev2 &&
5042 rdev->bdev->bd_contains ==
5043 rdev2->bdev->bd_contains) {
5044 printk(KERN_WARNING
5045 "%s: WARNING: %s appears to be"
5046 " on the same physical disk as"
5047 " %s.\n",
5048 mdname(mddev),
5049 bdevname(rdev->bdev,b),
5050 bdevname(rdev2->bdev,b2));
5051 warned = 1;
5052 }
5053 }
5054
5055 if (warned)
5056 printk(KERN_WARNING
5057 "True protection against single-disk"
5058 " failure might be compromised.\n");
5059 }
5060
5061 mddev->recovery = 0;
5062
5063 mddev->resync_max_sectors = mddev->dev_sectors;
5064
5065 mddev->ok_start_degraded = start_dirty_degraded;
5066
5067 if (start_readonly && mddev->ro == 0)
5068 mddev->ro = 2;
5069
5070 err = mddev->pers->run(mddev);
5071 if (err)
5072 printk(KERN_ERR "md: pers->run() failed ...\n");
5073 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
5074 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5075 " but 'external_size' not in effect?\n", __func__);
5076 printk(KERN_ERR
5077 "md: invalid array_size %llu > default size %llu\n",
5078 (unsigned long long)mddev->array_sectors / 2,
5079 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
5080 err = -EINVAL;
5081 mddev->pers->stop(mddev);
5082 }
5083 if (err == 0 && mddev->pers->sync_request &&
5084 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5085 err = bitmap_create(mddev);
5086 if (err) {
5087 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5088 mdname(mddev), err);
5089 mddev->pers->stop(mddev);
5090 }
5091 }
5092 if (err) {
5093 module_put(mddev->pers->owner);
5094 mddev->pers = NULL;
5095 bitmap_destroy(mddev);
5096 return err;
5097 }
5098 if (mddev->pers->sync_request) {
5099 if (mddev->kobj.sd &&
5100 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5101 printk(KERN_WARNING
5102 "md: cannot register extra attributes for %s\n",
5103 mdname(mddev));
5104 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5105 } else if (mddev->ro == 2)
5106 mddev->ro = 0;
5107
5108 atomic_set(&mddev->writes_pending,0);
5109 atomic_set(&mddev->max_corr_read_errors,
5110 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5111 mddev->safemode = 0;
5112 mddev->safemode_timer.function = md_safemode_timeout;
5113 mddev->safemode_timer.data = (unsigned long) mddev;
5114 mddev->safemode_delay = (200 * HZ)/1000 +1;
5115 mddev->in_sync = 1;
5116 smp_wmb();
5117 mddev->ready = 1;
5118 rdev_for_each(rdev, mddev)
5119 if (rdev->raid_disk >= 0)
5120 if (sysfs_link_rdev(mddev, rdev))
5121 ;
5122
5123 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5124
5125 if (mddev->flags & MD_UPDATE_SB_FLAGS)
5126 md_update_sb(mddev, 0);
5127
5128 md_new_event(mddev);
5129 sysfs_notify_dirent_safe(mddev->sysfs_state);
5130 sysfs_notify_dirent_safe(mddev->sysfs_action);
5131 sysfs_notify(&mddev->kobj, NULL, "degraded");
5132 return 0;
5133}
5134EXPORT_SYMBOL_GPL(md_run);
5135
5136static int do_md_run(struct mddev *mddev)
5137{
5138 int err;
5139
5140 err = md_run(mddev);
5141 if (err)
5142 goto out;
5143 err = bitmap_load(mddev);
5144 if (err) {
5145 bitmap_destroy(mddev);
5146 goto out;
5147 }
5148
5149 md_wakeup_thread(mddev->thread);
5150 md_wakeup_thread(mddev->sync_thread);
5151
5152 set_capacity(mddev->gendisk, mddev->array_sectors);
5153 revalidate_disk(mddev->gendisk);
5154 mddev->changed = 1;
5155 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5156out:
5157 return err;
5158}
5159
5160static int restart_array(struct mddev *mddev)
5161{
5162 struct gendisk *disk = mddev->gendisk;
5163
5164
5165 if (list_empty(&mddev->disks))
5166 return -ENXIO;
5167 if (!mddev->pers)
5168 return -EINVAL;
5169 if (!mddev->ro)
5170 return -EBUSY;
5171 mddev->safemode = 0;
5172 mddev->ro = 0;
5173 set_disk_ro(disk, 0);
5174 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5175 mdname(mddev));
5176
5177 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5178 md_wakeup_thread(mddev->thread);
5179 md_wakeup_thread(mddev->sync_thread);
5180 sysfs_notify_dirent_safe(mddev->sysfs_state);
5181 return 0;
5182}
5183
5184
5185
5186static int deny_bitmap_write_access(struct file * file)
5187{
5188 struct inode *inode = file->f_mapping->host;
5189
5190 spin_lock(&inode->i_lock);
5191 if (atomic_read(&inode->i_writecount) > 1) {
5192 spin_unlock(&inode->i_lock);
5193 return -ETXTBSY;
5194 }
5195 atomic_set(&inode->i_writecount, -1);
5196 spin_unlock(&inode->i_lock);
5197
5198 return 0;
5199}
5200
5201void restore_bitmap_write_access(struct file *file)
5202{
5203 struct inode *inode = file->f_mapping->host;
5204
5205 spin_lock(&inode->i_lock);
5206 atomic_set(&inode->i_writecount, 1);
5207 spin_unlock(&inode->i_lock);
5208}
5209
5210static void md_clean(struct mddev *mddev)
5211{
5212 mddev->array_sectors = 0;
5213 mddev->external_size = 0;
5214 mddev->dev_sectors = 0;
5215 mddev->raid_disks = 0;
5216 mddev->recovery_cp = 0;
5217 mddev->resync_min = 0;
5218 mddev->resync_max = MaxSector;
5219 mddev->reshape_position = MaxSector;
5220 mddev->external = 0;
5221 mddev->persistent = 0;
5222 mddev->level = LEVEL_NONE;
5223 mddev->clevel[0] = 0;
5224 mddev->flags = 0;
5225 mddev->ro = 0;
5226 mddev->metadata_type[0] = 0;
5227 mddev->chunk_sectors = 0;
5228 mddev->ctime = mddev->utime = 0;
5229 mddev->layout = 0;
5230 mddev->max_disks = 0;
5231 mddev->events = 0;
5232 mddev->can_decrease_events = 0;
5233 mddev->delta_disks = 0;
5234 mddev->reshape_backwards = 0;
5235 mddev->new_level = LEVEL_NONE;
5236 mddev->new_layout = 0;
5237 mddev->new_chunk_sectors = 0;
5238 mddev->curr_resync = 0;
5239 atomic64_set(&mddev->resync_mismatches, 0);
5240 mddev->suspend_lo = mddev->suspend_hi = 0;
5241 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5242 mddev->recovery = 0;
5243 mddev->in_sync = 0;
5244 mddev->changed = 0;
5245 mddev->degraded = 0;
5246 mddev->safemode = 0;
5247 mddev->merge_check_needed = 0;
5248 mddev->bitmap_info.offset = 0;
5249 mddev->bitmap_info.default_offset = 0;
5250 mddev->bitmap_info.default_space = 0;
5251 mddev->bitmap_info.chunksize = 0;
5252 mddev->bitmap_info.daemon_sleep = 0;
5253 mddev->bitmap_info.max_write_behind = 0;
5254}
5255
5256static void __md_stop_writes(struct mddev *mddev)
5257{
5258 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5259 if (mddev->sync_thread) {
5260 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5261 md_reap_sync_thread(mddev);
5262 }
5263
5264 del_timer_sync(&mddev->safemode_timer);
5265
5266 bitmap_flush(mddev);
5267 md_super_wait(mddev);
5268
5269 if (mddev->ro == 0 &&
5270 (!mddev->in_sync || (mddev->flags & MD_UPDATE_SB_FLAGS))) {
5271
5272 mddev->in_sync = 1;
5273 md_update_sb(mddev, 1);
5274 }
5275}
5276
5277void md_stop_writes(struct mddev *mddev)
5278{
5279 mddev_lock_nointr(mddev);
5280 __md_stop_writes(mddev);
5281 mddev_unlock(mddev);
5282}
5283EXPORT_SYMBOL_GPL(md_stop_writes);
5284
5285static void __md_stop(struct mddev *mddev)
5286{
5287 mddev->ready = 0;
5288 mddev->pers->stop(mddev);
5289 if (mddev->pers->sync_request && mddev->to_remove == NULL)
5290 mddev->to_remove = &md_redundancy_group;
5291 module_put(mddev->pers->owner);
5292 mddev->pers = NULL;
5293 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5294}
5295
5296void md_stop(struct mddev *mddev)
5297{
5298
5299
5300
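/* stop the array and release the personality, bitmap and bio set */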
5301 __md_stop(mddev);
5302 bitmap_destroy(mddev);
5303 if (mddev->bio_set)
5304 bioset_free(mddev->bio_set);
5305}
5306
5307EXPORT_SYMBOL_GPL(md_stop);
5308
5309static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5310{
5311 int err = 0;
5312 int did_freeze = 0;
5313
5314 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5315 did_freeze = 1;
5316 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5317 md_wakeup_thread(mddev->thread);
5318 }
5319 if (mddev->sync_thread) {
5320 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5321
5322
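/* the thread may be blocked waiting for a metadata update that will now
 * never happen, so wake it explicitly */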
5323 wake_up_process(mddev->sync_thread->tsk);
5324 }
5325 mddev_unlock(mddev);
5326 wait_event(resync_wait, mddev->sync_thread == NULL);
5327 mddev_lock_nointr(mddev);
5328
5329 mutex_lock(&mddev->open_mutex);
5330 if (atomic_read(&mddev->openers) > !!bdev ||
5331 mddev->sync_thread ||
5332 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5333 printk("md: %s still in use.\n",mdname(mddev));
5334 if (did_freeze) {
5335 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5336 md_wakeup_thread(mddev->thread);
5337 }
5338 err = -EBUSY;
5339 goto out;
5340 }
5341 if (mddev->pers) {
5342 __md_stop_writes(mddev);
5343
5344 err = -ENXIO;
5345 if (mddev->ro==1)
5346 goto out;
5347 mddev->ro = 1;
5348 set_disk_ro(mddev->gendisk, 1);
5349 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5350 sysfs_notify_dirent_safe(mddev->sysfs_state);
5351 err = 0;
5352 }
5353out:
5354 mutex_unlock(&mddev->open_mutex);
5355 return err;
5356}
5357
5358
5359
5360
5361
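/*
 * mode 0 stops the array completely and disassembles it; any other mode
 * stops it but leaves the member devices bound.
 */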
5362static int do_md_stop(struct mddev * mddev, int mode,
5363 struct block_device *bdev)
5364{
5365 struct gendisk *disk = mddev->gendisk;
5366 struct md_rdev *rdev;
5367 int did_freeze = 0;
5368
5369 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5370 did_freeze = 1;
5371 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5372 md_wakeup_thread(mddev->thread);
5373 }
5374 if (mddev->sync_thread) {
5375 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5376
5377
5378 wake_up_process(mddev->sync_thread->tsk);
5379 }
5380 mddev_unlock(mddev);
5381 wait_event(resync_wait, mddev->sync_thread == NULL);
5382 mddev_lock_nointr(mddev);
5383
5384 mutex_lock(&mddev->open_mutex);
5385 if (atomic_read(&mddev->openers) > !!bdev ||
5386 mddev->sysfs_active ||
5387 mddev->sync_thread ||
5388 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5389 printk("md: %s still in use.\n",mdname(mddev));
5390 mutex_unlock(&mddev->open_mutex);
5391 if (did_freeze) {
5392 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5393 md_wakeup_thread(mddev->thread);
5394 }
5395 return -EBUSY;
5396 }
5397 if (mddev->pers) {
5398 if (mddev->ro)
5399 set_disk_ro(disk, 0);
5400
5401 __md_stop_writes(mddev);
5402 __md_stop(mddev);
5403 mddev->queue->merge_bvec_fn = NULL;
5404 mddev->queue->backing_dev_info.congested_fn = NULL;
5405
5406
5407 sysfs_notify_dirent_safe(mddev->sysfs_state);
5408
5409 rdev_for_each(rdev, mddev)
5410 if (rdev->raid_disk >= 0)
5411 sysfs_unlink_rdev(mddev, rdev);
5412
5413 set_capacity(disk, 0);
5414 mutex_unlock(&mddev->open_mutex);
5415 mddev->changed = 1;
5416 revalidate_disk(disk);
5417
5418 if (mddev->ro)
5419 mddev->ro = 0;
5420 } else
5421 mutex_unlock(&mddev->open_mutex);
5422
5423
5424
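/* free the remaining resources only on a complete stop */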
5425 if (mode == 0) {
5426 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5427
5428 bitmap_destroy(mddev);
5429 if (mddev->bitmap_info.file) {
5430 restore_bitmap_write_access(mddev->bitmap_info.file);
5431 fput(mddev->bitmap_info.file);
5432 mddev->bitmap_info.file = NULL;
5433 }
5434 mddev->bitmap_info.offset = 0;
5435
5436 export_array(mddev);
5437
5438 md_clean(mddev);
5439 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5440 if (mddev->hold_active == UNTIL_STOP)
5441 mddev->hold_active = 0;
5442 }
5443 blk_integrity_unregister(disk);
5444 md_new_event(mddev);
5445 sysfs_notify_dirent_safe(mddev->sysfs_state);
5446 return 0;
5447}
5448
5449#ifndef MODULE
5450static void autorun_array(struct mddev *mddev)
5451{
5452 struct md_rdev *rdev;
5453 int err;
5454
5455 if (list_empty(&mddev->disks))
5456 return;
5457
5458 printk(KERN_INFO "md: running: ");
5459
5460 rdev_for_each(rdev, mddev) {
5461 char b[BDEVNAME_SIZE];
5462 printk("<%s>", bdevname(rdev->bdev,b));
5463 }
5464 printk("\n");
5465
5466 err = do_md_run(mddev);
5467 if (err) {
5468 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5469 do_md_stop(mddev, 0, NULL);
5470 }
5471}
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
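/*
 * Try to assemble arrays from all devices collected on pending_raid_disks:
 * take the first pending device, gather every other pending device whose
 * superblock matches it, bind the set to a new mddev and attempt to run it.
 */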
5485static void autorun_devices(int part)
5486{
5487 struct md_rdev *rdev0, *rdev, *tmp;
5488 struct mddev *mddev;
5489 char b[BDEVNAME_SIZE];
5490
5491 printk(KERN_INFO "md: autorun ...\n");
5492 while (!list_empty(&pending_raid_disks)) {
5493 int unit;
5494 dev_t dev;
5495 LIST_HEAD(candidates);
5496 rdev0 = list_entry(pending_raid_disks.next,
5497 struct md_rdev, same_set);
5498
5499 printk(KERN_INFO "md: considering %s ...\n",
5500 bdevname(rdev0->bdev,b));
5501 INIT_LIST_HEAD(&candidates);
5502 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5503 if (super_90_load(rdev, rdev0, 0) >= 0) {
5504 printk(KERN_INFO "md: adding %s ...\n",
5505 bdevname(rdev->bdev,b));
5506 list_move(&rdev->same_set, &candidates);
5507 }
5508
5509
5510
5511
5512
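/* we have a candidate set; derive the device number from the superblock's
 * preferred minor */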
5513 if (part) {
5514 dev = MKDEV(mdp_major,
5515 rdev0->preferred_minor << MdpMinorShift);
5516 unit = MINOR(dev) >> MdpMinorShift;
5517 } else {
5518 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5519 unit = MINOR(dev);
5520 }
5521 if (rdev0->preferred_minor != unit) {
5522 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5523 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5524 break;
5525 }
5526
5527 md_probe(dev, NULL, NULL);
5528 mddev = mddev_find(dev);
5529 if (!mddev || !mddev->gendisk) {
5530 if (mddev)
5531 mddev_put(mddev);
5532 printk(KERN_ERR
5533 "md: cannot allocate memory for md drive.\n");
5534 break;
5535 }
5536 if (mddev_lock(mddev))
5537 printk(KERN_WARNING "md: %s locked, cannot run\n",
5538 mdname(mddev));
5539 else if (mddev->raid_disks || mddev->major_version
5540 || !list_empty(&mddev->disks)) {
5541 printk(KERN_WARNING
5542 "md: %s already running, cannot run %s\n",
5543 mdname(mddev), bdevname(rdev0->bdev,b));
5544 mddev_unlock(mddev);
5545 } else {
5546 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5547 mddev->persistent = 1;
5548 rdev_for_each_list(rdev, tmp, &candidates) {
5549 list_del_init(&rdev->same_set);
5550 if (bind_rdev_to_array(rdev, mddev))
5551 export_rdev(rdev);
5552 }
5553 autorun_array(mddev);
5554 mddev_unlock(mddev);
5555 }
5556
5557
5558
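/* on success the candidate list is empty; export anything left unbound */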
5559 rdev_for_each_list(rdev, tmp, &candidates) {
5560 list_del_init(&rdev->same_set);
5561 export_rdev(rdev);
5562 }
5563 mddev_put(mddev);
5564 }
5565 printk(KERN_INFO "md: ... autorun DONE.\n");
5566}
5567#endif
5568
5569static int get_version(void __user * arg)
5570{
5571 mdu_version_t ver;
5572
5573 ver.major = MD_MAJOR_VERSION;
5574 ver.minor = MD_MINOR_VERSION;
5575 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5576
5577 if (copy_to_user(arg, &ver, sizeof(ver)))
5578 return -EFAULT;
5579
5580 return 0;
5581}
5582
5583static int get_array_info(struct mddev * mddev, void __user * arg)
5584{
5585 mdu_array_info_t info;
5586 int nr,working,insync,failed,spare;
5587 struct md_rdev *rdev;
5588
5589 nr = working = insync = failed = spare = 0;
5590 rcu_read_lock();
5591 rdev_for_each_rcu(rdev, mddev) {
5592 nr++;
5593 if (test_bit(Faulty, &rdev->flags))
5594 failed++;
5595 else {
5596 working++;
5597 if (test_bit(In_sync, &rdev->flags))
5598 insync++;
5599 else
5600 spare++;
5601 }
5602 }
5603 rcu_read_unlock();
5604
5605 info.major_version = mddev->major_version;
5606 info.minor_version = mddev->minor_version;
5607 info.patch_version = MD_PATCHLEVEL_VERSION;
5608 info.ctime = mddev->ctime;
5609 info.level = mddev->level;
5610 info.size = mddev->dev_sectors / 2;
5611 if (info.size != mddev->dev_sectors / 2)
5612 info.size = -1;
5613 info.nr_disks = nr;
5614 info.raid_disks = mddev->raid_disks;
5615 info.md_minor = mddev->md_minor;
5616 info.not_persistent= !mddev->persistent;
5617
5618 info.utime = mddev->utime;
5619 info.state = 0;
5620 if (mddev->in_sync)
5621 info.state = (1<<MD_SB_CLEAN);
5622 if (mddev->bitmap && mddev->bitmap_info.offset)
5623 info.state |= (1<<MD_SB_BITMAP_PRESENT);
5624 info.active_disks = insync;
5625 info.working_disks = working;
5626 info.failed_disks = failed;
5627 info.spare_disks = spare;
5628
5629 info.layout = mddev->layout;
5630 info.chunk_size = mddev->chunk_sectors << 9;
5631
5632 if (copy_to_user(arg, &info, sizeof(info)))
5633 return -EFAULT;
5634
5635 return 0;
5636}
5637
5638static int get_bitmap_file(struct mddev * mddev, void __user * arg)
5639{
5640 mdu_bitmap_file_t *file = NULL;
5641 char *ptr, *buf = NULL;
5642 int err = -ENOMEM;
5643
5644 file = kmalloc(sizeof(*file), GFP_NOIO);
5645
5646 if (!file)
5647 goto out;
5648
5649
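/* no file-backed bitmap: report an empty pathname */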
5650 if (!mddev->bitmap || !mddev->bitmap->storage.file) {
5651 file->pathname[0] = '\0';
5652 goto copy_out;
5653 }
5654
5655 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
5656 if (!buf)
5657 goto out;
5658
5659 ptr = d_path(&mddev->bitmap->storage.file->f_path,
5660 buf, sizeof(file->pathname));
5661 if (IS_ERR(ptr))
5662 goto out;
5663
5664 strcpy(file->pathname, ptr);
5665
5666copy_out:
5667 err = 0;
5668 if (copy_to_user(arg, file, sizeof(*file)))
5669 err = -EFAULT;
5670out:
5671 kfree(buf);
5672 kfree(file);
5673 return err;
5674}
5675
5676static int get_disk_info(struct mddev * mddev, void __user * arg)
5677{
5678 mdu_disk_info_t info;
5679 struct md_rdev *rdev;
5680
5681 if (copy_from_user(&info, arg, sizeof(info)))
5682 return -EFAULT;
5683
5684 rcu_read_lock();
5685 rdev = find_rdev_nr_rcu(mddev, info.number);
5686 if (rdev) {
5687 info.major = MAJOR(rdev->bdev->bd_dev);
5688 info.minor = MINOR(rdev->bdev->bd_dev);
5689 info.raid_disk = rdev->raid_disk;
5690 info.state = 0;
5691 if (test_bit(Faulty, &rdev->flags))
5692 info.state |= (1<<MD_DISK_FAULTY);
5693 else if (test_bit(In_sync, &rdev->flags)) {
5694 info.state |= (1<<MD_DISK_ACTIVE);
5695 info.state |= (1<<MD_DISK_SYNC);
5696 }
5697 if (test_bit(WriteMostly, &rdev->flags))
5698 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5699 } else {
5700 info.major = info.minor = 0;
5701 info.raid_disk = -1;
5702 info.state = (1<<MD_DISK_REMOVED);
5703 }
5704 rcu_read_unlock();
5705
5706 if (copy_to_user(arg, &info, sizeof(info)))
5707 return -EFAULT;
5708
5709 return 0;
5710}
5711
5712static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5713{
5714 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5715 struct md_rdev *rdev;
5716 dev_t dev = MKDEV(info->major,info->minor);
5717
5718 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5719 return -EOVERFLOW;
5720
5721 if (!mddev->raid_disks) {
5722 int err;
5723
5724 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5725 if (IS_ERR(rdev)) {
5726 printk(KERN_WARNING
5727 "md: md_import_device returned %ld\n",
5728 PTR_ERR(rdev));
5729 return PTR_ERR(rdev);
5730 }
5731 if (!list_empty(&mddev->disks)) {
5732 struct md_rdev *rdev0
5733 = list_entry(mddev->disks.next,
5734 struct md_rdev, same_set);
5735 err = super_types[mddev->major_version]
5736 .load_super(rdev, rdev0, mddev->minor_version);
5737 if (err < 0) {
5738 printk(KERN_WARNING
5739 "md: %s has different UUID to %s\n",
5740 bdevname(rdev->bdev,b),
5741 bdevname(rdev0->bdev,b2));
5742 export_rdev(rdev);
5743 return -EINVAL;
5744 }
5745 }
5746 err = bind_rdev_to_array(rdev, mddev);
5747 if (err)
5748 export_rdev(rdev);
5749 return err;
5750 }
5751
5752
5753
5754
5755
5756
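/* the array is already running: this is a hot-add of a new device */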
5757 if (mddev->pers) {
5758 int err;
5759 if (!mddev->pers->hot_add_disk) {
5760 printk(KERN_WARNING
5761 "%s: personality does not support diskops!\n",
5762 mdname(mddev));
5763 return -EINVAL;
5764 }
5765 if (mddev->persistent)
5766 rdev = md_import_device(dev, mddev->major_version,
5767 mddev->minor_version);
5768 else
5769 rdev = md_import_device(dev, -1, -1);
5770 if (IS_ERR(rdev)) {
5771 printk(KERN_WARNING
5772 "md: md_import_device returned %ld\n",
5773 PTR_ERR(rdev));
5774 return PTR_ERR(rdev);
5775 }
5776
5777 if (!mddev->persistent) {
5778 if (info->state & (1<<MD_DISK_SYNC) &&
5779 info->raid_disk < mddev->raid_disks) {
5780 rdev->raid_disk = info->raid_disk;
5781 set_bit(In_sync, &rdev->flags);
5782 clear_bit(Bitmap_sync, &rdev->flags);
5783 } else
5784 rdev->raid_disk = -1;
5785 rdev->saved_raid_disk = rdev->raid_disk;
5786 } else
5787 super_types[mddev->major_version].
5788 validate_super(mddev, rdev);
5789 if ((info->state & (1<<MD_DISK_SYNC)) &&
5790 rdev->raid_disk != info->raid_disk) {
5791
5792
5793
5794 export_rdev(rdev);
5795 return -EINVAL;
5796 }
5797
5798 clear_bit(In_sync, &rdev->flags);
5799 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5800 set_bit(WriteMostly, &rdev->flags);
5801 else
5802 clear_bit(WriteMostly, &rdev->flags);
5803
5804 rdev->raid_disk = -1;
5805 err = bind_rdev_to_array(rdev, mddev);
5806 if (!err && !mddev->pers->hot_remove_disk) {
5807
5808
5809
5810
5811 super_types[mddev->major_version].
5812 validate_super(mddev, rdev);
5813 err = mddev->pers->hot_add_disk(mddev, rdev);
5814 if (err)
5815 unbind_rdev_from_array(rdev);
5816 }
5817 if (err)
5818 export_rdev(rdev);
5819 else
5820 sysfs_notify_dirent_safe(rdev->sysfs_state);
5821
5822 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5823 if (mddev->degraded)
5824 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5825 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5826 if (!err)
5827 md_new_event(mddev);
5828 md_wakeup_thread(mddev->thread);
5829 return err;
5830 }
5831
5832
5833
5834
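/* otherwise ADD_NEW_DISK is only supported for version-0 superblocks */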
5835 if (mddev->major_version != 0) {
5836 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
5837 mdname(mddev));
5838 return -EINVAL;
5839 }
5840
5841 if (!(info->state & (1<<MD_DISK_FAULTY))) {
5842 int err;
5843 rdev = md_import_device(dev, -1, 0);
5844 if (IS_ERR(rdev)) {
5845 printk(KERN_WARNING
5846 "md: error, md_import_device() returned %ld\n",
5847 PTR_ERR(rdev));
5848 return PTR_ERR(rdev);
5849 }
5850 rdev->desc_nr = info->number;
5851 if (info->raid_disk < mddev->raid_disks)
5852 rdev->raid_disk = info->raid_disk;
5853 else
5854 rdev->raid_disk = -1;
5855
5856 if (rdev->raid_disk < mddev->raid_disks)
5857 if (info->state & (1<<MD_DISK_SYNC))
5858 set_bit(In_sync, &rdev->flags);
5859
5860 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5861 set_bit(WriteMostly, &rdev->flags);
5862
5863 if (!mddev->persistent) {
5864 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5865 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5866 } else
5867 rdev->sb_start = calc_dev_sboffset(rdev);
5868 rdev->sectors = rdev->sb_start;
5869
5870 err = bind_rdev_to_array(rdev, mddev);
5871 if (err) {
5872 export_rdev(rdev);
5873 return err;
5874 }
5875 }
5876
5877 return 0;
5878}
5879
5880static int hot_remove_disk(struct mddev * mddev, dev_t dev)
5881{
5882 char b[BDEVNAME_SIZE];
5883 struct md_rdev *rdev;
5884
5885 rdev = find_rdev(mddev, dev);
5886 if (!rdev)
5887 return -ENXIO;
5888
5889 clear_bit(Blocked, &rdev->flags);
5890 remove_and_add_spares(mddev, rdev);
5891
5892 if (rdev->raid_disk >= 0)
5893 goto busy;
5894
5895 kick_rdev_from_array(rdev);
5896 md_update_sb(mddev, 1);
5897 md_new_event(mddev);
5898
5899 return 0;
5900busy:
5901 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
5902 bdevname(rdev->bdev,b), mdname(mddev));
5903 return -EBUSY;
5904}
5905
5906static int hot_add_disk(struct mddev * mddev, dev_t dev)
5907{
5908 char b[BDEVNAME_SIZE];
5909 int err;
5910 struct md_rdev *rdev;
5911
5912 if (!mddev->pers)
5913 return -ENODEV;
5914
5915 if (mddev->major_version != 0) {
5916 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
5917 " version-0 superblocks.\n",
5918 mdname(mddev));
5919 return -EINVAL;
5920 }
5921 if (!mddev->pers->hot_add_disk) {
5922 printk(KERN_WARNING
5923 "%s: personality does not support diskops!\n",
5924 mdname(mddev));
5925 return -EINVAL;
5926 }
5927
5928 rdev = md_import_device(dev, -1, 0);
5929 if (IS_ERR(rdev)) {
5930 printk(KERN_WARNING
5931 "md: error, md_import_device() returned %ld\n",
5932 PTR_ERR(rdev));
5933 return -EINVAL;
5934 }
5935
5936 if (mddev->persistent)
5937 rdev->sb_start = calc_dev_sboffset(rdev);
5938 else
5939 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5940
5941 rdev->sectors = rdev->sb_start;
5942
5943 if (test_bit(Faulty, &rdev->flags)) {
5944 printk(KERN_WARNING
5945 "md: can not hot-add faulty %s disk to %s!\n",
5946 bdevname(rdev->bdev,b), mdname(mddev));
5947 err = -EINVAL;
5948 goto abort_export;
5949 }
5950 clear_bit(In_sync, &rdev->flags);
5951 rdev->desc_nr = -1;
5952 rdev->saved_raid_disk = -1;
5953 err = bind_rdev_to_array(rdev, mddev);
5954 if (err)
5955 goto abort_export;
5956
5957
5958
5959
5960
5961
5962 rdev->raid_disk = -1;
5963
5964 md_update_sb(mddev, 1);
5965
5966
5967
5968
5969
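/* kick the recovery thread; the new spare may need to be activated
 * immediately */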
5970 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5971 md_wakeup_thread(mddev->thread);
5972 md_new_event(mddev);
5973 return 0;
5974
5975abort_export:
5976 export_rdev(rdev);
5977 return err;
5978}
5979
5980static int set_bitmap_file(struct mddev *mddev, int fd)
5981{
5982 int err;
5983
5984 if (mddev->pers) {
5985 if (!mddev->pers->quiesce)
5986 return -EBUSY;
5987 if (mddev->recovery || mddev->sync_thread)
5988 return -EBUSY;
5989
5990 }
5991
5992
5993 if (fd >= 0) {
5994 if (mddev->bitmap)
5995 return -EEXIST;
5996 mddev->bitmap_info.file = fget(fd);
5997
5998 if (mddev->bitmap_info.file == NULL) {
5999 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
6000 mdname(mddev));
6001 return -EBADF;
6002 }
6003
6004 err = deny_bitmap_write_access(mddev->bitmap_info.file);
6005 if (err) {
6006 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
6007 mdname(mddev));
6008 fput(mddev->bitmap_info.file);
6009 mddev->bitmap_info.file = NULL;
6010 return err;
6011 }
6012 mddev->bitmap_info.offset = 0;
6013 } else if (mddev->bitmap == NULL)
6014 return -ENOENT;
6015 err = 0;
6016 if (mddev->pers) {
6017 mddev->pers->quiesce(mddev, 1);
6018 if (fd >= 0) {
6019 err = bitmap_create(mddev);
6020 if (!err)
6021 err = bitmap_load(mddev);
6022 }
6023 if (fd < 0 || err) {
6024 bitmap_destroy(mddev);
6025 fd = -1;
6026 }
6027 mddev->pers->quiesce(mddev, 0);
6028 }
6029 if (fd < 0) {
6030 if (mddev->bitmap_info.file) {
6031 restore_bitmap_write_access(mddev->bitmap_info.file);
6032 fput(mddev->bitmap_info.file);
6033 }
6034 mddev->bitmap_info.file = NULL;
6035 }
6036
6037 return err;
6038}
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051
6052
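/*
 * set_array_info has two uses: with raid_disks == 0 it only records which
 * superblock version to use when assembling an existing array; with
 * raid_disks > 0 it describes the shape of a brand new array.
 */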
6053static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
6054{
6055
6056 if (info->raid_disks == 0) {
6057
6058 if (info->major_version < 0 ||
6059 info->major_version >= ARRAY_SIZE(super_types) ||
6060 super_types[info->major_version].name == NULL) {
6061
6062 printk(KERN_INFO
6063 "md: superblock version %d not known\n",
6064 info->major_version);
6065 return -EINVAL;
6066 }
6067 mddev->major_version = info->major_version;
6068 mddev->minor_version = info->minor_version;
6069 mddev->patch_version = info->patch_version;
6070 mddev->persistent = !info->not_persistent;
6071
6072
6073
6074 mddev->ctime = get_seconds();
6075 return 0;
6076 }
6077 mddev->major_version = MD_MAJOR_VERSION;
6078 mddev->minor_version = MD_MINOR_VERSION;
6079 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6080 mddev->ctime = get_seconds();
6081
6082 mddev->level = info->level;
6083 mddev->clevel[0] = 0;
6084 mddev->dev_sectors = 2 * (sector_t)info->size;
6085 mddev->raid_disks = info->raid_disks;
6086
6087
6088
6089 if (info->state & (1<<MD_SB_CLEAN))
6090 mddev->recovery_cp = MaxSector;
6091 else
6092 mddev->recovery_cp = 0;
6093 mddev->persistent = ! info->not_persistent;
6094 mddev->external = 0;
6095
6096 mddev->layout = info->layout;
6097 mddev->chunk_sectors = info->chunk_size >> 9;
6098
6099 mddev->max_disks = MD_SB_DISKS;
6100
6101 if (mddev->persistent)
6102 mddev->flags = 0;
6103 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6104
6105 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6106 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6107 mddev->bitmap_info.offset = 0;
6108
6109 mddev->reshape_position = MaxSector;
6110
6111
6112
6113
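/* generate a random 128-bit UUID for the new array */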
6114 get_random_bytes(mddev->uuid, 16);
6115
6116 mddev->new_level = mddev->level;
6117 mddev->new_chunk_sectors = mddev->chunk_sectors;
6118 mddev->new_layout = mddev->layout;
6119 mddev->delta_disks = 0;
6120 mddev->reshape_backwards = 0;
6121
6122 return 0;
6123}
6124
6125void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6126{
6127 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6128
6129 if (mddev->external_size)
6130 return;
6131
6132 mddev->array_sectors = array_sectors;
6133}
6134EXPORT_SYMBOL(md_set_array_sectors);
6135
6136static int update_size(struct mddev *mddev, sector_t num_sectors)
6137{
6138 struct md_rdev *rdev;
6139 int rv;
6140 int fit = (num_sectors == 0);
6141
6142 if (mddev->pers->resize == NULL)
6143 return -EINVAL;
6144
6145
6146
6147
6148
6149
6150
6151
6152
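/* refuse to resize while a resync/recovery thread is running */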
6153 if (mddev->sync_thread)
6154 return -EBUSY;
6155
6156 rdev_for_each(rdev, mddev) {
6157 sector_t avail = rdev->sectors;
6158
6159 if (fit && (num_sectors == 0 || num_sectors > avail))
6160 num_sectors = avail;
6161 if (avail < num_sectors)
6162 return -ENOSPC;
6163 }
6164 rv = mddev->pers->resize(mddev, num_sectors);
6165 if (!rv)
6166 revalidate_disk(mddev->gendisk);
6167 return rv;
6168}
6169
6170static int update_raid_disks(struct mddev *mddev, int raid_disks)
6171{
6172 int rv;
6173 struct md_rdev *rdev;
6174
6175 if (mddev->pers->check_reshape == NULL)
6176 return -EINVAL;
6177 if (raid_disks <= 0 ||
6178 (mddev->max_disks && raid_disks >= mddev->max_disks))
6179 return -EINVAL;
6180 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
6181 return -EBUSY;
6182
6183 rdev_for_each(rdev, mddev) {
6184 if (mddev->raid_disks < raid_disks &&
6185 rdev->data_offset < rdev->new_data_offset)
6186 return -EINVAL;
6187 if (mddev->raid_disks > raid_disks &&
6188 rdev->data_offset > rdev->new_data_offset)
6189 return -EINVAL;
6190 }
6191
6192 mddev->delta_disks = raid_disks - mddev->raid_disks;
6193 if (mddev->delta_disks < 0)
6194 mddev->reshape_backwards = 1;
6195 else if (mddev->delta_disks > 0)
6196 mddev->reshape_backwards = 0;
6197
6198 rv = mddev->pers->check_reshape(mddev);
6199 if (rv < 0) {
6200 mddev->delta_disks = 0;
6201 mddev->reshape_backwards = 0;
6202 }
6203 return rv;
6204}
6205
6206
6207
6208
6209
6210
6211
6212
6213
6214
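/*
 * update_array_info reconfigures a running array.  The version, ctime,
 * level, persistence and chunk size must match the existing array, and at
 * most one of size, raid_disks, layout or bitmap presence may change.
 */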
6215static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6216{
6217 int rv = 0;
6218 int cnt = 0;
6219 int state = 0;
6220
6221
6222 if (mddev->bitmap && mddev->bitmap_info.offset)
6223 state |= (1 << MD_SB_BITMAP_PRESENT);
6224
6225 if (mddev->major_version != info->major_version ||
6226 mddev->minor_version != info->minor_version ||
6227
6228 mddev->ctime != info->ctime ||
6229 mddev->level != info->level ||
6230
6231 !mddev->persistent != info->not_persistent||
6232 mddev->chunk_sectors != info->chunk_size >> 9 ||
6233
6234 ((state^info->state) & 0xfffffe00)
6235 )
6236 return -EINVAL;
6237
6238 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6239 cnt++;
6240 if (mddev->raid_disks != info->raid_disks)
6241 cnt++;
6242 if (mddev->layout != info->layout)
6243 cnt++;
6244 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6245 cnt++;
6246 if (cnt == 0)
6247 return 0;
6248 if (cnt > 1)
6249 return -EINVAL;
6250
6251 if (mddev->layout != info->layout) {
6252
6253
6254
6255
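/* layout changes are handled entirely by the personality's check_reshape */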
6256 if (mddev->pers->check_reshape == NULL)
6257 return -EINVAL;
6258 else {
6259 mddev->new_layout = info->layout;
6260 rv = mddev->pers->check_reshape(mddev);
6261 if (rv)
6262 mddev->new_layout = mddev->layout;
6263 return rv;
6264 }
6265 }
6266 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6267 rv = update_size(mddev, (sector_t)info->size * 2);
6268
6269 if (mddev->raid_disks != info->raid_disks)
6270 rv = update_raid_disks(mddev, info->raid_disks);
6271
6272 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6273 if (mddev->pers->quiesce == NULL)
6274 return -EINVAL;
6275 if (mddev->recovery || mddev->sync_thread)
6276 return -EBUSY;
6277 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6278
6279 if (mddev->bitmap)
6280 return -EEXIST;
6281 if (mddev->bitmap_info.default_offset == 0)
6282 return -EINVAL;
6283 mddev->bitmap_info.offset =
6284 mddev->bitmap_info.default_offset;
6285 mddev->bitmap_info.space =
6286 mddev->bitmap_info.default_space;
6287 mddev->pers->quiesce(mddev, 1);
6288 rv = bitmap_create(mddev);
6289 if (!rv)
6290 rv = bitmap_load(mddev);
6291 if (rv)
6292 bitmap_destroy(mddev);
6293 mddev->pers->quiesce(mddev, 0);
6294 } else {
6295
6296 if (!mddev->bitmap)
6297 return -ENOENT;
6298 if (mddev->bitmap->storage.file)
6299 return -EINVAL;
6300 mddev->pers->quiesce(mddev, 1);
6301 bitmap_destroy(mddev);
6302 mddev->pers->quiesce(mddev, 0);
6303 mddev->bitmap_info.offset = 0;
6304 }
6305 }
6306 md_update_sb(mddev, 1);
6307 return rv;
6308}
6309
6310static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6311{
6312 struct md_rdev *rdev;
6313 int err = 0;
6314
6315 if (mddev->pers == NULL)
6316 return -ENODEV;
6317
6318 rcu_read_lock();
6319 rdev = find_rdev_rcu(mddev, dev);
6320 if (!rdev)
6321 err = -ENODEV;
6322 else {
6323 md_error(mddev, rdev);
6324 if (!test_bit(Faulty, &rdev->flags))
6325 err = -EBUSY;
6326 }
6327 rcu_read_unlock();
6328 return err;
6329}
6330
6331
6332
6333
6334
6335
6336
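/* there is no meaningful CHS geometry; report a fake 2 heads / 4 sectors
 * layout */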
6337static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6338{
6339 struct mddev *mddev = bdev->bd_disk->private_data;
6340
6341 geo->heads = 2;
6342 geo->sectors = 4;
6343 geo->cylinders = mddev->array_sectors / 8;
6344 return 0;
6345}
6346
6347static inline bool md_ioctl_valid(unsigned int cmd)
6348{
6349 switch (cmd) {
6350 case ADD_NEW_DISK:
6351 case BLKROSET:
6352 case GET_ARRAY_INFO:
6353 case GET_BITMAP_FILE:
6354 case GET_DISK_INFO:
6355 case HOT_ADD_DISK:
6356 case HOT_REMOVE_DISK:
6357 case PRINT_RAID_DEBUG:
6358 case RAID_AUTORUN:
6359 case RAID_VERSION:
6360 case RESTART_ARRAY_RW:
6361 case RUN_ARRAY:
6362 case SET_ARRAY_INFO:
6363 case SET_BITMAP_FILE:
6364 case SET_DISK_FAULTY:
6365 case STOP_ARRAY:
6366 case STOP_ARRAY_RO:
6367 return true;
6368 default:
6369 return false;
6370 }
6371}
6372
6373static int md_ioctl(struct block_device *bdev, fmode_t mode,
6374 unsigned int cmd, unsigned long arg)
6375{
6376 int err = 0;
6377 void __user *argp = (void __user *)arg;
6378 struct mddev *mddev = NULL;
6379 int ro;
6380
6381 if (!md_ioctl_valid(cmd))
6382 return -ENOTTY;
6383
6384 switch (cmd) {
6385 case RAID_VERSION:
6386 case GET_ARRAY_INFO:
6387 case GET_DISK_INFO:
6388 break;
6389 default:
6390 if (!capable(CAP_SYS_ADMIN))
6391 return -EACCES;
6392 }
6393
6394
6395
6396
6397
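/* commands that concern the md driver itself rather than a particular array */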
6398 switch (cmd) {
6399 case RAID_VERSION:
6400 err = get_version(argp);
6401 goto done;
6402
6403 case PRINT_RAID_DEBUG:
6404 err = 0;
6405 md_print_devices();
6406 goto done;
6407
6408#ifndef MODULE
6409 case RAID_AUTORUN:
6410 err = 0;
6411 autostart_arrays(arg);
6412 goto done;
6413#endif
6414 default:;
6415 }
6416
6417
6418
6419
6420
6421 mddev = bdev->bd_disk->private_data;
6422
6423 if (!mddev) {
6424 BUG();
6425 goto abort;
6426 }
6427
6428
6429 switch (cmd) {
6430 case GET_ARRAY_INFO:
6431 if (!mddev->raid_disks && !mddev->external)
6432 err = -ENODEV;
6433 else
6434 err = get_array_info(mddev, argp);
6435 goto abort;
6436
6437 case GET_DISK_INFO:
6438 if (!mddev->raid_disks && !mddev->external)
6439 err = -ENODEV;
6440 else
6441 err = get_disk_info(mddev, argp);
6442 goto abort;
6443
6444 case SET_DISK_FAULTY:
6445 err = set_disk_faulty(mddev, new_decode_dev(arg));
6446 goto abort;
6447 }
6448
6449 if (cmd == ADD_NEW_DISK)
6450
6451 flush_workqueue(md_misc_wq);
6452
6453 if (cmd == HOT_REMOVE_DISK)
6454
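/* give the recovery thread a chance to act on any pending recovery
 * request before the removal */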
6455 wait_event_interruptible_timeout(mddev->sb_wait,
6456 !test_bit(MD_RECOVERY_NEEDED,
6457 &mddev->recovery),
6458 msecs_to_jiffies(5000));
6459 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
6460
6461
6462
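/* make sure nobody else has the device open and flush dirty pages
 * before stopping */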
6463 mutex_lock(&mddev->open_mutex);
6464 if (atomic_read(&mddev->openers) > 1) {
6465 mutex_unlock(&mddev->open_mutex);
6466 err = -EBUSY;
6467 goto abort;
6468 }
6469 set_bit(MD_STILL_CLOSED, &mddev->flags);
6470 mutex_unlock(&mddev->open_mutex);
6471 sync_blockdev(bdev);
6472 }
6473 err = mddev_lock(mddev);
6474 if (err) {
6475 printk(KERN_INFO
6476 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6477 err, cmd);
6478 goto abort;
6479 }
6480
6481 if (cmd == SET_ARRAY_INFO) {
6482 mdu_array_info_t info;
6483 if (!arg)
6484 memset(&info, 0, sizeof(info));
6485 else if (copy_from_user(&info, argp, sizeof(info))) {
6486 err = -EFAULT;
6487 goto abort_unlock;
6488 }
6489 if (mddev->pers) {
6490 err = update_array_info(mddev, &info);
6491 if (err) {
6492 printk(KERN_WARNING "md: couldn't update"
6493 " array info. %d\n", err);
6494 goto abort_unlock;
6495 }
6496 goto done_unlock;
6497 }
6498 if (!list_empty(&mddev->disks)) {
6499 printk(KERN_WARNING
6500 "md: array %s already has disks!\n",
6501 mdname(mddev));
6502 err = -EBUSY;
6503 goto abort_unlock;
6504 }
6505 if (mddev->raid_disks) {
6506 printk(KERN_WARNING
6507 "md: array %s already initialised!\n",
6508 mdname(mddev));
6509 err = -EBUSY;
6510 goto abort_unlock;
6511 }
6512 err = set_array_info(mddev, &info);
6513 if (err) {
6514 printk(KERN_WARNING "md: couldn't set"
6515 " array info. %d\n", err);
6516 goto abort_unlock;
6517 }
6518 goto done_unlock;
6519 }
6520
6521
6522
6523
6524
6525
6526 if ((!mddev->raid_disks && !mddev->external)
6527 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6528 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6529 && cmd != GET_BITMAP_FILE) {
6530 err = -ENODEV;
6531 goto abort_unlock;
6532 }
6533
6534
6535
6536
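/* commands that even a read-only array can execute */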
6537 switch (cmd) {
6538 case GET_BITMAP_FILE:
6539 err = get_bitmap_file(mddev, argp);
6540 goto done_unlock;
6541
6542 case RESTART_ARRAY_RW:
6543 err = restart_array(mddev);
6544 goto done_unlock;
6545
6546 case STOP_ARRAY:
6547 err = do_md_stop(mddev, 0, bdev);
6548 goto done_unlock;
6549
6550 case STOP_ARRAY_RO:
6551 err = md_set_readonly(mddev, bdev);
6552 goto done_unlock;
6553
6554 case HOT_REMOVE_DISK:
6555 err = hot_remove_disk(mddev, new_decode_dev(arg));
6556 goto done_unlock;
6557
6558 case ADD_NEW_DISK:
6559
6560
6561
6562
6563 if (mddev->pers) {
6564 mdu_disk_info_t info;
6565 if (copy_from_user(&info, argp, sizeof(info)))
6566 err = -EFAULT;
6567 else if (!(info.state & (1<<MD_DISK_SYNC)))
6568
6569 break;
6570 else
6571 err = add_new_disk(mddev, &info);
6572 goto done_unlock;
6573 }
6574 break;
6575
6576 case BLKROSET:
6577 if (get_user(ro, (int __user *)(arg))) {
6578 err = -EFAULT;
6579 goto done_unlock;
6580 }
6581 err = -EINVAL;
6582
6583
6584
6585
6586 if (ro)
6587 goto done_unlock;
6588
6589
6590 if (mddev->ro != 1)
6591 goto done_unlock;
6592
6593
6594
6595
6596 if (mddev->pers) {
6597 err = restart_array(mddev);
6598 if (err == 0) {
6599 mddev->ro = 2;
6600 set_disk_ro(mddev->gendisk, 0);
6601 }
6602 }
6603 goto done_unlock;
6604 }
6605
6606
6607
6608
6609
6610
6611
6612
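/* the remaining ioctls modify array state: reject them on a read-only
 * array, but switch an auto-read-only (ro == 2) array back to read-write
 * first */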
6613 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
6614 if (mddev->ro == 2) {
6615 mddev->ro = 0;
6616 sysfs_notify_dirent_safe(mddev->sysfs_state);
6617 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6618
6619
6620
6621
6622 if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
6623 mddev_unlock(mddev);
6624 wait_event(mddev->sb_wait,
6625 !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
6626 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6627 mddev_lock_nointr(mddev);
6628 }
6629 } else {
6630 err = -EROFS;
6631 goto abort_unlock;
6632 }
6633 }
6634
6635 switch (cmd) {
6636 case ADD_NEW_DISK:
6637 {
6638 mdu_disk_info_t info;
6639 if (copy_from_user(&info, argp, sizeof(info)))
6640 err = -EFAULT;
6641 else
6642 err = add_new_disk(mddev, &info);
6643 goto done_unlock;
6644 }
6645
6646 case HOT_ADD_DISK:
6647 err = hot_add_disk(mddev, new_decode_dev(arg));
6648 goto done_unlock;
6649
6650 case RUN_ARRAY:
6651 err = do_md_run(mddev);
6652 goto done_unlock;
6653
6654 case SET_BITMAP_FILE:
6655 err = set_bitmap_file(mddev, (int)arg);
6656 goto done_unlock;
6657
6658 default:
6659 err = -EINVAL;
6660 goto abort_unlock;
6661 }
6662
6663done_unlock:
6664abort_unlock:
6665 if (mddev->hold_active == UNTIL_IOCTL &&
6666 err != -EINVAL)
6667 mddev->hold_active = 0;
6668 mddev_unlock(mddev);
6669
6670 return err;
6671done:
6672 if (err)
6673 MD_BUG();
6674abort:
6675 return err;
6676}
6677#ifdef CONFIG_COMPAT
6678static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
6679 unsigned int cmd, unsigned long arg)
6680{
6681 switch (cmd) {
6682 case HOT_REMOVE_DISK:
6683 case HOT_ADD_DISK:
6684 case SET_DISK_FAULTY:
6685 case SET_BITMAP_FILE:
6686
6687 break;
6688 default:
6689 arg = (unsigned long)compat_ptr(arg);
6690 break;
6691 }
6692
6693 return md_ioctl(bdev, mode, cmd, arg);
6694}
6695#endif
6696
6697static int md_open(struct block_device *bdev, fmode_t mode)
6698{
6699
6700
6701
6702
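/* take a reference to the mddev and serialise against a concurrent stop
 * via open_mutex */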
6703 struct mddev *mddev = mddev_find(bdev->bd_dev);
6704 int err;
6705
6706 if (!mddev)
6707 return -ENODEV;
6708
6709 if (mddev->gendisk != bdev->bd_disk) {
6710
6711
6712
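/* we raced with mddev_put() discarding this bd_disk: wait for the delayed
 * delete and let the caller retry the open */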
6713 mddev_put(mddev);
6714
6715 flush_workqueue(md_misc_wq);
6716
6717 return -ERESTARTSYS;
6718 }
6719 BUG_ON(mddev != bdev->bd_disk->private_data);
6720
6721 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
6722 goto out;
6723
6724 err = 0;
6725 atomic_inc(&mddev->openers);
6726 clear_bit(MD_STILL_CLOSED, &mddev->flags);
6727 mutex_unlock(&mddev->open_mutex);
6728
6729 check_disk_change(bdev);
6730 out:
6731 return err;
6732}
6733
6734static void md_release(struct gendisk *disk, fmode_t mode)
6735{
6736 struct mddev *mddev = disk->private_data;
6737
6738 BUG_ON(!mddev);
6739 atomic_dec(&mddev->openers);
6740 mddev_put(mddev);
6741}
6742
6743static int md_media_changed(struct gendisk *disk)
6744{
6745 struct mddev *mddev = disk->private_data;
6746
6747 return mddev->changed;
6748}
6749
6750static int md_revalidate(struct gendisk *disk)
6751{
6752 struct mddev *mddev = disk->private_data;
6753
6754 mddev->changed = 0;
6755 return 0;
6756}
6757static const struct block_device_operations md_fops =
6758{
6759 .owner = THIS_MODULE,
6760 .open = md_open,
6761 .release = md_release,
6762 .ioctl = md_ioctl,
6763#ifdef CONFIG_COMPAT
6764 .compat_ioctl = md_compat_ioctl,
6765#endif
6766 .getgeo = md_getgeo,
6767 .media_changed = md_media_changed,
6768 .revalidate_disk= md_revalidate,
6769};
6770
6771static int md_thread(void * arg)
6772{
6773 struct md_thread *thread = arg;
6774
6775
6776
6777
6778
6779
6780
6781
6782
6783
6784
6785
6786
6787 allow_signal(SIGKILL);
6788 while (!kthread_should_stop()) {
6789
6790
6791
6792
6793
6794
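/* sleep interruptibly so the thread does not count towards the load
 * average; flush any stray signal first */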
6795 if (signal_pending(current))
6796 flush_signals(current);
6797
6798 wait_event_interruptible_timeout
6799 (thread->wqueue,
6800 test_bit(THREAD_WAKEUP, &thread->flags)
6801 || kthread_should_stop(),
6802 thread->timeout);
6803
6804 clear_bit(THREAD_WAKEUP, &thread->flags);
6805 if (!kthread_should_stop())
6806 thread->run(thread);
6807 }
6808
6809 return 0;
6810}
6811
6812void md_wakeup_thread(struct md_thread *thread)
6813{
6814 if (thread) {
6815 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6816 set_bit(THREAD_WAKEUP, &thread->flags);
6817 wake_up(&thread->wqueue);
6818 }
6819}
6820
6821struct md_thread *md_register_thread(void (*run) (struct md_thread *),
6822 struct mddev *mddev, const char *name)
6823{
6824 struct md_thread *thread;
6825
6826 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
6827 if (!thread)
6828 return NULL;
6829
6830 init_waitqueue_head(&thread->wqueue);
6831
6832 thread->run = run;
6833 thread->mddev = mddev;
6834 thread->timeout = MAX_SCHEDULE_TIMEOUT;
6835 thread->tsk = kthread_run(md_thread, thread,
6836 "%s_%s",
6837 mdname(thread->mddev),
6838 name);
6839 if (IS_ERR(thread->tsk)) {
6840 kfree(thread);
6841 return NULL;
6842 }
6843 return thread;
6844}
6845
6846void md_unregister_thread(struct md_thread **threadp)
6847{
6848 struct md_thread *thread = *threadp;
6849 if (!thread)
6850 return;
6851 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
6852	/* Locking ensures that mddev_unlock does not wake_up a
6853	 * non-existent thread
6854	 */
6855 spin_lock(&pers_lock);
6856 *threadp = NULL;
6857 spin_unlock(&pers_lock);
6858
6859 kthread_stop(thread->tsk);
6860 kfree(thread);
6861}
6862
6863void md_error(struct mddev *mddev, struct md_rdev *rdev)
6864{
6865 if (!mddev) {
6866 MD_BUG();
6867 return;
6868 }
6869
6870 if (!rdev || test_bit(Faulty, &rdev->flags))
6871 return;
6872
6873 if (!mddev->pers || !mddev->pers->error_handler)
6874 return;
6875 mddev->pers->error_handler(mddev,rdev);
6876 if (mddev->degraded)
6877 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6878 sysfs_notify_dirent_safe(rdev->sysfs_state);
6879 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6880 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6881 md_wakeup_thread(mddev->thread);
6882 if (mddev->event_work.func)
6883 queue_work(md_misc_wq, &mddev->event_work);
6884 md_new_event_inintr(mddev);
6885}
6886
6887
6888
6889static void status_unused(struct seq_file *seq)
6890{
6891 int i = 0;
6892 struct md_rdev *rdev;
6893
6894 seq_printf(seq, "unused devices: ");
6895
6896 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
6897 char b[BDEVNAME_SIZE];
6898 i++;
6899 seq_printf(seq, "%s ",
6900 bdevname(rdev->bdev,b));
6901 }
6902 if (!i)
6903 seq_printf(seq, "<none>");
6904
6905 seq_printf(seq, "\n");
6906}
6907
6908
6909static void status_resync(struct seq_file *seq, struct mddev * mddev)
6910{
6911 sector_t max_sectors, resync, res;
6912 unsigned long dt, db;
6913 sector_t rt;
6914 int scale;
6915 unsigned int per_milli;
6916
6917 if (mddev->curr_resync <= 3)
6918 resync = 0;
6919 else
6920 resync = mddev->curr_resync
6921 - atomic_read(&mddev->recovery_active);
6922
6923 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
6924 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6925 max_sectors = mddev->resync_max_sectors;
6926 else
6927 max_sectors = mddev->dev_sectors;
6928
6929
6930
6931
6932 if (!max_sectors) {
6933 MD_BUG();
6934 return;
6935 }
6936
6937	/* Pick 'scale' such that (resync>>scale)*1000 will fit in a
6938	 * sector_t, and (max_sectors>>scale) will fit in a u32, as those
6939	 * are the requirements for sector_div.  Thus 'scale' must be at
6940	 * least 10. */
6941 scale = 10;
6942 if (sizeof(sector_t) > sizeof(unsigned long)) {
6943 while ( max_sectors/2 > (1ULL<<(scale+32)))
6944 scale++;
6945 }
6946 res = (resync>>scale)*1000;
6947 sector_div(res, (u32)((max_sectors>>scale)+1));
6948
6949 per_milli = res;
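	/* per_milli: completed fraction in tenths of a percent (0-1000) */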
6950 {
6951 int i, x = per_milli/50, y = 20-x;
6952 seq_printf(seq, "[");
6953 for (i = 0; i < x; i++)
6954 seq_printf(seq, "=");
6955 seq_printf(seq, ">");
6956 for (i = 0; i < y; i++)
6957 seq_printf(seq, ".");
6958 seq_printf(seq, "] ");
6959 }
6960 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
6961 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
6962 "reshape" :
6963 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
6964 "check" :
6965 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
6966 "resync" : "recovery"))),
6967 per_milli/10, per_milli % 10,
6968 (unsigned long long) resync/2,
6969 (unsigned long long) max_sectors/2);
6970
6971	/*
6972	 * dt: seconds since resync_mark was taken
6973	 * db: sectors completed since that mark (writes still pending in
6974	 *     recovery_active are excluded)
6975	 * rt: estimated time remaining, in seconds
6976	 *
6977	 * rt is a sector_t, so divide before multiplying to avoid
6978	 * overflow: the divisor (db) is pre-scaled by 32 and the result
6979	 * shifted back down by 5 after the multiply.
6980	 */
6981
6982
6983
6984
6985 dt = ((jiffies - mddev->resync_mark) / HZ);
6986 if (!dt) dt++;
6987 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
6988 - mddev->resync_mark_cnt;
6989
6990 rt = max_sectors - resync;
6991 sector_div(rt, db/32+1);
6992 rt *= dt;
6993 rt >>= 5;
6994
6995 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
6996 ((unsigned long)rt % 60)/6);
6997
6998 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
6999}
7000
7001static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7002{
7003 struct list_head *tmp;
7004 loff_t l = *pos;
7005 struct mddev *mddev;
7006
7007 if (l >= 0x10000)
7008 return NULL;
7009 if (!l--)
7010		/* header */
7011 return (void*)1;
7012
7013 spin_lock(&all_mddevs_lock);
7014 list_for_each(tmp,&all_mddevs)
7015 if (!l--) {
7016 mddev = list_entry(tmp, struct mddev, all_mddevs);
7017 mddev_get(mddev);
7018 spin_unlock(&all_mddevs_lock);
7019 return mddev;
7020 }
7021 spin_unlock(&all_mddevs_lock);
7022 if (!l--)
7023 return (void*)2;
7024 return NULL;
7025}
7026
7027static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7028{
7029 struct list_head *tmp;
7030 struct mddev *next_mddev, *mddev = v;
7031
7032 ++*pos;
7033 if (v == (void*)2)
7034 return NULL;
7035
7036 spin_lock(&all_mddevs_lock);
7037 if (v == (void*)1)
7038 tmp = all_mddevs.next;
7039 else
7040 tmp = mddev->all_mddevs.next;
7041 if (tmp != &all_mddevs)
7042 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7043 else {
7044 next_mddev = (void*)2;
7045 *pos = 0x10000;
7046 }
7047 spin_unlock(&all_mddevs_lock);
7048
7049 if (v != (void*)1)
7050 mddev_put(mddev);
7051 return next_mddev;
7052
7053}
7054
7055static void md_seq_stop(struct seq_file *seq, void *v)
7056{
7057 struct mddev *mddev = v;
7058
7059 if (mddev && v != (void*)1 && v != (void*)2)
7060 mddev_put(mddev);
7061}
7062
7063static int md_seq_show(struct seq_file *seq, void *v)
7064{
7065 struct mddev *mddev = v;
7066 sector_t sectors;
7067 struct md_rdev *rdev;
7068
7069 if (v == (void*)1) {
7070 struct md_personality *pers;
7071 seq_printf(seq, "Personalities : ");
7072 spin_lock(&pers_lock);
7073 list_for_each_entry(pers, &pers_list, list)
7074 seq_printf(seq, "[%s] ", pers->name);
7075
7076 spin_unlock(&pers_lock);
7077 seq_printf(seq, "\n");
7078 seq->poll_event = atomic_read(&md_event_count);
7079 return 0;
7080 }
7081 if (v == (void*)2) {
7082 status_unused(seq);
7083 return 0;
7084 }
7085
7086 if (mddev_lock(mddev) < 0)
7087 return -EINTR;
7088
7089 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7090 seq_printf(seq, "%s : %sactive", mdname(mddev),
7091 mddev->pers ? "" : "in");
7092 if (mddev->pers) {
7093 if (mddev->ro==1)
7094 seq_printf(seq, " (read-only)");
7095 if (mddev->ro==2)
7096 seq_printf(seq, " (auto-read-only)");
7097 seq_printf(seq, " %s", mddev->pers->name);
7098 }
7099
7100 sectors = 0;
7101 rdev_for_each(rdev, mddev) {
7102 char b[BDEVNAME_SIZE];
7103 seq_printf(seq, " %s[%d]",
7104 bdevname(rdev->bdev,b), rdev->desc_nr);
7105 if (test_bit(WriteMostly, &rdev->flags))
7106 seq_printf(seq, "(W)");
7107 if (test_bit(Faulty, &rdev->flags)) {
7108 seq_printf(seq, "(F)");
7109 continue;
7110 }
7111 if (rdev->raid_disk < 0)
7112 seq_printf(seq, "(S)");
7113 if (test_bit(Replacement, &rdev->flags))
7114 seq_printf(seq, "(R)");
7115 sectors += rdev->sectors;
7116 }
7117
7118 if (!list_empty(&mddev->disks)) {
7119 if (mddev->pers)
7120 seq_printf(seq, "\n %llu blocks",
7121 (unsigned long long)
7122 mddev->array_sectors / 2);
7123 else
7124 seq_printf(seq, "\n %llu blocks",
7125 (unsigned long long)sectors / 2);
7126 }
7127 if (mddev->persistent) {
7128 if (mddev->major_version != 0 ||
7129 mddev->minor_version != 90) {
7130 seq_printf(seq," super %d.%d",
7131 mddev->major_version,
7132 mddev->minor_version);
7133 }
7134 } else if (mddev->external)
7135 seq_printf(seq, " super external:%s",
7136 mddev->metadata_type);
7137 else
7138 seq_printf(seq, " super non-persistent");
7139
7140 if (mddev->pers) {
7141 mddev->pers->status(seq, mddev);
7142 seq_printf(seq, "\n ");
7143 if (mddev->pers->sync_request) {
7144 if (mddev->curr_resync > 2) {
7145 status_resync(seq, mddev);
7146 seq_printf(seq, "\n ");
7147 } else if (mddev->curr_resync >= 1)
7148 seq_printf(seq, "\tresync=DELAYED\n ");
7149 else if (mddev->recovery_cp < MaxSector)
7150 seq_printf(seq, "\tresync=PENDING\n ");
7151 }
7152 } else
7153 seq_printf(seq, "\n ");
7154
7155 bitmap_status(seq, mddev->bitmap);
7156
7157 seq_printf(seq, "\n");
7158 }
7159 mddev_unlock(mddev);
7160
7161 return 0;
7162}
7163
7164static const struct seq_operations md_seq_ops = {
7165 .start = md_seq_start,
7166 .next = md_seq_next,
7167 .stop = md_seq_stop,
7168 .show = md_seq_show,
7169};
7170
7171static int md_seq_open(struct inode *inode, struct file *file)
7172{
7173 struct seq_file *seq;
7174 int error;
7175
7176 error = seq_open(file, &md_seq_ops);
7177 if (error)
7178 return error;
7179
7180 seq = file->private_data;
7181 seq->poll_event = atomic_read(&md_event_count);
7182 return error;
7183}
7184
7185static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7186{
7187 struct seq_file *seq = filp->private_data;
7188 int mask;
7189
7190 poll_wait(filp, &md_event_waiters, wait);
7191
7192
7193 mask = POLLIN | POLLRDNORM;
7194
7195 if (seq->poll_event != atomic_read(&md_event_count))
7196 mask |= POLLERR | POLLPRI;
7197 return mask;
7198}
7199
7200static const struct file_operations md_seq_fops = {
7201 .owner = THIS_MODULE,
7202 .open = md_seq_open,
7203 .read = seq_read,
7204 .llseek = seq_lseek,
7205 .release = seq_release_private,
7206 .poll = mdstat_poll,
7207};
7208
7209int register_md_personality(struct md_personality *p)
7210{
7211 spin_lock(&pers_lock);
7212 list_add_tail(&p->list, &pers_list);
7213 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
7214 spin_unlock(&pers_lock);
7215 return 0;
7216}
7217
7218int unregister_md_personality(struct md_personality *p)
7219{
7220 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7221 spin_lock(&pers_lock);
7222 list_del_init(&p->list);
7223 spin_unlock(&pers_lock);
7224 return 0;
7225}
7226
7227static int is_mddev_idle(struct mddev *mddev, int init)
7228{
7229 struct md_rdev * rdev;
7230 int idle;
7231 int curr_events;
7232
7233 idle = 1;
7234 rcu_read_lock();
7235 rdev_for_each_rcu(rdev, mddev) {
7236 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7237 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7238 (int)part_stat_read(&disk->part0, sectors[1]) -
7239 atomic_read(&disk->sync_io);
7240		/* sync IO will cause sync_io to increase before the disk_stats
7241		 * as sync_io is counted when a request starts, and
7242		 * disk_stats is counted when it completes.
7243		 * So resync activity will cause curr_events to be smaller than
7244		 * when there was no such activity.
7245		 * non-sync IO will cause disk_stat to increase without
7246		 * increasing sync_io so curr_events will (eventually)
7247		 * be larger than it was before.  Once it becomes
7248		 * substantially larger, the test below will cause
7249		 * the array to appear non-idle, and resync will slow
7250		 * down.
7251		 * If there is a lot of outstanding resync activity when
7252		 * we set last_events to curr_events, then all that activity
7253		 * completing might cause the array to appear non-idle
7254		 * and resync will be slowed down even though there might
7255		 * not be any outstanding activity.
7256		 */
7257
7258
7259
7260
7261
7262 if (init || curr_events - rdev->last_events > 64) {
7263 rdev->last_events = curr_events;
7264 idle = 0;
7265 }
7266 }
7267 rcu_read_unlock();
7268 return idle;
7269}
7270
7271void md_done_sync(struct mddev *mddev, int blocks, int ok)
7272{
7273
7274 atomic_sub(blocks, &mddev->recovery_active);
7275 wake_up(&mddev->recovery_wait);
7276 if (!ok) {
7277 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7278 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7279 md_wakeup_thread(mddev->thread);
7280
7281 }
7282}
7283
7284
7285/* md_write_start(mddev, bi)
7286 * If we need to update some array metadata (e.g. the 'active' flag
7287 * in the superblock) before writing, schedule a superblock update
7288 * and wait for it to complete.
7289 */
7290void md_write_start(struct mddev *mddev, struct bio *bi)
7291{
7292 int did_change = 0;
7293 if (bio_data_dir(bi) != WRITE)
7294 return;
7295
7296 BUG_ON(mddev->ro == 1);
7297 if (mddev->ro == 2) {
7298
7299 mddev->ro = 0;
7300 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7301 md_wakeup_thread(mddev->thread);
7302 md_wakeup_thread(mddev->sync_thread);
7303 did_change = 1;
7304 }
7305 atomic_inc(&mddev->writes_pending);
7306 if (mddev->safemode == 1)
7307 mddev->safemode = 0;
7308 if (mddev->in_sync) {
7309 spin_lock_irq(&mddev->write_lock);
7310 if (mddev->in_sync) {
7311 mddev->in_sync = 0;
7312 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7313 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7314 md_wakeup_thread(mddev->thread);
7315 did_change = 1;
7316 }
7317 spin_unlock_irq(&mddev->write_lock);
7318 }
7319 if (did_change)
7320 sysfs_notify_dirent_safe(mddev->sysfs_state);
7321 wait_event(mddev->sb_wait,
7322 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7323}
7324
7325void md_write_end(struct mddev *mddev)
7326{
7327 if (atomic_dec_and_test(&mddev->writes_pending)) {
7328 if (mddev->safemode == 2)
7329 md_wakeup_thread(mddev->thread);
7330 else if (mddev->safemode_delay)
7331 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7332 }
7333}
7334
7335
7336/* md_allow_write(mddev)
7337 * Calling this ensures that the array is marked 'active' so that writes
7338 * may proceed without blocking.  It is important to call this before
7339 * attempting a GFP_KERNEL allocation while holding the mddev lock.
7340 * Must be called with mddev_lock held.
7341 *
7342 * Returns -EAGAIN if the metadata update is still pending; the caller
7343 * must then wait for it to complete before writing. */
7344int md_allow_write(struct mddev *mddev)
7345{
7346 if (!mddev->pers)
7347 return 0;
7348 if (mddev->ro)
7349 return 0;
7350 if (!mddev->pers->sync_request)
7351 return 0;
7352
7353 spin_lock_irq(&mddev->write_lock);
7354 if (mddev->in_sync) {
7355 mddev->in_sync = 0;
7356 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7357 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7358 if (mddev->safemode_delay &&
7359 mddev->safemode == 0)
7360 mddev->safemode = 1;
7361 spin_unlock_irq(&mddev->write_lock);
7362 md_update_sb(mddev, 0);
7363 sysfs_notify_dirent_safe(mddev->sysfs_state);
7364 } else
7365 spin_unlock_irq(&mddev->write_lock);
7366
7367 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7368 return -EAGAIN;
7369 else
7370 return 0;
7371}
7372EXPORT_SYMBOL_GPL(md_allow_write);
7373
7374#define SYNC_MARKS 10
7375#define SYNC_MARK_STEP (3*HZ)
7376#define UPDATE_FREQUENCY (5*60*HZ)
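/* md_do_sync() keeps SYNC_MARKS timestamped progress samples, stepped
 * every SYNC_MARK_STEP jiffies, to estimate the current resync speed,
 * and refreshes the recorded checkpoint at least every UPDATE_FREQUENCY.
 */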
7377void md_do_sync(struct md_thread *thread)
7378{
7379 struct mddev *mddev = thread->mddev;
7380 struct mddev *mddev2;
7381 unsigned int currspeed = 0,
7382 window;
7383 sector_t max_sectors,j, io_sectors;
7384 unsigned long mark[SYNC_MARKS];
7385 unsigned long update_time;
7386 sector_t mark_cnt[SYNC_MARKS];
7387 int last_mark,m;
7388 struct list_head *tmp;
7389 sector_t last_check;
7390 int skipped = 0;
7391 struct md_rdev *rdev;
7392 char *desc, *action = NULL;
7393 struct blk_plug plug;
7394
7395
7396 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7397 return;
7398 if (mddev->ro)
7399 return;
7400
7401 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7402 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7403 desc = "data-check";
7404 action = "check";
7405 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7406 desc = "requested-resync";
7407 action = "repair";
7408 } else
7409 desc = "resync";
7410 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7411 desc = "reshape";
7412 else
7413 desc = "recovery";
7414
7415 mddev->last_sync_action = action ?: desc;
7416
7417	/* we overload curr_resync somewhat here.
7418	 * 0 == not engaged in resync at all
7419	 * 2 == checking that there is no conflict with another sync
7420	 * 1 == like 2, but have yielded to allow conflicting resync to
7421	 *      commence
7422	 * other == active in resync - this many blocks
7423	 *
7424	 * Before starting a resync we must have set curr_resync to
7425	 * 2, and then checked that every "conflicting" array has curr_resync
7426	 * less than ours.  When we find one that is the same or higher
7427	 * we wait on resync_wait.  To avoid deadlock, we reduce curr_resync
7428	 * to 1 if we choose to yield (based arbitrarily on address of mddev
7429	 * structure).  This will mean we have to start checking from the
7430	 * beginning again.
7431	 */
7432
7433 do {
7434 mddev->curr_resync = 2;
7435
7436 try_again:
7437 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7438 goto skip;
7439 for_each_mddev(mddev2, tmp) {
7440 if (mddev2 == mddev)
7441 continue;
7442 if (!mddev->parallel_resync
7443 && mddev2->curr_resync
7444 && match_mddev_units(mddev, mddev2)) {
7445 DEFINE_WAIT(wq);
7446 if (mddev < mddev2 && mddev->curr_resync == 2) {
7447
7448 mddev->curr_resync = 1;
7449 wake_up(&resync_wait);
7450 }
7451 if (mddev > mddev2 && mddev->curr_resync == 1)
7452				/* no need to wait here, we can wait the next
7453				 * time 'round when curr_resync == 2
7454				 */
7455				continue;
7456			/* We need to wait 'interruptible' so as not to
7457			 * contribute to the load average, and not to
7458			 * be caught by 'softlockup'
7459			 */
7460 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7461 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7462 mddev2->curr_resync >= mddev->curr_resync) {
7463 printk(KERN_INFO "md: delaying %s of %s"
7464 " until %s has finished (they"
7465 " share one or more physical units)\n",
7466 desc, mdname(mddev), mdname(mddev2));
7467 mddev_put(mddev2);
7468 if (signal_pending(current))
7469 flush_signals(current);
7470 schedule();
7471 finish_wait(&resync_wait, &wq);
7472 goto try_again;
7473 }
7474 finish_wait(&resync_wait, &wq);
7475 }
7476 }
7477 } while (mddev->curr_resync < 2);
7478
7479 j = 0;
7480 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7481			/* resync follows the size requested by the personality,
7482			 * which defaults to physical size, but can be virtual size
7483			 */
7484 max_sectors = mddev->resync_max_sectors;
7485 atomic64_set(&mddev->resync_mismatches, 0);
7486
7487 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7488 j = mddev->resync_min;
7489 else if (!mddev->bitmap)
7490 j = mddev->recovery_cp;
7491
7492 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7493 max_sectors = mddev->resync_max_sectors;
7494 else {
7495
7496 max_sectors = mddev->dev_sectors;
7497 j = MaxSector;
7498 rcu_read_lock();
7499 rdev_for_each_rcu(rdev, mddev)
7500 if (rdev->raid_disk >= 0 &&
7501 !test_bit(Faulty, &rdev->flags) &&
7502 !test_bit(In_sync, &rdev->flags) &&
7503 rdev->recovery_offset < j)
7504 j = rdev->recovery_offset;
7505 rcu_read_unlock();
7506 }
7507
7508 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7509 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
7510 " %d KB/sec/disk.\n", speed_min(mddev));
7511 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
7512 "(but not more than %d KB/sec) for %s.\n",
7513 speed_max(mddev), desc);
7514
7515 is_mddev_idle(mddev, 1);
7516
7517 io_sectors = 0;
7518 for (m = 0; m < SYNC_MARKS; m++) {
7519 mark[m] = jiffies;
7520 mark_cnt[m] = io_sectors;
7521 }
7522 last_mark = 0;
7523 mddev->resync_mark = mark[last_mark];
7524 mddev->resync_mark_cnt = mark_cnt[last_mark];
7525
7526
7527
7528
7529 window = 32*(PAGE_SIZE/512);
7530 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
7531 window/2, (unsigned long long)max_sectors/2);
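	/* speed checks and mark updates in the loop below only happen after
	 * at least 'window' sectors have been issued since the last check */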
7532
7533 atomic_set(&mddev->recovery_active, 0);
7534 last_check = 0;
7535
7536 if (j>2) {
7537 printk(KERN_INFO
7538 "md: resuming %s of %s from checkpoint.\n",
7539 desc, mdname(mddev));
7540 mddev->curr_resync = j;
7541 } else
7542 mddev->curr_resync = 3;
7543 mddev->curr_resync_completed = j;
7544 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7545 md_new_event(mddev);
7546 update_time = jiffies;
7547
7548 blk_start_plug(&plug);
7549 while (j < max_sectors) {
7550 sector_t sectors;
7551
7552 skipped = 0;
7553
7554 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7555 ((mddev->curr_resync > mddev->curr_resync_completed &&
7556 (mddev->curr_resync - mddev->curr_resync_completed)
7557 > (max_sectors >> 4)) ||
7558 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
7559 (j - mddev->curr_resync_completed)*2
7560 >= mddev->resync_max - mddev->curr_resync_completed
7561 )) {
7562
7563 wait_event(mddev->recovery_wait,
7564 atomic_read(&mddev->recovery_active) == 0);
7565 mddev->curr_resync_completed = j;
7566 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
7567 j > mddev->recovery_cp)
7568 mddev->recovery_cp = j;
7569 update_time = jiffies;
7570 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7571 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7572 }
7573
7574 while (j >= mddev->resync_max &&
7575 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7576			/* As this condition is controlled by userspace (via the
7577			 * sysfs 'sync_max' attribute) we may block indefinitely,
7578			 * so wait interruptibly; any pending signal is flushed first.
7579			 */
7580 flush_signals(current);
7581 wait_event_interruptible(mddev->recovery_wait,
7582 mddev->resync_max > j
7583 || test_bit(MD_RECOVERY_INTR,
7584 &mddev->recovery));
7585 }
7586
7587 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7588 break;
7589
7590 sectors = mddev->pers->sync_request(mddev, j, &skipped,
7591 currspeed < speed_min(mddev));
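		/* sync_request() returns the number of sectors it handled;
		 * 0 means it hit a fatal problem and the sync must abort. */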
7592 if (sectors == 0) {
7593 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7594 break;
7595 }
7596
7597 if (!skipped) {
7598 io_sectors += sectors;
7599 atomic_add(sectors, &mddev->recovery_active);
7600 }
7601
7602 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7603 break;
7604
7605 j += sectors;
7606 if (j > 2)
7607 mddev->curr_resync = j;
7608 mddev->curr_mark_cnt = io_sectors;
7609 if (last_check == 0)
7610			/* this is the earliest that rebuild will be
7611			 * visible in /proc/mdstat
7612			 */
7613 md_new_event(mddev);
7614
7615 if (last_check + window > io_sectors || j == max_sectors)
7616 continue;
7617
7618 last_check = io_sectors;
7619 repeat:
7620 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
7621
7622 int next = (last_mark+1) % SYNC_MARKS;
7623
7624 mddev->resync_mark = mark[next];
7625 mddev->resync_mark_cnt = mark_cnt[next];
7626 mark[next] = jiffies;
7627 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
7628 last_mark = next;
7629 }
7630
7631 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7632 break;
7633
7634		/*
7635		 * Throttle: once we exceed the minimum guaranteed speed, back
7636		 * off (sleep and retry) whenever we are over the hard limit or
7637		 * the member disks are seeing other IO.
7638		 * The system may be busy CPU-wise; we only care about not
7639		 * overloading the IO subsystem (e.g. an fsck running on the
7640		 * array should still make good progress).
7641		 */
7642 cond_resched();
7643
7644 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
7645 /((jiffies-mddev->resync_mark)/HZ +1) +1;
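		/* currspeed is in KB/sec, averaged over the recent mark window */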
7646
7647 if (currspeed > speed_min(mddev)) {
7648 if ((currspeed > speed_max(mddev)) ||
7649 !is_mddev_idle(mddev, 0)) {
7650 msleep(500);
7651 goto repeat;
7652 }
7653 }
7654 }
7655 printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
7656 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
7657 ? "interrupted" : "done");
7658
7659
7660
7661 blk_finish_plug(&plug);
7662 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
7663
7664
7665 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
7666
7667 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
7668 mddev->curr_resync > 2) {
7669 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7670 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7671 if (mddev->curr_resync >= mddev->recovery_cp) {
7672 printk(KERN_INFO
7673 "md: checkpointing %s of %s.\n",
7674 desc, mdname(mddev));
7675 if (test_bit(MD_RECOVERY_ERROR,
7676 &mddev->recovery))
7677 mddev->recovery_cp =
7678 mddev->curr_resync_completed;
7679 else
7680 mddev->recovery_cp =
7681 mddev->curr_resync;
7682 }
7683 } else
7684 mddev->recovery_cp = MaxSector;
7685 } else {
7686 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7687 mddev->curr_resync = MaxSector;
7688 rcu_read_lock();
7689 rdev_for_each_rcu(rdev, mddev)
7690 if (rdev->raid_disk >= 0 &&
7691 mddev->delta_disks >= 0 &&
7692 !test_bit(Faulty, &rdev->flags) &&
7693 !test_bit(In_sync, &rdev->flags) &&
7694 rdev->recovery_offset < mddev->curr_resync)
7695 rdev->recovery_offset = mddev->curr_resync;
7696 rcu_read_unlock();
7697 }
7698 }
7699 skip:
7700 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7701
7702 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7703
7704 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7705 mddev->resync_min = 0;
7706 mddev->resync_max = MaxSector;
7707 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7708 mddev->resync_min = mddev->curr_resync_completed;
7709 mddev->curr_resync = 0;
7710 wake_up(&resync_wait);
7711 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
7712 md_wakeup_thread(mddev->thread);
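	/* the array's main thread notices MD_RECOVERY_DONE and reaps this
	 * thread via md_check_recovery()/md_reap_sync_thread() */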
7713 return;
7714}
7715EXPORT_SYMBOL_GPL(md_do_sync);
7716
7717static int remove_and_add_spares(struct mddev *mddev,
7718 struct md_rdev *this)
7719{
7720 struct md_rdev *rdev;
7721 int spares = 0;
7722 int removed = 0;
7723
7724 rdev_for_each(rdev, mddev)
7725 if ((this == NULL || rdev == this) &&
7726 rdev->raid_disk >= 0 &&
7727 !test_bit(Blocked, &rdev->flags) &&
7728 (test_bit(Faulty, &rdev->flags) ||
7729 ! test_bit(In_sync, &rdev->flags)) &&
7730 atomic_read(&rdev->nr_pending)==0) {
7731 if (mddev->pers->hot_remove_disk(
7732 mddev, rdev) == 0) {
7733 sysfs_unlink_rdev(mddev, rdev);
7734 rdev->raid_disk = -1;
7735 removed++;
7736 }
7737 }
7738 if (removed && mddev->kobj.sd)
7739 sysfs_notify(&mddev->kobj, NULL, "degraded");
7740
7741 if (this)
7742 goto no_add;
7743
7744 rdev_for_each(rdev, mddev) {
7745 if (rdev->raid_disk >= 0 &&
7746 !test_bit(In_sync, &rdev->flags) &&
7747 !test_bit(Faulty, &rdev->flags))
7748 spares++;
7749 if (rdev->raid_disk >= 0)
7750 continue;
7751 if (test_bit(Faulty, &rdev->flags))
7752 continue;
7753 if (mddev->ro &&
7754 ! (rdev->saved_raid_disk >= 0 &&
7755 !test_bit(Bitmap_sync, &rdev->flags)))
7756 continue;
7757
7758 if (rdev->saved_raid_disk < 0)
7759 rdev->recovery_offset = 0;
7760 if (mddev->pers->
7761 hot_add_disk(mddev, rdev) == 0) {
7762			if (sysfs_link_rdev(mddev, rdev))
7763				; /* failure here is OK */
7764 spares++;
7765 md_new_event(mddev);
7766 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7767 }
7768 }
7769no_add:
7770 if (removed)
7771 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7772 return spares;
7773}
7774
7775
7776/*
7777 * This routine is regularly called by all per-raid-array threads to
7778 * deal with generic issues like resync and super-block update.
7779 * Raid personalities that don't have a thread (linear/raid0) do not
7780 * need this as they never do any recovery or update the superblock.
7781 *
7782 * It does not do any resync itself, but rather "forks" off other threads
7783 * to do that as needed.
7784 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
7785 * "->recovery" and create a thread at ->sync_thread.
7786 * When the thread finishes it sets MD_RECOVERY_DONE
7787 * and wakes up this thread, which will reap the thread and finish up.
7788 * This thread also removes any faulty devices (with nr_pending == 0).
7789 *
7790 * The overall approach is:
7791 *  1/ if the superblock needs updating, update it.
7792 *  2/ If a recovery thread is running, don't disturb it.
7793 *  3/ If recovery has finished, clean up, possibly marking spares active.
7794 *  4/ If there is no active array, and it may have spares, or there is
7795 *     a resync/recovery in progress, start a new resync/recovery.
7796 */
7797void md_check_recovery(struct mddev *mddev)
7798{
7799 if (mddev->suspended)
7800 return;
7801
7802 if (mddev->bitmap)
7803 bitmap_daemon_work(mddev);
7804
7805 if (signal_pending(current)) {
7806 if (mddev->pers->sync_request && !mddev->external) {
7807 printk(KERN_INFO "md: %s in immediate safe mode\n",
7808 mdname(mddev));
7809 mddev->safemode = 2;
7810 }
7811 flush_signals(current);
7812 }
7813
7814 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
7815 return;
7816 if ( ! (
7817 (mddev->flags & MD_UPDATE_SB_FLAGS & ~ (1<<MD_CHANGE_PENDING)) ||
7818 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7819 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
7820 (mddev->external == 0 && mddev->safemode == 1) ||
7821 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
7822 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
7823 ))
7824 return;
7825
7826 if (mddev_trylock(mddev)) {
7827 int spares = 0;
7828
7829 if (mddev->ro) {
7830			/* On a read-only array we can:
7831			 * - remove failed devices
7832			 * - add already-in_sync devices if the array itself
7833			 *   is in-sync.
7834			 * As we only add devices that are already in-sync,
7835			 * we can activate the spares immediately.
7836			 */
7837			remove_and_add_spares(mddev, NULL);
7838			/* There is no recovery thread, but we still call
7839			 * md_reap_sync_thread() to mark spares active and
7840			 * clear saved_raid_disk. */
7841 md_reap_sync_thread(mddev);
7842 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7843 goto unlock;
7844 }
7845
7846 if (!mddev->external) {
7847 int did_change = 0;
7848 spin_lock_irq(&mddev->write_lock);
7849 if (mddev->safemode &&
7850 !atomic_read(&mddev->writes_pending) &&
7851 !mddev->in_sync &&
7852 mddev->recovery_cp == MaxSector) {
7853 mddev->in_sync = 1;
7854 did_change = 1;
7855 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7856 }
7857 if (mddev->safemode == 1)
7858 mddev->safemode = 0;
7859 spin_unlock_irq(&mddev->write_lock);
7860 if (did_change)
7861 sysfs_notify_dirent_safe(mddev->sysfs_state);
7862 }
7863
7864 if (mddev->flags & MD_UPDATE_SB_FLAGS)
7865 md_update_sb(mddev, 0);
7866
7867 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
7868 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
7869
7870 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7871 goto unlock;
7872 }
7873 if (mddev->sync_thread) {
7874 md_reap_sync_thread(mddev);
7875 goto unlock;
7876 }
7877
7878		/* Set RUNNING before clearing NEEDED to avoid
7879		 * any transients in the value of "sync_action". */
7880 mddev->curr_resync_completed = 0;
7881 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7882
7883
7884
7885 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
7886 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
7887
7888 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7889 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
7890 goto unlock;
7891
7892		/* no recovery is running.
7893		 * remove any failed drives, then
7894		 * add spares if possible.
7895		 * Spares are also removed and re-added, to allow
7896		 * the personality to fail the re-add.
7897		 */
7898 if (mddev->reshape_position != MaxSector) {
7899 if (mddev->pers->check_reshape == NULL ||
7900 mddev->pers->check_reshape(mddev) != 0)
7901
7902 goto unlock;
7903 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7904 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7905 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
7906 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7907 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7908 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7909 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7910 } else if (mddev->recovery_cp < MaxSector) {
7911 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7912 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7913 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
7914
7915 goto unlock;
7916
7917 if (mddev->pers->sync_request) {
7918 if (spares) {
7919				/* We are adding a device or devices to an array
7920				 * which has the bitmap stored on all devices.
7921				 * So make sure all bitmap pages get written.
7922				 */
7923 bitmap_write_all(mddev->bitmap);
7924 }
7925 mddev->sync_thread = md_register_thread(md_do_sync,
7926 mddev,
7927 "resync");
7928 if (!mddev->sync_thread) {
7929 printk(KERN_ERR "%s: could not start resync"
7930 " thread...\n",
7931 mdname(mddev));
7932
7933 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7934 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7935 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7936 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7937 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7938 } else
7939 md_wakeup_thread(mddev->sync_thread);
7940 sysfs_notify_dirent_safe(mddev->sysfs_action);
7941 md_new_event(mddev);
7942 }
7943 unlock:
7944 wake_up(&mddev->sb_wait);
7945
7946 if (!mddev->sync_thread) {
7947 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7948 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7949 &mddev->recovery))
7950 if (mddev->sysfs_action)
7951 sysfs_notify_dirent_safe(mddev->sysfs_action);
7952 }
7953 mddev_unlock(mddev);
7954 }
7955}
7956
7957void md_reap_sync_thread(struct mddev *mddev)
7958{
7959 struct md_rdev *rdev;
7960
7961	/* resync has finished, collect result */
7962 md_unregister_thread(&mddev->sync_thread);
7963 wake_up(&resync_wait);
7964 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7965 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7966
7967
7968 if (mddev->pers->spare_active(mddev)) {
7969 sysfs_notify(&mddev->kobj, NULL,
7970 "degraded");
7971 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7972 }
7973 }
7974 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7975 mddev->pers->finish_reshape)
7976 mddev->pers->finish_reshape(mddev);
7977	/* If the array is no longer degraded, then any saved_raid_disk
7978	 * information must be scrapped.
7979	 */
7980
7981 if (!mddev->degraded)
7982 rdev_for_each(rdev, mddev)
7983 rdev->saved_raid_disk = -1;
7984
7985 md_update_sb(mddev, 1);
7986 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7987 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7988 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7989 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7990 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7991
7992 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7993 sysfs_notify_dirent_safe(mddev->sysfs_action);
7994 md_new_event(mddev);
7995 if (mddev->event_work.func)
7996 queue_work(md_misc_wq, &mddev->event_work);
7997}
7998
7999void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8000{
8001 sysfs_notify_dirent_safe(rdev->sysfs_state);
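	/* Wait (up to 5 seconds) for the rdev to stop being Blocked,
	 * then drop the pending reference taken by the caller. */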
8002 wait_event_timeout(rdev->blocked_wait,
8003 !test_bit(Blocked, &rdev->flags) &&
8004 !test_bit(BlockedBadBlocks, &rdev->flags),
8005 msecs_to_jiffies(5000));
8006 rdev_dec_pending(rdev, mddev);
8007}
8008EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8009
8010void md_finish_reshape(struct mddev *mddev)
8011{
8012
8013 struct md_rdev *rdev;
8014
8015 rdev_for_each(rdev, mddev) {
8016 if (rdev->data_offset > rdev->new_data_offset)
8017 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8018 else
8019 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8020 rdev->data_offset = rdev->new_data_offset;
8021 }
8022}
8023EXPORT_SYMBOL(md_finish_reshape);
8024
8025
8026/* Bad block management.
8027 * We can record which blocks on each device are 'bad' and so just
8028 * fail those blocks, or that stripe, rather than the whole device.
8029 * Entries in the bad-block table are 64bits wide.  This comprises:
8030 * Length of bad-range, in sectors: 0-511 for lengths 1-512
8031 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
8032 * A 'shift' can be set so that larger blocks are tracked and
8033 * consequently larger requests are failed.  So a shift of 12 means
8034 * that a 4096byte block is considered bad.
8035 *
8036 * Locking of the bad-block table uses a seqlock so md_is_badblock
8037 * might need to retry if it is very unlucky.
8038 * We will sometimes want to check for bad blocks in a bi_end_io
8039 * function, so we use the write_seqlock_irq variant.
8040 *
8041 * When looking for a bad block we specify a range and want to
8042 * know if any block in the range is bad.  So we binary-search
8043 * to the last range that starts at-or-before the given endpoint
8044 * (i.e. before the sector after the target range), then see if it
8045 * ends after the given start.  We return
8046 *  0 if there are no known bad blocks in the range
8047 *  1 if there are known bad blocks which are all acknowledged
8048 * -1 if there are bad blocks which have not yet been acknowledged
8049 *    in metadata, plus the start/length of the first bad section
8050 *    we overlap. */
8051int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
8052 sector_t *first_bad, int *bad_sectors)
8053{
8054 int hi;
8055 int lo;
8056 u64 *p = bb->page;
8057 int rv;
8058 sector_t target = s + sectors;
8059 unsigned seq;
8060
8061 if (bb->shift > 0) {
8062		/* round the start down, and the end up */
8063 s >>= bb->shift;
8064 target += (1<<bb->shift) - 1;
8065 target >>= bb->shift;
8066 sectors = target - s;
8067 }
8068
8069
8070retry:
8071 seq = read_seqbegin(&bb->lock);
8072 lo = 0;
8073 rv = 0;
8074 hi = bb->count;
8075
8076	/* Binary search between lo and hi for 'target',
8077	 * i.e. for the last range that starts before 'target'.
8078	 */
8079	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
8080	 * are known not to be the last range before target.
8081	 * VARIANT: hi-lo is the number of possible
8082	 * ranges, and decreases until it reaches 1.
8083	 */
8084 while (hi - lo > 1) {
8085 int mid = (lo + hi) / 2;
8086 sector_t a = BB_OFFSET(p[mid]);
8087 if (a < target)
8088
8089
8090 lo = mid;
8091 else
8092
8093 hi = mid;
8094 }
8095
8096 if (hi > lo) {
8097
8098
8099
8100 while (lo >= 0 &&
8101 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8102 if (BB_OFFSET(p[lo]) < target) {
8103
8104
8105
8106 if (rv != -1 && BB_ACK(p[lo]))
8107 rv = 1;
8108 else
8109 rv = -1;
8110 *first_bad = BB_OFFSET(p[lo]);
8111 *bad_sectors = BB_LEN(p[lo]);
8112 }
8113 lo--;
8114 }
8115 }
8116
8117 if (read_seqretry(&bb->lock, seq))
8118 goto retry;
8119
8120 return rv;
8121}
8122EXPORT_SYMBOL_GPL(md_is_badblock);
8123
8124
8125/*
8126 * Add a range of bad blocks to the table.
8127 * This might extend the table, or might contract it
8128 * if two adjacent ranges can be merged.
8129 * We binary-search to find the 'insertion' point, then decide how best to handle it.
8130 */
8131static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
8132 int acknowledged)
8133{
8134 u64 *p;
8135 int lo, hi;
8136 int rv = 1;
8137 unsigned long flags;
8138
8139 if (bb->shift < 0)
8140
8141 return 0;
8142
8143 if (bb->shift) {
8144
8145 sector_t next = s + sectors;
8146 s >>= bb->shift;
8147 next += (1<<bb->shift) - 1;
8148 next >>= bb->shift;
8149 sectors = next - s;
8150 }
8151
8152 write_seqlock_irqsave(&bb->lock, flags);
8153
8154 p = bb->page;
8155 lo = 0;
8156 hi = bb->count;
8157
8158 while (hi - lo > 1) {
8159 int mid = (lo + hi) / 2;
8160 sector_t a = BB_OFFSET(p[mid]);
8161 if (a <= s)
8162 lo = mid;
8163 else
8164 hi = mid;
8165 }
8166 if (hi > lo && BB_OFFSET(p[lo]) > s)
8167 hi = lo;
8168
8169 if (hi > lo) {
8170
8171
8172
8173 sector_t a = BB_OFFSET(p[lo]);
8174 sector_t e = a + BB_LEN(p[lo]);
8175 int ack = BB_ACK(p[lo]);
8176 if (e >= s) {
8177
8178 if (s == a && s + sectors >= e)
8179
8180 ack = acknowledged;
8181 else
8182 ack = ack && acknowledged;
8183
8184 if (e < s + sectors)
8185 e = s + sectors;
8186 if (e - a <= BB_MAX_LEN) {
8187 p[lo] = BB_MAKE(a, e-a, ack);
8188 s = e;
8189 } else {
8190
8191
8192
8193 if (BB_LEN(p[lo]) != BB_MAX_LEN)
8194 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
8195 s = a + BB_MAX_LEN;
8196 }
8197 sectors = e - s;
8198 }
8199 }
8200 if (sectors && hi < bb->count) {
8201
8202
8203 sector_t a = BB_OFFSET(p[hi]);
8204 sector_t e = a + BB_LEN(p[hi]);
8205 int ack = BB_ACK(p[hi]);
8206 if (a <= s + sectors) {
8207
8208 if (e <= s + sectors) {
8209
8210 e = s + sectors;
8211 ack = acknowledged;
8212 } else
8213 ack = ack && acknowledged;
8214
8215 a = s;
8216 if (e - a <= BB_MAX_LEN) {
8217 p[hi] = BB_MAKE(a, e-a, ack);
8218 s = e;
8219 } else {
8220 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
8221 s = a + BB_MAX_LEN;
8222 }
8223 sectors = e - s;
8224 lo = hi;
8225 hi++;
8226 }
8227 }
8228 if (sectors == 0 && hi < bb->count) {
8229
8230
8231 sector_t a = BB_OFFSET(p[hi]);
8232 int lolen = BB_LEN(p[lo]);
8233 int hilen = BB_LEN(p[hi]);
8234 int newlen = lolen + hilen - (s - a);
8235 if (s >= a && newlen < BB_MAX_LEN) {
8236
8237 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
8238 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
8239 memmove(p + hi, p + hi + 1,
8240 (bb->count - hi - 1) * 8);
8241 bb->count--;
8242 }
8243 }
8244 while (sectors) {
8245
8246
8247 if (bb->count >= MD_MAX_BADBLOCKS) {
8248
8249 rv = 0;
8250 break;
8251 } else {
8252 int this_sectors = sectors;
8253 memmove(p + hi + 1, p + hi,
8254 (bb->count - hi) * 8);
8255 bb->count++;
8256
8257 if (this_sectors > BB_MAX_LEN)
8258 this_sectors = BB_MAX_LEN;
8259 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
8260 sectors -= this_sectors;
8261 s += this_sectors;
8262 }
8263 }
8264
8265 bb->changed = 1;
8266 if (!acknowledged)
8267 bb->unacked_exist = 1;
8268 write_sequnlock_irqrestore(&bb->lock, flags);
8269
8270 return rv;
8271}
8272
8273int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8274 int is_new)
8275{
8276 int rv;
8277 if (is_new)
8278 s += rdev->new_data_offset;
8279 else
8280 s += rdev->data_offset;
8281 rv = md_set_badblocks(&rdev->badblocks,
8282 s, sectors, 0);
8283 if (rv) {
8284
8285 sysfs_notify_dirent_safe(rdev->sysfs_state);
8286 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8287 md_wakeup_thread(rdev->mddev->thread);
8288 }
8289 return rv;
8290}
8291EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8292
8293
8294/*
8295 * Remove a range of bad blocks from the table.
8296 * This may involve extending the table if we split a region,
8297 * but it must not fail.  So if the table becomes full, we just drop the remove request.
8298 */
8299static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
8300{
8301 u64 *p;
8302 int lo, hi;
8303 sector_t target = s + sectors;
8304 int rv = 0;
8305
8306 if (bb->shift > 0) {
8307		/* When clearing we round the start up and the end down.
8308		 * This should not matter as the shift should align with
8309		 * the block size and no rounding should ever be needed.
8310		 * However it is better to think a block is bad when it
8311		 * isn't than to think a block is not bad when it is.
8312		 */
8313 s += (1<<bb->shift) - 1;
8314 s >>= bb->shift;
8315 target >>= bb->shift;
8316 sectors = target - s;
8317 }
8318
8319 write_seqlock_irq(&bb->lock);
8320
8321 p = bb->page;
8322 lo = 0;
8323 hi = bb->count;
8324
8325 while (hi - lo > 1) {
8326 int mid = (lo + hi) / 2;
8327 sector_t a = BB_OFFSET(p[mid]);
8328 if (a < target)
8329 lo = mid;
8330 else
8331 hi = mid;
8332 }
8333 if (hi > lo) {
8334		/* p[lo] is the last range that could overlap the
8335		 * current range.  Earlier ranges could also overlap,
8336		 * but only this one can overlap the end of the range.
8337		 */
8338 if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
8339
8340 int ack = BB_ACK(p[lo]);
8341 sector_t a = BB_OFFSET(p[lo]);
8342 sector_t end = a + BB_LEN(p[lo]);
8343
8344 if (a < s) {
8345
8346 if (bb->count >= MD_MAX_BADBLOCKS) {
8347 rv = 0;
8348 goto out;
8349 }
8350 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
8351 bb->count++;
8352 p[lo] = BB_MAKE(a, s-a, ack);
8353 lo++;
8354 }
8355 p[lo] = BB_MAKE(target, end - target, ack);
8356
8357 hi = lo;
8358 lo--;
8359 }
8360 while (lo >= 0 &&
8361 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8362
8363 if (BB_OFFSET(p[lo]) < s) {
8364
8365 int ack = BB_ACK(p[lo]);
8366 sector_t start = BB_OFFSET(p[lo]);
8367 p[lo] = BB_MAKE(start, s - start, ack);
8368
8369 break;
8370 }
8371 lo--;
8372 }
8373
8374
8375
8376 if (hi - lo > 1) {
8377 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
8378 bb->count -= (hi - lo - 1);
8379 }
8380 }
8381
8382 bb->changed = 1;
8383out:
8384 write_sequnlock_irq(&bb->lock);
8385 return rv;
8386}
8387
8388int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8389 int is_new)
8390{
8391 if (is_new)
8392 s += rdev->new_data_offset;
8393 else
8394 s += rdev->data_offset;
8395 return md_clear_badblocks(&rdev->badblocks,
8396 s, sectors);
8397}
8398EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8399
8400/*
8401 * Acknowledge all bad blocks in a list.
8402 * This only succeeds if ->changed is clear.  It is used by
8403 * in-kernel metadata updates.
8404 */
8405void md_ack_all_badblocks(struct badblocks *bb)
8406{
8407 if (bb->page == NULL || bb->changed)
8408
8409 return;
8410 write_seqlock_irq(&bb->lock);
8411
8412 if (bb->changed == 0 && bb->unacked_exist) {
8413 u64 *p = bb->page;
8414 int i;
8415 for (i = 0; i < bb->count ; i++) {
8416 if (!BB_ACK(p[i])) {
8417 sector_t start = BB_OFFSET(p[i]);
8418 int len = BB_LEN(p[i]);
8419 p[i] = BB_MAKE(start, len, 1);
8420 }
8421 }
8422 bb->unacked_exist = 0;
8423 }
8424 write_sequnlock_irq(&bb->lock);
8425}
8426EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
8427
8428
8429/* sysfs access to the bad-blocks list.
8430 * We present two files.
8431 * 'bad-blocks' lists sector numbers and lengths of ranges that
8432 *    are recorded as bad.  The list is truncated to fit within
8433 *    the one-page limit of sysfs.
8434 *    Writing "sector length" to this file adds an acknowledged
8435 *    bad block range.
8436 * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
8437 *    been acknowledged.  Writing to this file adds bad blocks
8438 *    without acknowledging them.  This is largely for testing.
8439 */
8440static ssize_t
8441badblocks_show(struct badblocks *bb, char *page, int unack)
8442{
8443 size_t len;
8444 int i;
8445 u64 *p = bb->page;
8446 unsigned seq;
8447
8448 if (bb->shift < 0)
8449 return 0;
8450
8451retry:
8452 seq = read_seqbegin(&bb->lock);
8453
8454 len = 0;
8455 i = 0;
8456
8457 while (len < PAGE_SIZE && i < bb->count) {
8458 sector_t s = BB_OFFSET(p[i]);
8459 unsigned int length = BB_LEN(p[i]);
8460 int ack = BB_ACK(p[i]);
8461 i++;
8462
8463 if (unack && ack)
8464 continue;
8465
8466 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
8467 (unsigned long long)s << bb->shift,
8468 length << bb->shift);
8469 }
8470 if (unack && len == 0)
8471 bb->unacked_exist = 0;
8472
8473 if (read_seqretry(&bb->lock, seq))
8474 goto retry;
8475
8476 return len;
8477}
8478
8479#define DO_DEBUG 1
8480
8481static ssize_t
8482badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
8483{
8484 unsigned long long sector;
8485 int length;
8486 char newline;
8487#ifdef DO_DEBUG
8488	/* Allow clearing via sysfs *only* for testing/debugging.
8489	 * Normally only a successful write may clear a badblock.
8490	 */
8491 int clear = 0;
8492 if (page[0] == '-') {
8493 clear = 1;
8494 page++;
8495 }
8496#endif
8497
8498	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
8499 case 3:
8500 if (newline != '\n')
8501 return -EINVAL;
8502 case 2:
8503 if (length <= 0)
8504 return -EINVAL;
8505 break;
8506 default:
8507 return -EINVAL;
8508 }
8509
8510#ifdef DO_DEBUG
8511 if (clear) {
8512 md_clear_badblocks(bb, sector, length);
8513 return len;
8514 }
8515#endif
8516 if (md_set_badblocks(bb, sector, length, !unack))
8517 return len;
8518 else
8519 return -ENOSPC;
8520}
8521
8522static int md_notify_reboot(struct notifier_block *this,
8523 unsigned long code, void *x)
8524{
8525 struct list_head *tmp;
8526 struct mddev *mddev;
8527 int need_delay = 0;
8528
8529 for_each_mddev(mddev, tmp) {
8530 if (mddev_trylock(mddev)) {
8531 if (mddev->pers)
8532 __md_stop_writes(mddev);
8533 mddev->safemode = 2;
8534 mddev_unlock(mddev);
8535 }
8536 need_delay = 1;
8537 }
8538
8539	/*
8540	 * Some devices (certain exotic SCSI ones in particular) can lose
8541	 * data if the system reboots too soon after they were written to.
8542	 * If any arrays were present, pause briefly so their member
8543	 * devices can settle before the reboot/halt proceeds. */
8544 if (need_delay)
8545 mdelay(1000*1);
8546
8547 return NOTIFY_DONE;
8548}
8549
8550static struct notifier_block md_notifier = {
8551 .notifier_call = md_notify_reboot,
8552 .next = NULL,
8553 .priority = INT_MAX,
8554};
8555
8556static void md_geninit(void)
8557{
8558 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8559
8560 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8561}
8562
8563static int __init md_init(void)
8564{
8565 int ret = -ENOMEM;
8566
8567 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8568 if (!md_wq)
8569 goto err_wq;
8570
8571 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8572 if (!md_misc_wq)
8573 goto err_misc_wq;
8574
8575 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8576 goto err_md;
8577
8578 if ((ret = register_blkdev(0, "mdp")) < 0)
8579 goto err_mdp;
8580 mdp_major = ret;
8581
8582 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
8583 md_probe, NULL, NULL);
8584 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8585 md_probe, NULL, NULL);
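	/* md_probe() is invoked on first access to an unknown md minor in
	 * these regions and creates the corresponding gendisk on demand. */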
8586
8587 register_reboot_notifier(&md_notifier);
8588 raid_table_header = register_sysctl_table(raid_root_table);
8589
8590 md_geninit();
8591 return 0;
8592
8593err_mdp:
8594 unregister_blkdev(MD_MAJOR, "md");
8595err_md:
8596 destroy_workqueue(md_misc_wq);
8597err_misc_wq:
8598 destroy_workqueue(md_wq);
8599err_wq:
8600 return ret;
8601}
8602
8603#ifndef MODULE
8604
8605
8606/*
8607 * For non-modular builds: partitions flagged for RAID autodetect are
8608 * queued by md_autodetect_dev() at boot and then imported and
8609 * assembled by autostart_arrays(). */
8610static LIST_HEAD(all_detected_devices);
8611struct detected_devices_node {
8612 struct list_head list;
8613 dev_t dev;
8614};
8615
8616void md_autodetect_dev(dev_t dev)
8617{
8618 struct detected_devices_node *node_detected_dev;
8619
8620 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8621 if (node_detected_dev) {
8622 node_detected_dev->dev = dev;
8623 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8624 } else {
8625 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8626 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8627 }
8628}
8629
8630
8631static void autostart_arrays(int part)
8632{
8633 struct md_rdev *rdev;
8634 struct detected_devices_node *node_detected_dev;
8635 dev_t dev;
8636 int i_scanned, i_passed;
8637
8638 i_scanned = 0;
8639 i_passed = 0;
8640
8641 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8642
8643 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8644 i_scanned++;
8645 node_detected_dev = list_entry(all_detected_devices.next,
8646 struct detected_devices_node, list);
8647 list_del(&node_detected_dev->list);
8648 dev = node_detected_dev->dev;
8649 kfree(node_detected_dev);
8650 rdev = md_import_device(dev,0, 90);
8651 if (IS_ERR(rdev))
8652 continue;
8653
8654 if (test_bit(Faulty, &rdev->flags)) {
8655 MD_BUG();
8656 continue;
8657 }
8658 set_bit(AutoDetected, &rdev->flags);
8659 list_add(&rdev->same_set, &pending_raid_disks);
8660 i_passed++;
8661 }
8662
8663 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8664 i_scanned, i_passed);
8665
8666 autorun_devices(part);
8667}
8668
8669#endif
8670
8671static __exit void md_exit(void)
8672{
8673 struct mddev *mddev;
8674 struct list_head *tmp;
8675
8676 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
8677 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8678
8679 unregister_blkdev(MD_MAJOR,"md");
8680 unregister_blkdev(mdp_major, "mdp");
8681 unregister_reboot_notifier(&md_notifier);
8682 unregister_sysctl_table(raid_table_header);
8683 remove_proc_entry("mdstat", NULL);
8684 for_each_mddev(mddev, tmp) {
8685 export_array(mddev);
8686 mddev->hold_active = 0;
8687 }
8688 destroy_workqueue(md_misc_wq);
8689 destroy_workqueue(md_wq);
8690}
8691
8692subsys_initcall(md_init);
8693module_exit(md_exit)
8694
8695static int get_ro(char *buffer, struct kernel_param *kp)
8696{
8697 return sprintf(buffer, "%d", start_readonly);
8698}
8699static int set_ro(const char *val, struct kernel_param *kp)
8700{
8701 char *e;
8702 int num = simple_strtoul(val, &e, 10);
8703 if (*val && (*e == '\0' || *e == '\n')) {
8704 start_readonly = num;
8705 return 0;
8706 }
8707 return -EINVAL;
8708}
8709
8710module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8711module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8712
8713module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8714
8715EXPORT_SYMBOL(register_md_personality);
8716EXPORT_SYMBOL(unregister_md_personality);
8717EXPORT_SYMBOL(md_error);
8718EXPORT_SYMBOL(md_done_sync);
8719EXPORT_SYMBOL(md_write_start);
8720EXPORT_SYMBOL(md_write_end);
8721EXPORT_SYMBOL(md_register_thread);
8722EXPORT_SYMBOL(md_unregister_thread);
8723EXPORT_SYMBOL(md_wakeup_thread);
8724EXPORT_SYMBOL(md_check_recovery);
8725EXPORT_SYMBOL(md_reap_sync_thread);
8726MODULE_LICENSE("GPL");
8727MODULE_DESCRIPTION("MD RAID framework");
8728MODULE_ALIAS("md");
8729MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8730