/*
 * md.c : Multiple Devices driver for Linux
 */
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/fs.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/string.h>
43#include <linux/hdreg.h>
44#include <linux/proc_fs.h>
45#include <linux/random.h>
46#include <linux/module.h>
47#include <linux/reboot.h>
48#include <linux/file.h>
49#include <linux/compat.h>
50#include <linux/delay.h>
51#include <linux/raid/md_p.h>
52#include <linux/raid/md_u.h>
53#include <linux/slab.h>
54#include "md.h"
55#include "bitmap.h"
56
57#ifndef MODULE
58static void autostart_arrays(int part);
59#endif
/*
 * List of registered md personalities (RAID levels), protected by pers_lock.
 */
66static LIST_HEAD(pers_list);
67static DEFINE_SPINLOCK(pers_lock);
68
69static void md_print_devices(void);
70
71static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
72static struct workqueue_struct *md_wq;
73static struct workqueue_struct *md_misc_wq;
74
75#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
76
/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array.
 */
82#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed.  Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle.  There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change these via /proc/sys/dev/raid/speed_limit_{min,max}
 * or /sys/block/mdX/md/sync_speed_{min,max}.
 */
96static int sysctl_speed_limit_min = 1000;
97static int sysctl_speed_limit_max = 200000;
98static inline int speed_min(struct mddev *mddev)
99{
100 return mddev->sync_speed_min ?
101 mddev->sync_speed_min : sysctl_speed_limit_min;
102}
103
104static inline int speed_max(struct mddev *mddev)
105{
106 return mddev->sync_speed_max ?
107 mddev->sync_speed_max : sysctl_speed_limit_max;
108}
109
110static struct ctl_table_header *raid_table_header;
111
112static ctl_table raid_table[] = {
113 {
114 .procname = "speed_limit_min",
115 .data = &sysctl_speed_limit_min,
116 .maxlen = sizeof(int),
117 .mode = S_IRUGO|S_IWUSR,
118 .proc_handler = proc_dointvec,
119 },
120 {
121 .procname = "speed_limit_max",
122 .data = &sysctl_speed_limit_max,
123 .maxlen = sizeof(int),
124 .mode = S_IRUGO|S_IWUSR,
125 .proc_handler = proc_dointvec,
126 },
127 { }
128};
129
130static ctl_table raid_dir_table[] = {
131 {
132 .procname = "raid",
133 .maxlen = 0,
134 .mode = S_IRUGO|S_IXUGO,
135 .child = raid_table,
136 },
137 { }
138};
139
140static ctl_table raid_root_table[] = {
141 {
142 .procname = "dev",
143 .maxlen = 0,
144 .mode = 0555,
145 .child = raid_dir_table,
146 },
147 { }
148};
149
150static const struct block_device_operations md_fops;
151
152static int start_readonly;
153
/*
 * bio_alloc_mddev / bio_clone_mddev:
 * like bio_alloc / bio_clone, but allocate from the mddev's private
 * bio set when one exists.
 */
158static void mddev_bio_destructor(struct bio *bio)
159{
160 struct mddev *mddev, **mddevp;
161
162 mddevp = (void*)bio;
163 mddev = mddevp[-1];
164
165 bio_free(bio, mddev->bio_set);
166}
167
168struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
169 struct mddev *mddev)
170{
171 struct bio *b;
172 struct mddev **mddevp;
173
174 if (!mddev || !mddev->bio_set)
175 return bio_alloc(gfp_mask, nr_iovecs);
176
177 b = bio_alloc_bioset(gfp_mask, nr_iovecs,
178 mddev->bio_set);
179 if (!b)
180 return NULL;
181 mddevp = (void*)b;
182 mddevp[-1] = mddev;
183 b->bi_destructor = mddev_bio_destructor;
184 return b;
185}
186EXPORT_SYMBOL_GPL(bio_alloc_mddev);
187
188struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
189 struct mddev *mddev)
190{
191 struct bio *b;
192 struct mddev **mddevp;
193
194 if (!mddev || !mddev->bio_set)
195 return bio_clone(bio, gfp_mask);
196
197 b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
198 mddev->bio_set);
199 if (!b)
200 return NULL;
201 mddevp = (void*)b;
202 mddevp[-1] = mddev;
203 b->bi_destructor = mddev_bio_destructor;
204 __bio_clone(b, bio);
205 if (bio_integrity(bio)) {
206 int ret;
207
208 ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
209
210 if (ret < 0) {
211 bio_put(b);
212 return NULL;
213 }
214 }
215
216 return b;
217}
218EXPORT_SYMBOL_GPL(bio_clone_mddev);
219
220void md_trim_bio(struct bio *bio, int offset, int size)
221{
	/* 'bio' is a cloned bio which we need to trim to match
	 * the given offset and size.
	 * This requires adjusting bi_sector, bi_size, and bi_io_vec.
	 */
226 int i;
227 struct bio_vec *bvec;
228 int sofar = 0;
229
230 size <<= 9;
231 if (offset == 0 && size == bio->bi_size)
232 return;
233
234 bio->bi_sector += offset;
235 bio->bi_size = size;
236 offset <<= 9;
237 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
238
239 while (bio->bi_idx < bio->bi_vcnt &&
240 bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
241
242 offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
243 bio->bi_idx++;
244 }
245 if (bio->bi_idx < bio->bi_vcnt) {
246 bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
247 bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
248 }
249
250 if (bio->bi_idx) {
251 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
252 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
253 bio->bi_vcnt -= bio->bi_idx;
254 bio->bi_idx = 0;
255 }
256
257 bio_for_each_segment(bvec, bio, i) {
258 if (sofar + bvec->bv_len > size)
259 bvec->bv_len = size - sofar;
260 if (bvec->bv_len == 0) {
261 bio->bi_vcnt = i;
262 break;
263 }
264 sofar += bvec->bv_len;
265 }
266}
267EXPORT_SYMBOL_GPL(md_trim_bio);
268
/*
 * We have a system wide 'event count' that is incremented
 * on all interesting events, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
279static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
280static atomic_t md_event_count;
281void md_new_event(struct mddev *mddev)
282{
283 atomic_inc(&md_event_count);
284 wake_up(&md_event_waiters);
285}
286EXPORT_SYMBOL_GPL(md_new_event);
287
/* Alternate version that can be called from interrupts
 * when calling sysfs_notify isn't needed.
 */
291static void md_new_event_inintr(struct mddev *mddev)
292{
293 atomic_inc(&md_event_count);
294 wake_up(&md_event_waiters);
295}
296
/*
 * Enables iteration over all existing md arrays;
 * all_mddevs_lock protects this list.
 */
301static LIST_HEAD(all_mddevs);
302static DEFINE_SPINLOCK(all_mddevs_lock);
303
/*
 * Iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop still holds a reference
 * to the current mddev and must mddev_put() it.
 */
312#define for_each_mddev(_mddev,_tmp) \
313 \
314 for (({ spin_lock(&all_mddevs_lock); \
315 _tmp = all_mddevs.next; \
316 _mddev = NULL;}); \
317 ({ if (_tmp != &all_mddevs) \
318 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
319 spin_unlock(&all_mddevs_lock); \
320 if (_mddev) mddev_put(_mddev); \
321 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
322 _tmp != &all_mddevs;}); \
323 ({ spin_lock(&all_mddevs_lock); \
324 _tmp = _tmp->next;}) \
325 )
326
/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending removal of conflicting disks.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
335static void md_make_request(struct request_queue *q, struct bio *bio)
336{
337 const int rw = bio_data_dir(bio);
338 struct mddev *mddev = q->queuedata;
339 int cpu;
340 unsigned int sectors;
341
342 if (mddev == NULL || mddev->pers == NULL
343 || !mddev->ready) {
344 bio_io_error(bio);
345 return;
346 }
347 smp_rmb();
348 rcu_read_lock();
349 if (mddev->suspended) {
350 DEFINE_WAIT(__wait);
351 for (;;) {
352 prepare_to_wait(&mddev->sb_wait, &__wait,
353 TASK_UNINTERRUPTIBLE);
354 if (!mddev->suspended)
355 break;
356 rcu_read_unlock();
357 schedule();
358 rcu_read_lock();
359 }
360 finish_wait(&mddev->sb_wait, &__wait);
361 }
362 atomic_inc(&mddev->active_io);
363 rcu_read_unlock();
364
	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
369 sectors = bio_sectors(bio);
370 mddev->pers->make_request(mddev, bio);
371
372 cpu = part_stat_lock();
373 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
374 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
375 part_stat_unlock();
376
377 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
378 wake_up(&mddev->sb_wait);
379}
380
/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once ->stop is called and completes, the module will be completely
 * unusable and so this must be called while the module is still live.
 */
387void mddev_suspend(struct mddev *mddev)
388{
389 BUG_ON(mddev->suspended);
390 mddev->suspended = 1;
391 synchronize_rcu();
392 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
393 mddev->pers->quiesce(mddev, 1);
394}
395EXPORT_SYMBOL_GPL(mddev_suspend);
396
397void mddev_resume(struct mddev *mddev)
398{
399 mddev->suspended = 0;
400 wake_up(&mddev->sb_wait);
401 mddev->pers->quiesce(mddev, 0);
402
403 md_wakeup_thread(mddev->thread);
404 md_wakeup_thread(mddev->sync_thread);
405}
406EXPORT_SYMBOL_GPL(mddev_resume);
407
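/* Treat the array as congested while it is suspended. */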
408int mddev_congested(struct mddev *mddev, int bits)
409{
410 return mddev->suspended;
411}
412EXPORT_SYMBOL(mddev_congested);
413
/*
 * Generic flush handling for md
 */
418static void md_end_flush(struct bio *bio, int err)
419{
420 struct md_rdev *rdev = bio->bi_private;
421 struct mddev *mddev = rdev->mddev;
422
423 rdev_dec_pending(rdev, mddev);
424
425 if (atomic_dec_and_test(&mddev->flush_pending)) {
426
427 queue_work(md_wq, &mddev->flush_work);
428 }
429 bio_put(bio);
430}
431
432static void md_submit_flush_data(struct work_struct *ws);
433
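/* Send an empty flush bio to every active, non-faulty member device.
 * When the last one completes, md_end_flush() queues md_submit_flush_data()
 * to deal with the original request.
 */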
434static void submit_flushes(struct work_struct *ws)
435{
436 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
437 struct md_rdev *rdev;
438
439 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
440 atomic_set(&mddev->flush_pending, 1);
441 rcu_read_lock();
442 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
443 if (rdev->raid_disk >= 0 &&
444 !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when the request finishes, one after
			 * we reclaim rcu_read_lock
			 */
449 struct bio *bi;
450 atomic_inc(&rdev->nr_pending);
451 atomic_inc(&rdev->nr_pending);
452 rcu_read_unlock();
453 bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
454 bi->bi_end_io = md_end_flush;
455 bi->bi_private = rdev;
456 bi->bi_bdev = rdev->bdev;
457 atomic_inc(&mddev->flush_pending);
458 submit_bio(WRITE_FLUSH, bi);
459 rcu_read_lock();
460 rdev_dec_pending(rdev, mddev);
461 }
462 rcu_read_unlock();
463 if (atomic_dec_and_test(&mddev->flush_pending))
464 queue_work(md_wq, &mddev->flush_work);
465}
466
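/* Runs once the pre-flush has completed on all devices: an empty flush bio
 * is finished here, otherwise REQ_FLUSH is cleared and the bio is passed
 * down to the personality.
 */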
467static void md_submit_flush_data(struct work_struct *ws)
468{
469 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
470 struct bio *bio = mddev->flush_bio;
471
472 if (bio->bi_size == 0)
		/* an empty flush - nothing more to do beyond completing it */
474 bio_endio(bio, 0);
475 else {
476 bio->bi_rw &= ~REQ_FLUSH;
477 mddev->pers->make_request(mddev, bio);
478 }
479
480 mddev->flush_bio = NULL;
481 wake_up(&mddev->sb_wait);
482}
483
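/* Called by personalities to handle a bio with REQ_FLUSH set.  Only one
 * flush may be in flight per array; the bio is recorded in mddev->flush_bio
 * and submit_flushes() is kicked off from the md workqueue.
 */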
484void md_flush_request(struct mddev *mddev, struct bio *bio)
485{
486 spin_lock_irq(&mddev->write_lock);
487 wait_event_lock_irq(mddev->sb_wait,
488 !mddev->flush_bio,
			    mddev->write_lock, /*nothing*/);
490 mddev->flush_bio = bio;
491 spin_unlock_irq(&mddev->write_lock);
492
493 INIT_WORK(&mddev->flush_work, submit_flushes);
494 queue_work(md_wq, &mddev->flush_work);
495}
496EXPORT_SYMBOL(md_flush_request);
497
498
/* Support for plugging.
 * This mirrors the plugging support in request_queue, but does not
 * require having a whole queue or request structures.
 * We allocate an md_plug_cb for each md device and each thread it gets
 * plugged on.  mddev->plug_cnt counts the number of outstanding plugs
 * so other code can see if a plug is active.
 */
506struct md_plug_cb {
507 struct blk_plug_cb cb;
508 struct mddev *mddev;
509};
510
511static void plugger_unplug(struct blk_plug_cb *cb)
512{
513 struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb);
514 if (atomic_dec_and_test(&mdcb->mddev->plug_cnt))
515 md_wakeup_thread(mdcb->mddev->thread);
516 kfree(mdcb);
517}
518
/* Check that an unplug wakeup will come shortly.
 * Returns 1 if a plug callback is registered for this mddev on the
 * current thread's plug, 0 otherwise.
 */
522int mddev_check_plugged(struct mddev *mddev)
523{
524 struct blk_plug *plug = current->plug;
525 struct md_plug_cb *mdcb;
526
527 if (!plug)
528 return 0;
529
530 list_for_each_entry(mdcb, &plug->cb_list, cb.list) {
531 if (mdcb->cb.callback == plugger_unplug &&
532 mdcb->mddev == mddev) {
533
534 if (mdcb != list_first_entry(&plug->cb_list,
535 struct md_plug_cb,
536 cb.list))
537 list_move(&mdcb->cb.list, &plug->cb_list);
538 return 1;
539 }
540 }
541
542 mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC);
543 if (!mdcb)
544 return 0;
545
546 mdcb->mddev = mddev;
547 mdcb->cb.callback = plugger_unplug;
548 atomic_inc(&mddev->plug_cnt);
549 list_add(&mdcb->cb.list, &plug->cb_list);
550 return 1;
551}
552EXPORT_SYMBOL_GPL(mddev_check_plugged);
553
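/* mddev reference counting: mddev_get() takes a reference; mddev_put()
 * drops it and tears the array down once it is unused and not held active.
 */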
554static inline struct mddev *mddev_get(struct mddev *mddev)
555{
556 atomic_inc(&mddev->active);
557 return mddev;
558}
559
560static void mddev_delayed_delete(struct work_struct *ws);
561
562static void mddev_put(struct mddev *mddev)
563{
564 struct bio_set *bs = NULL;
565
566 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
567 return;
568 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
569 mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
572 list_del_init(&mddev->all_mddevs);
573 bs = mddev->bio_set;
574 mddev->bio_set = NULL;
575 if (mddev->gendisk) {
			/* We did a probe so need to clean up.  Call
			 * queue_work inside the spinlock so that
			 * flush_workqueue() after mddev_find will
			 * succeed in waiting for the work to be done.
			 */
581 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
582 queue_work(md_misc_wq, &mddev->del_work);
583 } else
584 kfree(mddev);
585 }
586 spin_unlock(&all_mddevs_lock);
587 if (bs)
588 bioset_free(bs);
589}
590
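/* Initialise the locks, lists, waitqueues and default fields of a newly
 * allocated mddev.
 */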
591void mddev_init(struct mddev *mddev)
592{
593 mutex_init(&mddev->open_mutex);
594 mutex_init(&mddev->reconfig_mutex);
595 mutex_init(&mddev->bitmap_info.mutex);
596 INIT_LIST_HEAD(&mddev->disks);
597 INIT_LIST_HEAD(&mddev->all_mddevs);
598 init_timer(&mddev->safemode_timer);
599 atomic_set(&mddev->active, 1);
600 atomic_set(&mddev->openers, 0);
601 atomic_set(&mddev->active_io, 0);
602 atomic_set(&mddev->plug_cnt, 0);
603 spin_lock_init(&mddev->write_lock);
604 atomic_set(&mddev->flush_pending, 0);
605 init_waitqueue_head(&mddev->sb_wait);
606 init_waitqueue_head(&mddev->recovery_wait);
607 mddev->reshape_position = MaxSector;
608 mddev->resync_min = 0;
609 mddev->resync_max = MaxSector;
610 mddev->level = LEVEL_NONE;
611}
612EXPORT_SYMBOL_GPL(mddev_init);
613
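/* Find the mddev for the given unit (device number), allocating and
 * registering a new one if necessary.  A unit of 0 means "allocate a new
 * array", in which case an unused minor number (starting at 512) is chosen.
 */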
614static struct mddev * mddev_find(dev_t unit)
615{
616 struct mddev *mddev, *new = NULL;
617
618 if (unit && MAJOR(unit) != MD_MAJOR)
619 unit &= ~((1<<MdpMinorShift)-1);
620
621 retry:
622 spin_lock(&all_mddevs_lock);
623
624 if (unit) {
625 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
626 if (mddev->unit == unit) {
627 mddev_get(mddev);
628 spin_unlock(&all_mddevs_lock);
629 kfree(new);
630 return mddev;
631 }
632
633 if (new) {
634 list_add(&new->all_mddevs, &all_mddevs);
635 spin_unlock(&all_mddevs_lock);
636 new->hold_active = UNTIL_IOCTL;
637 return new;
638 }
639 } else if (new) {
640
641 static int next_minor = 512;
642 int start = next_minor;
643 int is_free = 0;
644 int dev = 0;
645 while (!is_free) {
646 dev = MKDEV(MD_MAJOR, next_minor);
647 next_minor++;
648 if (next_minor > MINORMASK)
649 next_minor = 0;
650 if (next_minor == start) {
651
652 spin_unlock(&all_mddevs_lock);
653 kfree(new);
654 return NULL;
655 }
656
657 is_free = 1;
658 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
659 if (mddev->unit == dev) {
660 is_free = 0;
661 break;
662 }
663 }
664 new->unit = dev;
665 new->md_minor = MINOR(dev);
666 new->hold_active = UNTIL_STOP;
667 list_add(&new->all_mddevs, &all_mddevs);
668 spin_unlock(&all_mddevs_lock);
669 return new;
670 }
671 spin_unlock(&all_mddevs_lock);
672
673 new = kzalloc(sizeof(*new), GFP_KERNEL);
674 if (!new)
675 return NULL;
676
677 new->unit = unit;
678 if (MAJOR(unit) == MD_MAJOR)
679 new->md_minor = MINOR(unit);
680 else
681 new->md_minor = MINOR(unit) >> MdpMinorShift;
682
683 mddev_init(new);
684
685 goto retry;
686}
687
688static inline int mddev_lock(struct mddev * mddev)
689{
690 return mutex_lock_interruptible(&mddev->reconfig_mutex);
691}
692
693static inline int mddev_is_locked(struct mddev *mddev)
694{
695 return mutex_is_locked(&mddev->reconfig_mutex);
696}
697
698static inline int mddev_trylock(struct mddev * mddev)
699{
700 return mutex_trylock(&mddev->reconfig_mutex);
701}
702
703static struct attribute_group md_redundancy_group;
704
705static void mddev_unlock(struct mddev * mddev)
706{
707 if (mddev->to_remove) {
		/* These sysfs groups cannot be removed under reconfig_mutex,
		 * as an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active set while the remove is happening,
		 * and anything else which might set ->to_remove or may
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
720 struct attribute_group *to_remove = mddev->to_remove;
721 mddev->to_remove = NULL;
722 mddev->sysfs_active = 1;
723 mutex_unlock(&mddev->reconfig_mutex);
724
725 if (mddev->kobj.sd) {
726 if (to_remove != &md_redundancy_group)
727 sysfs_remove_group(&mddev->kobj, to_remove);
728 if (mddev->pers == NULL ||
729 mddev->pers->sync_request == NULL) {
730 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
731 if (mddev->sysfs_action)
732 sysfs_put(mddev->sysfs_action);
733 mddev->sysfs_action = NULL;
734 }
735 }
736 mddev->sysfs_active = 0;
737 } else
738 mutex_unlock(&mddev->reconfig_mutex);
739
	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
743 spin_lock(&pers_lock);
744 md_wakeup_thread(mddev->thread);
745 spin_unlock(&pers_lock);
746}
747
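/* Look up a member device by descriptor number (find_rdev_nr) or by
 * dev_t (find_rdev).
 */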
748static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
749{
750 struct md_rdev *rdev;
751
752 list_for_each_entry(rdev, &mddev->disks, same_set)
753 if (rdev->desc_nr == nr)
754 return rdev;
755
756 return NULL;
757}
758
759static struct md_rdev * find_rdev(struct mddev * mddev, dev_t dev)
760{
761 struct md_rdev *rdev;
762
763 list_for_each_entry(rdev, &mddev->disks, same_set)
764 if (rdev->bdev->bd_dev == dev)
765 return rdev;
766
767 return NULL;
768}
769
770static struct md_personality *find_pers(int level, char *clevel)
771{
772 struct md_personality *pers;
773 list_for_each_entry(pers, &pers_list, list) {
774 if (level != LEVEL_NONE && pers->level == level)
775 return pers;
776 if (strcmp(pers->name, clevel)==0)
777 return pers;
778 }
779 return NULL;
780}
781
782
783static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
784{
785 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
786 return MD_NEW_SIZE_SECTORS(num_sectors);
787}
788
789static int alloc_disk_sb(struct md_rdev * rdev)
790{
791 if (rdev->sb_page)
792 MD_BUG();
793
794 rdev->sb_page = alloc_page(GFP_KERNEL);
795 if (!rdev->sb_page) {
796 printk(KERN_ALERT "md: out of memory.\n");
797 return -ENOMEM;
798 }
799
800 return 0;
801}
802
803static void free_disk_sb(struct md_rdev * rdev)
804{
805 if (rdev->sb_page) {
806 put_page(rdev->sb_page);
807 rdev->sb_loaded = 0;
808 rdev->sb_page = NULL;
809 rdev->sb_start = 0;
810 rdev->sectors = 0;
811 }
812 if (rdev->bb_page) {
813 put_page(rdev->bb_page);
814 rdev->bb_page = NULL;
815 }
816}
817
818
819static void super_written(struct bio *bio, int error)
820{
821 struct md_rdev *rdev = bio->bi_private;
822 struct mddev *mddev = rdev->mddev;
823
824 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
825 printk("md: super_written gets error=%d, uptodate=%d\n",
826 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
827 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
828 md_error(mddev, rdev);
829 }
830
831 if (atomic_dec_and_test(&mddev->pending_writes))
832 wake_up(&mddev->sb_wait);
833 bio_put(bio);
834}
835
836void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
837 sector_t sector, int size, struct page *page)
838{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
845 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
846
847 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
848 bio->bi_sector = sector;
849 bio_add_page(bio, page, size, 0);
850 bio->bi_private = rdev;
851 bio->bi_end_io = super_written;
852
853 atomic_inc(&mddev->pending_writes);
854 submit_bio(WRITE_FLUSH_FUA, bio);
855}
856
857void md_super_wait(struct mddev *mddev)
858{
	/* wait for all superblock writes that were scheduled to complete */
860 DEFINE_WAIT(wq);
861 for(;;) {
862 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
863 if (atomic_read(&mddev->pending_writes)==0)
864 break;
865 schedule();
866 }
867 finish_wait(&mddev->sb_wait, &wq);
868}
869
870static void bi_complete(struct bio *bio, int error)
871{
872 complete((struct completion*)bio->bi_private);
873}
874
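/* Synchronously read or write 'size' bytes of 'page' at the given sector of
 * a member device.  metadata_op addresses relative to the superblock,
 * otherwise relative to the data area.  Returns 1 on success, 0 on failure.
 */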
875int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
876 struct page *page, int rw, bool metadata_op)
877{
878 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
879 struct completion event;
880 int ret;
881
882 rw |= REQ_SYNC;
883
884 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
885 rdev->meta_bdev : rdev->bdev;
886 if (metadata_op)
887 bio->bi_sector = sector + rdev->sb_start;
888 else
889 bio->bi_sector = sector + rdev->data_offset;
890 bio_add_page(bio, page, size, 0);
891 init_completion(&event);
892 bio->bi_private = &event;
893 bio->bi_end_io = bi_complete;
894 submit_bio(rw, bio);
895 wait_for_completion(&event);
896
897 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
898 bio_put(bio);
899 return ret;
900}
901EXPORT_SYMBOL_GPL(sync_page_io);
902
903static int read_disk_sb(struct md_rdev * rdev, int size)
904{
905 char b[BDEVNAME_SIZE];
906 if (!rdev->sb_page) {
907 MD_BUG();
908 return -EINVAL;
909 }
910 if (rdev->sb_loaded)
911 return 0;
912
913
914 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
915 goto fail;
916 rdev->sb_loaded = 1;
917 return 0;
918
919fail:
920 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
921 bdevname(rdev->bdev,b));
922 return -EINVAL;
923}
924
925static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
926{
927 return sb1->set_uuid0 == sb2->set_uuid0 &&
928 sb1->set_uuid1 == sb2->set_uuid1 &&
929 sb1->set_uuid2 == sb2->set_uuid2 &&
930 sb1->set_uuid3 == sb2->set_uuid3;
931}
932
933static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
934{
935 int ret;
936 mdp_super_t *tmp1, *tmp2;
937
938 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
939 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
940
941 if (!tmp1 || !tmp2) {
942 ret = 0;
943 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
944 goto abort;
945 }
946
947 *tmp1 = *sb1;
948 *tmp2 = *sb2;
949
	/*
	 * nr_disks is not constant, so compare with it zeroed out
	 */
953 tmp1->nr_disks = 0;
954 tmp2->nr_disks = 0;
955
956 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
957abort:
958 kfree(tmp1);
959 kfree(tmp2);
960 return ret;
961}
962
963
964static u32 md_csum_fold(u32 csum)
965{
966 csum = (csum & 0xffff) + (csum >> 16);
967 return (csum & 0xffff) + (csum >> 16);
968}
969
970static unsigned int calc_sb_csum(mdp_super_t * sb)
971{
972 u64 newcsum = 0;
973 u32 *sb32 = (u32*)sb;
974 int i;
975 unsigned int disk_csum, csum;
976
977 disk_csum = sb->sb_csum;
978 sb->sb_csum = 0;
979
980 for (i = 0; i < MD_SB_BYTES/4 ; i++)
981 newcsum += sb32[i];
982 csum = (newcsum & 0xffffffff) + (newcsum>>32);
983
984
985#ifdef CONFIG_ALPHA
	/* Historically the checksum was computed differently on Alpha
	 * (via csum_partial), so for compatibility the on-disk value is
	 * re-folded here rather than restored unchanged as on other
	 * architectures.
	 */
994 sb->sb_csum = md_csum_fold(disk_csum);
995#else
996 sb->sb_csum = disk_csum;
997#endif
998 return csum;
999}
1000
/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface for them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev.
 *      The first time, mddev->raid_disks will be 0, and data from
 *      dev should be merged in.  Subsequent calls check that dev
 *      is new enough.  Return 0 or -EINVAL
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *      Update the superblock for rdev with data in mddev.
 *      This does not write to disc.
 */
1032struct super_type {
1033 char *name;
1034 struct module *owner;
1035 int (*load_super)(struct md_rdev *rdev, struct md_rdev *refdev,
1036 int minor_version);
1037 int (*validate_super)(struct mddev *mddev, struct md_rdev *rdev);
1038 void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
1039 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
1040 sector_t num_sectors);
1041};
1042
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do
 * not support bitmaps.  It prints an error message and returns non-zero if
 * mddev has a bitmap.  Otherwise, it returns 0.
 */
1051int md_check_no_bitmap(struct mddev *mddev)
1052{
1053 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1054 return 0;
1055 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
1056 mdname(mddev), mddev->pers->name);
1057 return 1;
1058}
1059EXPORT_SYMBOL(md_check_no_bitmap);
1060
/*
 * load_super for 0.90.0
 */
1064static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1065{
1066 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1067 mdp_super_t *sb;
1068 int ret;
1069
	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
1076 rdev->sb_start = calc_dev_sboffset(rdev);
1077
1078 ret = read_disk_sb(rdev, MD_SB_BYTES);
1079 if (ret) return ret;
1080
1081 ret = -EINVAL;
1082
1083 bdevname(rdev->bdev, b);
1084 sb = page_address(rdev->sb_page);
1085
1086 if (sb->md_magic != MD_SB_MAGIC) {
1087 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
1088 b);
1089 goto abort;
1090 }
1091
1092 if (sb->major_version != 0 ||
1093 sb->minor_version < 90 ||
1094 sb->minor_version > 91) {
1095 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
1096 sb->major_version, sb->minor_version,
1097 b);
1098 goto abort;
1099 }
1100
1101 if (sb->raid_disks <= 0)
1102 goto abort;
1103
1104 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1105 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
1106 b);
1107 goto abort;
1108 }
1109
1110 rdev->preferred_minor = sb->md_minor;
1111 rdev->data_offset = 0;
1112 rdev->sb_size = MD_SB_BYTES;
1113 rdev->badblocks.shift = -1;
1114
1115 if (sb->level == LEVEL_MULTIPATH)
1116 rdev->desc_nr = -1;
1117 else
1118 rdev->desc_nr = sb->this_disk.number;
1119
1120 if (!refdev) {
1121 ret = 1;
1122 } else {
1123 __u64 ev1, ev2;
1124 mdp_super_t *refsb = page_address(refdev->sb_page);
1125 if (!uuid_equal(refsb, sb)) {
1126 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1127 b, bdevname(refdev->bdev,b2));
1128 goto abort;
1129 }
1130 if (!sb_equal(refsb, sb)) {
1131 printk(KERN_WARNING "md: %s has same UUID"
1132 " but different superblock to %s\n",
1133 b, bdevname(refdev->bdev, b2));
1134 goto abort;
1135 }
1136 ev1 = md_event(sb);
1137 ev2 = md_event(refsb);
1138 if (ev1 > ev2)
1139 ret = 1;
1140 else
1141 ret = 0;
1142 }
1143 rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that */
1145 if (rdev->sectors >= (2ULL << 32))
1146 rdev->sectors = (2ULL << 32) - 2;
1147
1148 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1149
1150 ret = -EINVAL;
1151
1152 abort:
1153 return ret;
1154}
1155
/*
 * validate_super for 0.90.0
 */
1159static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1160{
1161 mdp_disk_t *desc;
1162 mdp_super_t *sb = page_address(rdev->sb_page);
1163 __u64 ev1 = md_event(sb);
1164
1165 rdev->raid_disk = -1;
1166 clear_bit(Faulty, &rdev->flags);
1167 clear_bit(In_sync, &rdev->flags);
1168 clear_bit(WriteMostly, &rdev->flags);
1169
1170 if (mddev->raid_disks == 0) {
1171 mddev->major_version = 0;
1172 mddev->minor_version = sb->minor_version;
1173 mddev->patch_version = sb->patch_version;
1174 mddev->external = 0;
1175 mddev->chunk_sectors = sb->chunk_size >> 9;
1176 mddev->ctime = sb->ctime;
1177 mddev->utime = sb->utime;
1178 mddev->level = sb->level;
1179 mddev->clevel[0] = 0;
1180 mddev->layout = sb->layout;
1181 mddev->raid_disks = sb->raid_disks;
1182 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1183 mddev->events = ev1;
1184 mddev->bitmap_info.offset = 0;
1185 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1186
1187 if (mddev->minor_version >= 91) {
1188 mddev->reshape_position = sb->reshape_position;
1189 mddev->delta_disks = sb->delta_disks;
1190 mddev->new_level = sb->new_level;
1191 mddev->new_layout = sb->new_layout;
1192 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1193 } else {
1194 mddev->reshape_position = MaxSector;
1195 mddev->delta_disks = 0;
1196 mddev->new_level = mddev->level;
1197 mddev->new_layout = mddev->layout;
1198 mddev->new_chunk_sectors = mddev->chunk_sectors;
1199 }
1200
1201 if (sb->state & (1<<MD_SB_CLEAN))
1202 mddev->recovery_cp = MaxSector;
1203 else {
1204 if (sb->events_hi == sb->cp_events_hi &&
1205 sb->events_lo == sb->cp_events_lo) {
1206 mddev->recovery_cp = sb->recovery_cp;
1207 } else
1208 mddev->recovery_cp = 0;
1209 }
1210
1211 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1212 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1213 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1214 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1215
1216 mddev->max_disks = MD_SB_DISKS;
1217
1218 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1219 mddev->bitmap_info.file == NULL)
1220 mddev->bitmap_info.offset =
1221 mddev->bitmap_info.default_offset;
1222
1223 } else if (mddev->pers == NULL) {
		/* Insist on a good event counter while assembling, except
		 * for spares (which don't need an event count) */
1226 ++ev1;
1227 if (sb->disks[rdev->desc_nr].state & (
1228 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1229 if (ev1 < mddev->events)
1230 return -EINVAL;
1231 } else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
1235 if (ev1 < mddev->bitmap->events_cleared)
1236 return 0;
1237 } else {
1238 if (ev1 < mddev->events)
1239
1240 return 0;
1241 }
1242
1243 if (mddev->level != LEVEL_MULTIPATH) {
1244 desc = sb->disks + rdev->desc_nr;
1245
1246 if (desc->state & (1<<MD_DISK_FAULTY))
1247 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
1250 set_bit(In_sync, &rdev->flags);
1251 rdev->raid_disk = desc->raid_disk;
1252 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
1256 if (mddev->minor_version >= 91) {
1257 rdev->recovery_offset = 0;
1258 rdev->raid_disk = desc->raid_disk;
1259 }
1260 }
1261 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1262 set_bit(WriteMostly, &rdev->flags);
1263 } else
1264 set_bit(In_sync, &rdev->flags);
1265 return 0;
1266}
1267
/*
 * sync_super for 0.90.0
 */
1271static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1272{
1273 mdp_super_t *sb;
1274 struct md_rdev *rdev2;
1275 int next_spare = mddev->raid_disks;
1276
	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
1288 int i;
1289 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1290
1291 rdev->sb_size = MD_SB_BYTES;
1292
1293 sb = page_address(rdev->sb_page);
1294
1295 memset(sb, 0, sizeof(*sb));
1296
1297 sb->md_magic = MD_SB_MAGIC;
1298 sb->major_version = mddev->major_version;
1299 sb->patch_version = mddev->patch_version;
1300 sb->gvalid_words = 0;
1301 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1302 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1303 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1304 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1305
1306 sb->ctime = mddev->ctime;
1307 sb->level = mddev->level;
1308 sb->size = mddev->dev_sectors / 2;
1309 sb->raid_disks = mddev->raid_disks;
1310 sb->md_minor = mddev->md_minor;
1311 sb->not_persistent = 0;
1312 sb->utime = mddev->utime;
1313 sb->state = 0;
1314 sb->events_hi = (mddev->events>>32);
1315 sb->events_lo = (u32)mddev->events;
1316
1317 if (mddev->reshape_position == MaxSector)
1318 sb->minor_version = 90;
1319 else {
1320 sb->minor_version = 91;
1321 sb->reshape_position = mddev->reshape_position;
1322 sb->new_level = mddev->new_level;
1323 sb->delta_disks = mddev->delta_disks;
1324 sb->new_layout = mddev->new_layout;
1325 sb->new_chunk = mddev->new_chunk_sectors << 9;
1326 }
1327 mddev->minor_version = sb->minor_version;
1328 if (mddev->in_sync)
1329 {
1330 sb->recovery_cp = mddev->recovery_cp;
1331 sb->cp_events_hi = (mddev->events>>32);
1332 sb->cp_events_lo = (u32)mddev->events;
1333 if (mddev->recovery_cp == MaxSector)
1334 sb->state = (1<< MD_SB_CLEAN);
1335 } else
1336 sb->recovery_cp = 0;
1337
1338 sb->layout = mddev->layout;
1339 sb->chunk_size = mddev->chunk_sectors << 9;
1340
1341 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1342 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1343
1344 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1345 list_for_each_entry(rdev2, &mddev->disks, same_set) {
1346 mdp_disk_t *d;
1347 int desc_nr;
1348 int is_active = test_bit(In_sync, &rdev2->flags);
1349
1350 if (rdev2->raid_disk >= 0 &&
1351 sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
1356 is_active = 1;
1357 if (rdev2->raid_disk < 0 ||
1358 test_bit(Faulty, &rdev2->flags))
1359 is_active = 0;
1360 if (is_active)
1361 desc_nr = rdev2->raid_disk;
1362 else
1363 desc_nr = next_spare++;
1364 rdev2->desc_nr = desc_nr;
1365 d = &sb->disks[rdev2->desc_nr];
1366 nr_disks++;
1367 d->number = rdev2->desc_nr;
1368 d->major = MAJOR(rdev2->bdev->bd_dev);
1369 d->minor = MINOR(rdev2->bdev->bd_dev);
1370 if (is_active)
1371 d->raid_disk = rdev2->raid_disk;
1372 else
1373 d->raid_disk = rdev2->desc_nr;
1374 if (test_bit(Faulty, &rdev2->flags))
1375 d->state = (1<<MD_DISK_FAULTY);
1376 else if (is_active) {
1377 d->state = (1<<MD_DISK_ACTIVE);
1378 if (test_bit(In_sync, &rdev2->flags))
1379 d->state |= (1<<MD_DISK_SYNC);
1380 active++;
1381 working++;
1382 } else {
1383 d->state = 0;
1384 spare++;
1385 working++;
1386 }
1387 if (test_bit(WriteMostly, &rdev2->flags))
1388 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1389 }
1390
1391 for (i=0 ; i < mddev->raid_disks ; i++) {
1392 mdp_disk_t *d = &sb->disks[i];
1393 if (d->state == 0 && d->number == 0) {
1394 d->number = i;
1395 d->raid_disk = i;
1396 d->state = (1<<MD_DISK_REMOVED);
1397 d->state |= (1<<MD_DISK_FAULTY);
1398 failed++;
1399 }
1400 }
1401 sb->nr_disks = nr_disks;
1402 sb->active_disks = active;
1403 sb->working_disks = working;
1404 sb->failed_disks = failed;
1405 sb->spare_disks = spare;
1406
1407 sb->this_disk = sb->disks[rdev->desc_nr];
1408 sb->sb_csum = calc_sb_csum(sb);
1409}
1410
/*
 * rdev_size_change for 0.90.0
 */
1414static unsigned long long
1415super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1416{
1417 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1418 return 0;
1419 if (rdev->mddev->bitmap_info.offset)
1420 return 0;
1421 rdev->sb_start = calc_dev_sboffset(rdev);
1422 if (!num_sectors || num_sectors > rdev->sb_start)
1423 num_sectors = rdev->sb_start;
1424
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
1427 if (num_sectors >= (2ULL << 32))
1428 num_sectors = (2ULL << 32) - 2;
1429 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1430 rdev->sb_page);
1431 md_super_wait(rdev->mddev);
1432 return num_sectors;
1433}
1434
/*
 * version 1 superblock
 */
1440static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1441{
1442 __le32 disk_csum;
1443 u32 csum;
1444 unsigned long long newcsum;
1445 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1446 __le32 *isuper = (__le32*)sb;
1447 int i;
1448
1449 disk_csum = sb->sb_csum;
1450 sb->sb_csum = 0;
1451 newcsum = 0;
1452 for (i=0; size>=4; size -= 4 )
1453 newcsum += le32_to_cpu(*isuper++);
1454
1455 if (size == 2)
1456 newcsum += le16_to_cpu(*(__le16*) isuper);
1457
1458 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1459 sb->sb_csum = disk_csum;
1460 return cpu_to_le32(csum);
1461}
1462
1463static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
1464 int acknowledged);
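/* load_super for 1.x metadata.  minor_version selects where the superblock
 * lives: 0 = near the end of the device, 1 = at the start, 2 = 4K from
 * the start.
 */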
1465static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1466{
1467 struct mdp_superblock_1 *sb;
1468 int ret;
1469 sector_t sb_start;
1470 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1471 int bmask;
1472
	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
1481 switch(minor_version) {
1482 case 0:
1483 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1484 sb_start -= 8*2;
1485 sb_start &= ~(sector_t)(4*2-1);
1486 break;
1487 case 1:
1488 sb_start = 0;
1489 break;
1490 case 2:
1491 sb_start = 8;
1492 break;
1493 default:
1494 return -EINVAL;
1495 }
1496 rdev->sb_start = sb_start;
1497
	/* superblock is rarely larger than 1K, but it can be larger,
	 * and it is safe to read 4k, so we do that
	 */
1501 ret = read_disk_sb(rdev, 4096);
1502 if (ret) return ret;
1503
1504
1505 sb = page_address(rdev->sb_page);
1506
1507 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1508 sb->major_version != cpu_to_le32(1) ||
1509 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1510 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1511 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1512 return -EINVAL;
1513
1514 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1515 printk("md: invalid superblock checksum on %s\n",
1516 bdevname(rdev->bdev,b));
1517 return -EINVAL;
1518 }
1519 if (le64_to_cpu(sb->data_size) < 10) {
1520 printk("md: data_size too small on %s\n",
1521 bdevname(rdev->bdev,b));
1522 return -EINVAL;
1523 }
1524
1525 rdev->preferred_minor = 0xffff;
1526 rdev->data_offset = le64_to_cpu(sb->data_offset);
1527 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1528
1529 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1530 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1531 if (rdev->sb_size & bmask)
1532 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1533
1534 if (minor_version
1535 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1536 return -EINVAL;
1537
1538 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1539 rdev->desc_nr = -1;
1540 else
1541 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1542
1543 if (!rdev->bb_page) {
1544 rdev->bb_page = alloc_page(GFP_KERNEL);
1545 if (!rdev->bb_page)
1546 return -ENOMEM;
1547 }
1548 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1549 rdev->badblocks.count == 0) {
		/* need to load the bad block list.
		 * Currently we limit it to one page.
		 */
1553 s32 offset;
1554 sector_t bb_sector;
1555 u64 *bbp;
1556 int i;
1557 int sectors = le16_to_cpu(sb->bblog_size);
1558 if (sectors > (PAGE_SIZE / 512))
1559 return -EINVAL;
1560 offset = le32_to_cpu(sb->bblog_offset);
1561 if (offset == 0)
1562 return -EINVAL;
1563 bb_sector = (long long)offset;
1564 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1565 rdev->bb_page, READ, true))
1566 return -EIO;
1567 bbp = (u64 *)page_address(rdev->bb_page);
1568 rdev->badblocks.shift = sb->bblog_shift;
1569 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1570 u64 bb = le64_to_cpu(*bbp);
1571 int count = bb & (0x3ff);
1572 u64 sector = bb >> 10;
1573 sector <<= sb->bblog_shift;
1574 count <<= sb->bblog_shift;
1575 if (bb + 1 == 0)
1576 break;
1577 if (md_set_badblocks(&rdev->badblocks,
1578 sector, count, 1) == 0)
1579 return -EINVAL;
1580 }
1581 } else if (sb->bblog_offset == 0)
1582 rdev->badblocks.shift = -1;
1583
1584 if (!refdev) {
1585 ret = 1;
1586 } else {
1587 __u64 ev1, ev2;
1588 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1589
1590 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1591 sb->level != refsb->level ||
1592 sb->layout != refsb->layout ||
1593 sb->chunksize != refsb->chunksize) {
1594 printk(KERN_WARNING "md: %s has strangely different"
1595 " superblock to %s\n",
1596 bdevname(rdev->bdev,b),
1597 bdevname(refdev->bdev,b2));
1598 return -EINVAL;
1599 }
1600 ev1 = le64_to_cpu(sb->events);
1601 ev2 = le64_to_cpu(refsb->events);
1602
1603 if (ev1 > ev2)
1604 ret = 1;
1605 else
1606 ret = 0;
1607 }
1608 if (minor_version)
1609 rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
1610 le64_to_cpu(sb->data_offset);
1611 else
1612 rdev->sectors = rdev->sb_start;
1613 if (rdev->sectors < le64_to_cpu(sb->data_size))
1614 return -EINVAL;
1615 rdev->sectors = le64_to_cpu(sb->data_size);
1616 if (le64_to_cpu(sb->size) > rdev->sectors)
1617 return -EINVAL;
1618 return ret;
1619}
1620
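/* validate_super for 1.x metadata: on first use copy array-wide fields into
 * the (still empty) mddev, otherwise check that the device is recent enough
 * and set its role and flags.
 */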
1621static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1622{
1623 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1624 __u64 ev1 = le64_to_cpu(sb->events);
1625
1626 rdev->raid_disk = -1;
1627 clear_bit(Faulty, &rdev->flags);
1628 clear_bit(In_sync, &rdev->flags);
1629 clear_bit(WriteMostly, &rdev->flags);
1630
1631 if (mddev->raid_disks == 0) {
1632 mddev->major_version = 1;
1633 mddev->patch_version = 0;
1634 mddev->external = 0;
1635 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1636 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1637 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1638 mddev->level = le32_to_cpu(sb->level);
1639 mddev->clevel[0] = 0;
1640 mddev->layout = le32_to_cpu(sb->layout);
1641 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1642 mddev->dev_sectors = le64_to_cpu(sb->size);
1643 mddev->events = ev1;
1644 mddev->bitmap_info.offset = 0;
1645 mddev->bitmap_info.default_offset = 1024 >> 9;
1646
1647 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1648 memcpy(mddev->uuid, sb->set_uuid, 16);
1649
1650 mddev->max_disks = (4096-256)/2;
1651
1652 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1653 mddev->bitmap_info.file == NULL )
1654 mddev->bitmap_info.offset =
1655 (__s32)le32_to_cpu(sb->bitmap_offset);
1656
1657 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1658 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1659 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1660 mddev->new_level = le32_to_cpu(sb->new_level);
1661 mddev->new_layout = le32_to_cpu(sb->new_layout);
1662 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1663 } else {
1664 mddev->reshape_position = MaxSector;
1665 mddev->delta_disks = 0;
1666 mddev->new_level = mddev->level;
1667 mddev->new_layout = mddev->layout;
1668 mddev->new_chunk_sectors = mddev->chunk_sectors;
1669 }
1670
1671 } else if (mddev->pers == NULL) {
		/* Insist on a good event counter while assembling, except for
		 * spares (which don't need an event count) */
1674 ++ev1;
1675 if (rdev->desc_nr >= 0 &&
1676 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1677 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
1678 if (ev1 < mddev->events)
1679 return -EINVAL;
1680 } else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
1684 if (ev1 < mddev->bitmap->events_cleared)
1685 return 0;
1686 } else {
1687 if (ev1 < mddev->events)
1688
1689 return 0;
1690 }
1691 if (mddev->level != LEVEL_MULTIPATH) {
1692 int role;
1693 if (rdev->desc_nr < 0 ||
1694 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1695 role = 0xffff;
1696 rdev->desc_nr = -1;
1697 } else
1698 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1699 switch(role) {
1700 case 0xffff:
1701 break;
1702 case 0xfffe:
1703 set_bit(Faulty, &rdev->flags);
1704 break;
1705 default:
1706 if ((le32_to_cpu(sb->feature_map) &
1707 MD_FEATURE_RECOVERY_OFFSET))
1708 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1709 else
1710 set_bit(In_sync, &rdev->flags);
1711 rdev->raid_disk = role;
1712 break;
1713 }
1714 if (sb->devflags & WriteMostly1)
1715 set_bit(WriteMostly, &rdev->flags);
1716 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1717 set_bit(Replacement, &rdev->flags);
1718 } else
1719 set_bit(In_sync, &rdev->flags);
1720
1721 return 0;
1722}
1723
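/* sync_super for 1.x metadata: rebuild the in-memory superblock image from
 * current mddev and rdev state (including the bad-block log).  Nothing is
 * written to disk here.
 */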
1724static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1725{
1726 struct mdp_superblock_1 *sb;
1727 struct md_rdev *rdev2;
1728 int max_dev, i;
	/* make rdev->sb match mddev and rdev data. */
1731 sb = page_address(rdev->sb_page);
1732
1733 sb->feature_map = 0;
1734 sb->pad0 = 0;
1735 sb->recovery_offset = cpu_to_le64(0);
1736 memset(sb->pad1, 0, sizeof(sb->pad1));
1737 memset(sb->pad3, 0, sizeof(sb->pad3));
1738
1739 sb->utime = cpu_to_le64((__u64)mddev->utime);
1740 sb->events = cpu_to_le64(mddev->events);
1741 if (mddev->in_sync)
1742 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1743 else
1744 sb->resync_offset = cpu_to_le64(0);
1745
1746 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1747
1748 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1749 sb->size = cpu_to_le64(mddev->dev_sectors);
1750 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1751 sb->level = cpu_to_le32(mddev->level);
1752 sb->layout = cpu_to_le32(mddev->layout);
1753
1754 if (test_bit(WriteMostly, &rdev->flags))
1755 sb->devflags |= WriteMostly1;
1756 else
1757 sb->devflags &= ~WriteMostly1;
1758
1759 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1760 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1761 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1762 }
1763
1764 if (rdev->raid_disk >= 0 &&
1765 !test_bit(In_sync, &rdev->flags)) {
1766 sb->feature_map |=
1767 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1768 sb->recovery_offset =
1769 cpu_to_le64(rdev->recovery_offset);
1770 }
1771 if (test_bit(Replacement, &rdev->flags))
1772 sb->feature_map |=
1773 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1774
1775 if (mddev->reshape_position != MaxSector) {
1776 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1777 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1778 sb->new_layout = cpu_to_le32(mddev->new_layout);
1779 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1780 sb->new_level = cpu_to_le32(mddev->new_level);
1781 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1782 }
1783
1784 if (rdev->badblocks.count == 0)
1785 ;
1786 else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
1788 md_error(mddev, rdev);
1789 else {
1790 struct badblocks *bb = &rdev->badblocks;
1791 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1792 u64 *p = bb->page;
1793 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1794 if (bb->changed) {
1795 unsigned seq;
1796
1797retry:
1798 seq = read_seqbegin(&bb->lock);
1799
1800 memset(bbp, 0xff, PAGE_SIZE);
1801
1802 for (i = 0 ; i < bb->count ; i++) {
1803 u64 internal_bb = *p++;
1804 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1805 | BB_LEN(internal_bb));
1806 *bbp++ = cpu_to_le64(store_bb);
1807 }
1808 if (read_seqretry(&bb->lock, seq))
1809 goto retry;
1810
1811 bb->sector = (rdev->sb_start +
1812 (int)le32_to_cpu(sb->bblog_offset));
1813 bb->size = le16_to_cpu(sb->bblog_size);
1814 bb->changed = 0;
1815 }
1816 }
1817
1818 max_dev = 0;
1819 list_for_each_entry(rdev2, &mddev->disks, same_set)
1820 if (rdev2->desc_nr+1 > max_dev)
1821 max_dev = rdev2->desc_nr+1;
1822
1823 if (max_dev > le32_to_cpu(sb->max_dev)) {
1824 int bmask;
1825 sb->max_dev = cpu_to_le32(max_dev);
1826 rdev->sb_size = max_dev * 2 + 256;
1827 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1828 if (rdev->sb_size & bmask)
1829 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1830 } else
1831 max_dev = le32_to_cpu(sb->max_dev);
1832
1833 for (i=0; i<max_dev;i++)
1834 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1835
1836 list_for_each_entry(rdev2, &mddev->disks, same_set) {
1837 i = rdev2->desc_nr;
1838 if (test_bit(Faulty, &rdev2->flags))
1839 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1840 else if (test_bit(In_sync, &rdev2->flags))
1841 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1842 else if (rdev2->raid_disk >= 0)
1843 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1844 else
1845 sb->dev_roles[i] = cpu_to_le16(0xffff);
1846 }
1847
1848 sb->sb_csum = calc_sb_1_csum(sb);
1849}
1850
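/* rdev_size_change for 1.x metadata: recompute the usable data size and,
 * when the superblock lives at the end of the device, relocate it.  Returns
 * the new size in sectors, or 0 if the change is not possible.
 */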
1851static unsigned long long
1852super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1853{
1854 struct mdp_superblock_1 *sb;
1855 sector_t max_sectors;
1856 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1857 return 0;
1858 if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
1860 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1861 max_sectors -= rdev->data_offset;
1862 if (!num_sectors || num_sectors > max_sectors)
1863 num_sectors = max_sectors;
1864 } else if (rdev->mddev->bitmap_info.offset) {
1865
1866 return 0;
1867 } else {
		/* minor version 0; superblock after data */
1869 sector_t sb_start;
1870 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1871 sb_start &= ~(sector_t)(4*2 - 1);
1872 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1873 if (!num_sectors || num_sectors > max_sectors)
1874 num_sectors = max_sectors;
1875 rdev->sb_start = sb_start;
1876 }
1877 sb = page_address(rdev->sb_page);
1878 sb->data_size = cpu_to_le64(num_sectors);
1879 sb->super_offset = rdev->sb_start;
1880 sb->sb_csum = calc_sb_1_csum(sb);
1881 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1882 rdev->sb_page);
1883 md_super_wait(rdev->mddev);
1884 return num_sectors;
1885}
1886
1887static struct super_type super_types[] = {
1888 [0] = {
1889 .name = "0.90.0",
1890 .owner = THIS_MODULE,
1891 .load_super = super_90_load,
1892 .validate_super = super_90_validate,
1893 .sync_super = super_90_sync,
1894 .rdev_size_change = super_90_rdev_size_change,
1895 },
1896 [1] = {
1897 .name = "md-1",
1898 .owner = THIS_MODULE,
1899 .load_super = super_1_load,
1900 .validate_super = super_1_validate,
1901 .sync_super = super_1_sync,
1902 .rdev_size_change = super_1_rdev_size_change,
1903 },
1904};
1905
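/* Dispatch superblock updates either to an mddev-specific sync_super
 * override (when one is set) or to the handler for the array's metadata
 * version.
 */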
1906static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1907{
1908 if (mddev->sync_super) {
1909 mddev->sync_super(mddev, rdev);
1910 return;
1911 }
1912
1913 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1914
1915 super_types[mddev->major_version].sync_super(mddev, rdev);
1916}
1917
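/* Return 1 if the two arrays share any underlying physical device,
 * 0 otherwise.
 */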
1918static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1919{
1920 struct md_rdev *rdev, *rdev2;
1921
1922 rcu_read_lock();
1923 rdev_for_each_rcu(rdev, mddev1)
1924 rdev_for_each_rcu(rdev2, mddev2)
1925 if (rdev->bdev->bd_contains ==
1926 rdev2->bdev->bd_contains) {
1927 rcu_read_unlock();
1928 return 1;
1929 }
1930 rcu_read_unlock();
1931 return 0;
1932}
1933
1934static LIST_HEAD(pending_raid_disks);
1935
/*
 * Try to register a data integrity profile for an mddev.
 *
 * All component devices must expose the same integrity profile; otherwise
 * registration fails and the array runs without integrity support.
 */
1943int md_integrity_register(struct mddev *mddev)
1944{
1945 struct md_rdev *rdev, *reference = NULL;
1946
1947 if (list_empty(&mddev->disks))
1948 return 0;
1949 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
1950 return 0;
1951 list_for_each_entry(rdev, &mddev->disks, same_set) {
1952
1953 if (test_bit(Faulty, &rdev->flags))
1954 continue;
1955 if (rdev->raid_disk < 0)
1956 continue;
1957 if (!reference) {
1958
1959 reference = rdev;
1960 continue;
1961 }
1962
1963 if (blk_integrity_compare(reference->bdev->bd_disk,
1964 rdev->bdev->bd_disk) < 0)
1965 return -EINVAL;
1966 }
1967 if (!reference || !bdev_get_integrity(reference->bdev))
1968 return 0;
1969
	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
1973 if (blk_integrity_register(mddev->gendisk,
1974 bdev_get_integrity(reference->bdev)) != 0) {
1975 printk(KERN_ERR "md: failed to register integrity for %s\n",
1976 mdname(mddev));
1977 return -EINVAL;
1978 }
1979 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
1980 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
1981 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
1982 mdname(mddev));
1983 return -EINVAL;
1984 }
1985 return 0;
1986}
1987EXPORT_SYMBOL(md_integrity_register);
1988
/* Disable data integrity if a non-capable/non-matching disk is being added */
1990void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
1991{
1992 struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
1993 struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
1994
1995 if (!bi_mddev)
1996 return;
1997 if (rdev->raid_disk < 0)
1998 return;
1999 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
2000 rdev->bdev->bd_disk) >= 0)
2001 return;
2002 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
2003 blk_integrity_unregister(mddev->gendisk);
2004}
2005EXPORT_SYMBOL(md_integrity_add_rdev);
2006
2007static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
2008{
2009 char b[BDEVNAME_SIZE];
2010 struct kobject *ko;
2011 char *s;
2012 int err;
2013
2014 if (rdev->mddev) {
2015 MD_BUG();
2016 return -EINVAL;
2017 }
2018
	/* prevent duplicates */
2020 if (find_rdev(mddev, rdev->bdev->bd_dev))
2021 return -EEXIST;
2022
	/* make sure rdev->sectors exceeds mddev->dev_sectors */
2024 if (rdev->sectors && (mddev->dev_sectors == 0 ||
2025 rdev->sectors < mddev->dev_sectors)) {
2026 if (mddev->pers) {
			/* Cannot change size, so fail.
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
2031 if (mddev->level > 0)
2032 return -ENOSPC;
2033 } else
2034 mddev->dev_sectors = rdev->sectors;
2035 }
2036
	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check the number is not in use
	 */
2041 if (rdev->desc_nr < 0) {
2042 int choice = 0;
2043 if (mddev->pers) choice = mddev->raid_disks;
2044 while (find_rdev_nr(mddev, choice))
2045 choice++;
2046 rdev->desc_nr = choice;
2047 } else {
2048 if (find_rdev_nr(mddev, rdev->desc_nr))
2049 return -EBUSY;
2050 }
2051 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2052 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2053 mdname(mddev), mddev->max_disks);
2054 return -EBUSY;
2055 }
2056 bdevname(rdev->bdev,b);
2057 while ( (s=strchr(b, '/')) != NULL)
2058 *s = '!';
2059
2060 rdev->mddev = mddev;
2061 printk(KERN_INFO "md: bind<%s>\n", b);
2062
2063 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2064 goto fail;
2065
2066 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2067 if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
2069 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2070
2071 list_add_rcu(&rdev->same_set, &mddev->disks);
2072 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2073
	/* May as well allow recovery to be retried once */
2075 mddev->recovery_disabled++;
2076
2077 return 0;
2078
2079 fail:
2080 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2081 b, mdname(mddev));
2082 return err;
2083}
2084
2085static void md_delayed_delete(struct work_struct *ws)
2086{
2087 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2088 kobject_del(&rdev->kobj);
2089 kobject_put(&rdev->kobj);
2090}
2091
2092static void unbind_rdev_from_array(struct md_rdev * rdev)
2093{
2094 char b[BDEVNAME_SIZE];
2095 if (!rdev->mddev) {
2096 MD_BUG();
2097 return;
2098 }
2099 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2100 list_del_rcu(&rdev->same_set);
2101 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2102 rdev->mddev = NULL;
2103 sysfs_remove_link(&rdev->kobj, "block");
2104 sysfs_put(rdev->sysfs_state);
2105 rdev->sysfs_state = NULL;
2106 kfree(rdev->badblocks.page);
2107 rdev->badblocks.count = 0;
2108 rdev->badblocks.page = NULL;
2109
	/* We need to delay this, otherwise we can deadlock when
	 * writing 'remove' to "dev/state".  We also need
	 * to delay it due to rcu usage.
	 */
2113 synchronize_rcu();
2114 INIT_WORK(&rdev->del_work, md_delayed_delete);
2115 kobject_get(&rdev->kobj);
2116 queue_work(md_misc_wq, &rdev->del_work);
2117}
2118
/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */
2124static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2125{
2126 int err = 0;
2127 struct block_device *bdev;
2128 char b[BDEVNAME_SIZE];
2129
2130 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2131 shared ? (struct md_rdev *)lock_rdev : rdev);
2132 if (IS_ERR(bdev)) {
2133 printk(KERN_ERR "md: could not open %s.\n",
2134 __bdevname(dev, b));
2135 return PTR_ERR(bdev);
2136 }
2137 rdev->bdev = bdev;
2138 return err;
2139}
2140
2141static void unlock_rdev(struct md_rdev *rdev)
2142{
2143 struct block_device *bdev = rdev->bdev;
2144 rdev->bdev = NULL;
2145 if (!bdev)
2146 MD_BUG();
2147 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2148}
2149
2150void md_autodetect_dev(dev_t dev);
2151
2152static void export_rdev(struct md_rdev * rdev)
2153{
2154 char b[BDEVNAME_SIZE];
2155 printk(KERN_INFO "md: export_rdev(%s)\n",
2156 bdevname(rdev->bdev,b));
2157 if (rdev->mddev)
2158 MD_BUG();
2159 free_disk_sb(rdev);
2160#ifndef MODULE
2161 if (test_bit(AutoDetected, &rdev->flags))
2162 md_autodetect_dev(rdev->bdev->bd_dev);
2163#endif
2164 unlock_rdev(rdev);
2165 kobject_put(&rdev->kobj);
2166}
2167
2168static void kick_rdev_from_array(struct md_rdev * rdev)
2169{
2170 unbind_rdev_from_array(rdev);
2171 export_rdev(rdev);
2172}
2173
2174static void export_array(struct mddev *mddev)
2175{
2176 struct md_rdev *rdev, *tmp;
2177
2178 rdev_for_each(rdev, tmp, mddev) {
2179 if (!rdev->mddev) {
2180 MD_BUG();
2181 continue;
2182 }
2183 kick_rdev_from_array(rdev);
2184 }
2185 if (!list_empty(&mddev->disks))
2186 MD_BUG();
2187 mddev->raid_disks = 0;
2188 mddev->major_version = 0;
2189}
2190
2191static void print_desc(mdp_disk_t *desc)
2192{
2193 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
2194 desc->major,desc->minor,desc->raid_disk,desc->state);
2195}
2196
2197static void print_sb_90(mdp_super_t *sb)
2198{
2199 int i;
2200
2201 printk(KERN_INFO
2202 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
2203 sb->major_version, sb->minor_version, sb->patch_version,
2204 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
2205 sb->ctime);
2206 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
2207 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
2208 sb->md_minor, sb->layout, sb->chunk_size);
2209 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
2210 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
2211 sb->utime, sb->state, sb->active_disks, sb->working_disks,
2212 sb->failed_disks, sb->spare_disks,
2213 sb->sb_csum, (unsigned long)sb->events_lo);
2214
2215 printk(KERN_INFO);
2216 for (i = 0; i < MD_SB_DISKS; i++) {
2217 mdp_disk_t *desc;
2218
2219 desc = sb->disks + i;
2220 if (desc->number || desc->major || desc->minor ||
2221 desc->raid_disk || (desc->state && (desc->state != 4))) {
2222 printk(" D %2d: ", i);
2223 print_desc(desc);
2224 }
2225 }
2226 printk(KERN_INFO "md: THIS: ");
2227 print_desc(&sb->this_disk);
2228}
2229
2230static void print_sb_1(struct mdp_superblock_1 *sb)
2231{
2232 __u8 *uuid;
2233
2234 uuid = sb->set_uuid;
2235 printk(KERN_INFO
2236 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
2237 "md: Name: \"%s\" CT:%llu\n",
2238 le32_to_cpu(sb->major_version),
2239 le32_to_cpu(sb->feature_map),
2240 uuid,
2241 sb->set_name,
2242 (unsigned long long)le64_to_cpu(sb->ctime)
2243 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
2244
2245 uuid = sb->device_uuid;
2246 printk(KERN_INFO
2247 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
2248 " RO:%llu\n"
2249 "md: Dev:%08x UUID: %pU\n"
2250 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
2251 "md: (MaxDev:%u) \n",
2252 le32_to_cpu(sb->level),
2253 (unsigned long long)le64_to_cpu(sb->size),
2254 le32_to_cpu(sb->raid_disks),
2255 le32_to_cpu(sb->layout),
2256 le32_to_cpu(sb->chunksize),
2257 (unsigned long long)le64_to_cpu(sb->data_offset),
2258 (unsigned long long)le64_to_cpu(sb->data_size),
2259 (unsigned long long)le64_to_cpu(sb->super_offset),
2260 (unsigned long long)le64_to_cpu(sb->recovery_offset),
2261 le32_to_cpu(sb->dev_number),
2262 uuid,
2263 sb->devflags,
2264 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
2265 (unsigned long long)le64_to_cpu(sb->events),
2266 (unsigned long long)le64_to_cpu(sb->resync_offset),
2267 le32_to_cpu(sb->sb_csum),
2268 le32_to_cpu(sb->max_dev)
2269 );
2270}
2271
2272static void print_rdev(struct md_rdev *rdev, int major_version)
2273{
2274 char b[BDEVNAME_SIZE];
2275 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
2276 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
2277 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
2278 rdev->desc_nr);
2279 if (rdev->sb_loaded) {
2280 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
2281 switch (major_version) {
2282 case 0:
2283 print_sb_90(page_address(rdev->sb_page));
2284 break;
2285 case 1:
2286 print_sb_1(page_address(rdev->sb_page));
2287 break;
2288 }
2289 } else
2290 printk(KERN_INFO "md: no rdev superblock!\n");
2291}
2292
2293static void md_print_devices(void)
2294{
2295 struct list_head *tmp;
2296 struct md_rdev *rdev;
2297 struct mddev *mddev;
2298 char b[BDEVNAME_SIZE];
2299
2300 printk("\n");
2301 printk("md: **********************************\n");
2302 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
2303 printk("md: **********************************\n");
2304 for_each_mddev(mddev, tmp) {
2305
2306 if (mddev->bitmap)
2307 bitmap_print_sb(mddev->bitmap);
2308 else
2309 printk("%s: ", mdname(mddev));
2310 list_for_each_entry(rdev, &mddev->disks, same_set)
2311 printk("<%s>", bdevname(rdev->bdev,b));
2312 printk("\n");
2313
2314 list_for_each_entry(rdev, &mddev->disks, same_set)
2315 print_rdev(rdev, mddev->major_version);
2316 }
2317 printk("md: **********************************\n");
2318 printk("\n");
2319}
2320
2321
2322static void sync_sbs(struct mddev * mddev, int nospares)
2323{
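	/* Refresh each rdev's in-memory superblock image.  Devices whose
	 * event count already matches (or, with 'nospares', idle spares that
	 * are only one event behind) are marked sb_loaded = 2 so the write
	 * pass skips them; everything else gets sync_super() and sb_loaded = 1.
	 */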
2324
2325
2326
2327
2328
2329
2330 struct md_rdev *rdev;
2331 list_for_each_entry(rdev, &mddev->disks, same_set) {
2332 if (rdev->sb_events == mddev->events ||
2333 (nospares &&
2334 rdev->raid_disk < 0 &&
2335 rdev->sb_events+1 == mddev->events)) {
2336
2337 rdev->sb_loaded = 2;
2338 } else {
2339 sync_super(mddev, rdev);
2340 rdev->sb_loaded = 1;
2341 }
2342 }
2343}
2344
2345static void md_update_sb(struct mddev * mddev, int force_change)
2346{
2347 struct md_rdev *rdev;
2348 int sync_req;
2349 int nospares = 0;
2350 int any_badblocks_changed = 0;
2351
2352repeat:
2353
2354 list_for_each_entry(rdev, &mddev->disks, same_set) {
2355 if (rdev->raid_disk >= 0 &&
2356 mddev->delta_disks >= 0 &&
2357 !test_bit(In_sync, &rdev->flags) &&
2358 mddev->curr_resync_completed > rdev->recovery_offset)
2359 rdev->recovery_offset = mddev->curr_resync_completed;
2360
2361 }
2362 if (!mddev->persistent) {
2363 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2364 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2365 if (!mddev->external) {
2366 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2367 list_for_each_entry(rdev, &mddev->disks, same_set) {
2368 if (rdev->badblocks.changed) {
2369 md_ack_all_badblocks(&rdev->badblocks);
2370 md_error(mddev, rdev);
2371 }
2372 clear_bit(Blocked, &rdev->flags);
2373 clear_bit(BlockedBadBlocks, &rdev->flags);
2374 wake_up(&rdev->blocked_wait);
2375 }
2376 }
2377 wake_up(&mddev->sb_wait);
2378 return;
2379 }
2380
2381 spin_lock_irq(&mddev->write_lock);
2382
2383 mddev->utime = get_seconds();
2384
2385 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2386 force_change = 1;
2387 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
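		/* Just a clean<->dirty transition; spares can usually be
		 * left alone unless something below forces a full update.
		 */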
2388
2389
2390
2391
2392 nospares = 1;
2393 if (force_change)
2394 nospares = 0;
2395 if (mddev->degraded)
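		/* On a degraded array skipping spares is unsafe: a removed
		 * device could still look up to date and be re-added without
		 * a resync, so always bump the event count.
		 */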
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405 nospares = 0;
2406
2407 sync_req = mddev->in_sync;
2408
2409
2410
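	/* For a pure dirty<->clean transition on a clean array the event
	 * count can be rolled back rather than incremented, so idle spares
	 * do not need their superblocks rewritten.
	 */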
2411 if (nospares
2412 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2413 && mddev->can_decrease_events
2414 && mddev->events != 1) {
2415 mddev->events--;
2416 mddev->can_decrease_events = 0;
2417 } else {
2418
2419 mddev->events ++;
2420 mddev->can_decrease_events = nospares;
2421 }
2422
2423 if (!mddev->events) {
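		/* The 64-bit event counter wrapped to zero; that should never
		 * happen, so complain loudly and step the counter back.
		 */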
2424
2425
2426
2427
2428
2429 MD_BUG();
2430 mddev->events --;
2431 }
2432
2433 list_for_each_entry(rdev, &mddev->disks, same_set) {
2434 if (rdev->badblocks.changed)
2435 any_badblocks_changed++;
2436 if (test_bit(Faulty, &rdev->flags))
2437 set_bit(FaultRecorded, &rdev->flags);
2438 }
2439
2440 sync_sbs(mddev, nospares);
2441 spin_unlock_irq(&mddev->write_lock);
2442
2443 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2444 mdname(mddev), mddev->in_sync);
2445
2446 bitmap_update_sb(mddev->bitmap);
2447 list_for_each_entry(rdev, &mddev->disks, same_set) {
2448 char b[BDEVNAME_SIZE];
2449
2450 if (rdev->sb_loaded != 1)
2451 continue;
2452
2453 if (!test_bit(Faulty, &rdev->flags) &&
2454 rdev->saved_raid_disk == -1) {
2455 md_super_write(mddev,rdev,
2456 rdev->sb_start, rdev->sb_size,
2457 rdev->sb_page);
2458 pr_debug("md: (write) %s's sb offset: %llu\n",
2459 bdevname(rdev->bdev, b),
2460 (unsigned long long)rdev->sb_start);
2461 rdev->sb_events = mddev->events;
2462 if (rdev->badblocks.size) {
2463 md_super_write(mddev, rdev,
2464 rdev->badblocks.sector,
2465 rdev->badblocks.size << 9,
2466 rdev->bb_page);
2467 rdev->badblocks.size = 0;
2468 }
2469
2470 } else if (test_bit(Faulty, &rdev->flags))
2471 pr_debug("md: %s (skipping faulty)\n",
2472 bdevname(rdev->bdev, b));
2473 else
2474 pr_debug("(skipping incremental s/r ");
2475
2476 if (mddev->level == LEVEL_MULTIPATH)
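			/* MULTIPATH: all superblocks are identical, so writing one is enough */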
2477
2478 break;
2479 }
2480 md_super_wait(mddev);
2481
2482
2483 spin_lock_irq(&mddev->write_lock);
2484 if (mddev->in_sync != sync_req ||
2485 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2486
2487 spin_unlock_irq(&mddev->write_lock);
2488 goto repeat;
2489 }
2490 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2491 spin_unlock_irq(&mddev->write_lock);
2492 wake_up(&mddev->sb_wait);
2493 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2494 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2495
2496 list_for_each_entry(rdev, &mddev->disks, same_set) {
2497 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2498 clear_bit(Blocked, &rdev->flags);
2499
2500 if (any_badblocks_changed)
2501 md_ack_all_badblocks(&rdev->badblocks);
2502 clear_bit(BlockedBadBlocks, &rdev->flags);
2503 wake_up(&rdev->blocked_wait);
2504 }
2505}
2506
2507
2508
2509
2510static int cmd_match(const char *cmd, const char *str)
2511{
2512
2513
2514
2515
2516 while (*cmd && *str && *cmd == *str) {
2517 cmd++;
2518 str++;
2519 }
2520 if (*cmd == '\n')
2521 cmd++;
2522 if (*str || *cmd)
2523 return 0;
2524 return 1;
2525}
2526
2527struct rdev_sysfs_entry {
2528 struct attribute attr;
2529 ssize_t (*show)(struct md_rdev *, char *);
2530 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2531};
2532
2533static ssize_t
2534state_show(struct md_rdev *rdev, char *page)
2535{
2536 char *sep = "";
2537 size_t len = 0;
2538
2539 if (test_bit(Faulty, &rdev->flags) ||
2540 rdev->badblocks.unacked_exist) {
2541 len+= sprintf(page+len, "%sfaulty",sep);
2542 sep = ",";
2543 }
2544 if (test_bit(In_sync, &rdev->flags)) {
2545 len += sprintf(page+len, "%sin_sync",sep);
2546 sep = ",";
2547 }
2548 if (test_bit(WriteMostly, &rdev->flags)) {
2549 len += sprintf(page+len, "%swrite_mostly",sep);
2550 sep = ",";
2551 }
2552 if (test_bit(Blocked, &rdev->flags) ||
2553 (rdev->badblocks.unacked_exist
2554 && !test_bit(Faulty, &rdev->flags))) {
2555 len += sprintf(page+len, "%sblocked", sep);
2556 sep = ",";
2557 }
2558 if (!test_bit(Faulty, &rdev->flags) &&
2559 !test_bit(In_sync, &rdev->flags)) {
2560 len += sprintf(page+len, "%sspare", sep);
2561 sep = ",";
2562 }
2563 if (test_bit(WriteErrorSeen, &rdev->flags)) {
2564 len += sprintf(page+len, "%swrite_error", sep);
2565 sep = ",";
2566 }
2567 if (test_bit(WantReplacement, &rdev->flags)) {
2568 len += sprintf(page+len, "%swant_replacement", sep);
2569 sep = ",";
2570 }
2571 if (test_bit(Replacement, &rdev->flags)) {
2572 len += sprintf(page+len, "%sreplacement", sep);
2573 sep = ",";
2574 }
2575
2576 return len+sprintf(page+len, "\n");
2577}
2578
2579static ssize_t
2580state_store(struct md_rdev *rdev, const char *buf, size_t len)
2581{
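	/* Recognised words (see the handlers below):
	 *  faulty        - simulate an error on this device
	 *  remove        - disconnect the device from the array
	 *  writemostly / -writemostly  - set / clear WriteMostly
	 *  blocked / -blocked          - set / clear Blocked
	 *  insync        - mark the device in-sync (only while it holds no slot)
	 *  write_error / -write_error  - set / clear WriteErrorSeen
	 *  want_replacement / -want_replacement
	 *  replacement / -replacement  - only before the array is started
	 */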
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593 int err = -EINVAL;
2594 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2595 md_error(rdev->mddev, rdev);
2596 if (test_bit(Faulty, &rdev->flags))
2597 err = 0;
2598 else
2599 err = -EBUSY;
2600 } else if (cmd_match(buf, "remove")) {
2601 if (rdev->raid_disk >= 0)
2602 err = -EBUSY;
2603 else {
2604 struct mddev *mddev = rdev->mddev;
2605 kick_rdev_from_array(rdev);
2606 if (mddev->pers)
2607 md_update_sb(mddev, 1);
2608 md_new_event(mddev);
2609 err = 0;
2610 }
2611 } else if (cmd_match(buf, "writemostly")) {
2612 set_bit(WriteMostly, &rdev->flags);
2613 err = 0;
2614 } else if (cmd_match(buf, "-writemostly")) {
2615 clear_bit(WriteMostly, &rdev->flags);
2616 err = 0;
2617 } else if (cmd_match(buf, "blocked")) {
2618 set_bit(Blocked, &rdev->flags);
2619 err = 0;
2620 } else if (cmd_match(buf, "-blocked")) {
2621 if (!test_bit(Faulty, &rdev->flags) &&
2622 rdev->badblocks.unacked_exist) {
2623
2624
2625
2626 md_error(rdev->mddev, rdev);
2627 }
2628 clear_bit(Blocked, &rdev->flags);
2629 clear_bit(BlockedBadBlocks, &rdev->flags);
2630 wake_up(&rdev->blocked_wait);
2631 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2632 md_wakeup_thread(rdev->mddev->thread);
2633
2634 err = 0;
2635 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2636 set_bit(In_sync, &rdev->flags);
2637 err = 0;
2638 } else if (cmd_match(buf, "write_error")) {
2639 set_bit(WriteErrorSeen, &rdev->flags);
2640 err = 0;
2641 } else if (cmd_match(buf, "-write_error")) {
2642 clear_bit(WriteErrorSeen, &rdev->flags);
2643 err = 0;
2644 } else if (cmd_match(buf, "want_replacement")) {
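		/* Any device holding a slot that is not itself a replacement
		 * may be asked to be replaced at any time; recovery is then
		 * kicked so a replacement can be found.
		 */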
2645
2646
2647
2648
2649 if (rdev->raid_disk >= 0 &&
2650 !test_bit(Replacement, &rdev->flags))
2651 set_bit(WantReplacement, &rdev->flags);
2652 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2653 md_wakeup_thread(rdev->mddev->thread);
2654 err = 0;
2655 } else if (cmd_match(buf, "-want_replacement")) {
2656
2657
2658
2659 err = 0;
2660 clear_bit(WantReplacement, &rdev->flags);
2661 } else if (cmd_match(buf, "replacement")) {
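		/* A device can only be marked Replacement before the array is
		 * started; once running, replacements come from spares
		 * automatically.
		 */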
2662
2663
2664
2665
2666 if (rdev->mddev->pers)
2667 err = -EBUSY;
2668 else {
2669 set_bit(Replacement, &rdev->flags);
2670 err = 0;
2671 }
2672 } else if (cmd_match(buf, "-replacement")) {
2673
2674 if (rdev->mddev->pers)
2675 err = -EBUSY;
2676 else {
2677 clear_bit(Replacement, &rdev->flags);
2678 err = 0;
2679 }
2680 }
2681 if (!err)
2682 sysfs_notify_dirent_safe(rdev->sysfs_state);
2683 return err ? err : len;
2684}
2685static struct rdev_sysfs_entry rdev_state =
2686__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2687
2688static ssize_t
2689errors_show(struct md_rdev *rdev, char *page)
2690{
2691 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2692}
2693
2694static ssize_t
2695errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2696{
2697 char *e;
2698 unsigned long n = simple_strtoul(buf, &e, 10);
2699 if (*buf && (*e == 0 || *e == '\n')) {
2700 atomic_set(&rdev->corrected_errors, n);
2701 return len;
2702 }
2703 return -EINVAL;
2704}
2705static struct rdev_sysfs_entry rdev_errors =
2706__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2707
2708static ssize_t
2709slot_show(struct md_rdev *rdev, char *page)
2710{
2711 if (rdev->raid_disk < 0)
2712 return sprintf(page, "none\n");
2713 else
2714 return sprintf(page, "%d\n", rdev->raid_disk);
2715}
2716
2717static ssize_t
2718slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2719{
2720 char *e;
2721 int err;
2722 int slot = simple_strtoul(buf, &e, 10);
2723 if (strncmp(buf, "none", 4)==0)
2724 slot = -1;
2725 else if (e==buf || (*e && *e!= '\n'))
2726 return -EINVAL;
2727 if (rdev->mddev->pers && slot == -1) {
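		/* Writing "none" to an active array: hot-remove the device
		 * through the personality and drop its rd%d sysfs link.
		 */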
2728
2729
2730
2731
2732
2733
2734
2735 if (rdev->raid_disk == -1)
2736 return -EEXIST;
2737
2738 if (rdev->mddev->pers->hot_remove_disk == NULL)
2739 return -EINVAL;
2740 err = rdev->mddev->pers->
2741 hot_remove_disk(rdev->mddev, rdev);
2742 if (err)
2743 return err;
2744 sysfs_unlink_rdev(rdev->mddev, rdev);
2745 rdev->raid_disk = -1;
2746 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2747 md_wakeup_thread(rdev->mddev->thread);
2748 } else if (rdev->mddev->pers) {
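		/* Array is active: activate this so-far-unused device in the
		 * requested slot via ->hot_add_disk().
		 */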
2749
2750
2751
2752
2753 if (rdev->raid_disk != -1)
2754 return -EBUSY;
2755
2756 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2757 return -EBUSY;
2758
2759 if (rdev->mddev->pers->hot_add_disk == NULL)
2760 return -EINVAL;
2761
2762 if (slot >= rdev->mddev->raid_disks &&
2763 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2764 return -ENOSPC;
2765
2766 rdev->raid_disk = slot;
2767 if (test_bit(In_sync, &rdev->flags))
2768 rdev->saved_raid_disk = slot;
2769 else
2770 rdev->saved_raid_disk = -1;
2771 clear_bit(In_sync, &rdev->flags);
2772 err = rdev->mddev->pers->
2773 hot_add_disk(rdev->mddev, rdev);
2774 if (err) {
2775 rdev->raid_disk = -1;
2776 return err;
2777 } else
2778 sysfs_notify_dirent_safe(rdev->sysfs_state);
2779 if (sysfs_link_rdev(rdev->mddev, rdev))
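			/* a sysfs link failure here is not fatal; ignore it */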
2780 ;
2781
2782 } else {
2783 if (slot >= rdev->mddev->raid_disks &&
2784 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2785 return -ENOSPC;
2786 rdev->raid_disk = slot;
2787
2788 clear_bit(Faulty, &rdev->flags);
2789 clear_bit(WriteMostly, &rdev->flags);
2790 set_bit(In_sync, &rdev->flags);
2791 sysfs_notify_dirent_safe(rdev->sysfs_state);
2792 }
2793 return len;
2794}
2795
2796
2797static struct rdev_sysfs_entry rdev_slot =
2798__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2799
2800static ssize_t
2801offset_show(struct md_rdev *rdev, char *page)
2802{
2803 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2804}
2805
2806static ssize_t
2807offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2808{
2809 char *e;
2810 unsigned long long offset = simple_strtoull(buf, &e, 10);
2811 if (e==buf || (*e && *e != '\n'))
2812 return -EINVAL;
2813 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2814 return -EBUSY;
2815 if (rdev->sectors && rdev->mddev->external)
2816
2817
2818 return -EBUSY;
2819 rdev->data_offset = offset;
2820 return len;
2821}
2822
2823static struct rdev_sysfs_entry rdev_offset =
2824__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2825
2826static ssize_t
2827rdev_size_show(struct md_rdev *rdev, char *page)
2828{
2829 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2830}
2831
2832static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2833{
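	/* return 1 if the ranges [s1, s1+l1) and [s2, s2+l2) overlap */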
2834
2835 if (s1+l1 <= s2)
2836 return 0;
2837 if (s2+l2 <= s1)
2838 return 0;
2839 return 1;
2840}
2841
2842static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2843{
2844 unsigned long long blocks;
2845 sector_t new;
2846
2847 if (strict_strtoull(buf, 10, &blocks) < 0)
2848 return -EINVAL;
2849
2850 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2851 return -EINVAL;
2852
2853 new = blocks * 2;
2854 if (new != blocks * 2)
2855 return -EINVAL;
2856
2857 *sectors = new;
2858 return 0;
2859}
2860
2861static ssize_t
2862rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2863{
2864 struct mddev *my_mddev = rdev->mddev;
2865 sector_t oldsectors = rdev->sectors;
2866 sector_t sectors;
2867
2868	if (strict_blocks_to_sectors(buf, &sectors) < 0)
2869 return -EINVAL;
2870 if (my_mddev->pers && rdev->raid_disk >= 0) {
2871 if (my_mddev->persistent) {
2872 sectors = super_types[my_mddev->major_version].
2873 rdev_size_change(rdev, sectors);
2874 if (!sectors)
2875 return -EBUSY;
2876 } else if (!sectors)
2877 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2878 rdev->data_offset;
2879 }
2880 if (sectors < my_mddev->dev_sectors)
2881 return -EINVAL;
2882
2883 rdev->sectors = sectors;
2884 if (sectors > oldsectors && my_mddev->external) {
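		/* Growing an externally-managed rdev: check that no other
		 * array uses an overlapping region of the same bdev.
		 * my_mddev is unlocked while the other arrays are inspected
		 * so that taking their locks cannot deadlock.
		 */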
2885
2886
2887
2888
2889
2890 struct mddev *mddev;
2891 int overlap = 0;
2892 struct list_head *tmp;
2893
2894 mddev_unlock(my_mddev);
2895 for_each_mddev(mddev, tmp) {
2896 struct md_rdev *rdev2;
2897
2898 mddev_lock(mddev);
2899 list_for_each_entry(rdev2, &mddev->disks, same_set)
2900 if (rdev->bdev == rdev2->bdev &&
2901 rdev != rdev2 &&
2902 overlaps(rdev->data_offset, rdev->sectors,
2903 rdev2->data_offset,
2904 rdev2->sectors)) {
2905 overlap = 1;
2906 break;
2907 }
2908 mddev_unlock(mddev);
2909 if (overlap) {
2910 mddev_put(mddev);
2911 break;
2912 }
2913 }
2914 mddev_lock(my_mddev);
2915 if (overlap) {
2916
2917
2918
2919
2920
2921
2922 rdev->sectors = oldsectors;
2923 return -EBUSY;
2924 }
2925 }
2926 return len;
2927}
2928
2929static struct rdev_sysfs_entry rdev_size =
2930__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
2931
2932
2933static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
2934{
2935 unsigned long long recovery_start = rdev->recovery_offset;
2936
2937 if (test_bit(In_sync, &rdev->flags) ||
2938 recovery_start == MaxSector)
2939 return sprintf(page, "none\n");
2940
2941 return sprintf(page, "%llu\n", recovery_start);
2942}
2943
2944static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
2945{
2946 unsigned long long recovery_start;
2947
2948 if (cmd_match(buf, "none"))
2949 recovery_start = MaxSector;
2950 else if (strict_strtoull(buf, 10, &recovery_start))
2951 return -EINVAL;
2952
2953 if (rdev->mddev->pers &&
2954 rdev->raid_disk >= 0)
2955 return -EBUSY;
2956
2957 rdev->recovery_offset = recovery_start;
2958 if (recovery_start == MaxSector)
2959 set_bit(In_sync, &rdev->flags);
2960 else
2961 clear_bit(In_sync, &rdev->flags);
2962 return len;
2963}
2964
2965static struct rdev_sysfs_entry rdev_recovery_start =
2966__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
2967
2968
2969static ssize_t
2970badblocks_show(struct badblocks *bb, char *page, int unack);
2971static ssize_t
2972badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
2973
2974static ssize_t bb_show(struct md_rdev *rdev, char *page)
2975{
2976 return badblocks_show(&rdev->badblocks, page, 0);
2977}
2978static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
2979{
2980 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
2981
2982 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
2983 wake_up(&rdev->blocked_wait);
2984 return rv;
2985}
2986static struct rdev_sysfs_entry rdev_bad_blocks =
2987__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
2988
2989
2990static ssize_t ubb_show(struct md_rdev *rdev, char *page)
2991{
2992 return badblocks_show(&rdev->badblocks, page, 1);
2993}
2994static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
2995{
2996 return badblocks_store(&rdev->badblocks, page, len, 1);
2997}
2998static struct rdev_sysfs_entry rdev_unack_bad_blocks =
2999__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3000
3001static struct attribute *rdev_default_attrs[] = {
3002 &rdev_state.attr,
3003 &rdev_errors.attr,
3004 &rdev_slot.attr,
3005 &rdev_offset.attr,
3006 &rdev_size.attr,
3007 &rdev_recovery_start.attr,
3008 &rdev_bad_blocks.attr,
3009 &rdev_unack_bad_blocks.attr,
3010 NULL,
3011};
3012static ssize_t
3013rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3014{
3015 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3016 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3017 struct mddev *mddev = rdev->mddev;
3018 ssize_t rv;
3019
3020 if (!entry->show)
3021 return -EIO;
3022
3023 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3024 if (!rv) {
3025 if (rdev->mddev == NULL)
3026 rv = -EBUSY;
3027 else
3028 rv = entry->show(rdev, page);
3029 mddev_unlock(mddev);
3030 }
3031 return rv;
3032}
3033
3034static ssize_t
3035rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3036 const char *page, size_t length)
3037{
3038 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3039 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3040 ssize_t rv;
3041 struct mddev *mddev = rdev->mddev;
3042
3043 if (!entry->store)
3044 return -EIO;
3045 if (!capable(CAP_SYS_ADMIN))
3046 return -EACCES;
3047 rv = mddev ? mddev_lock(mddev): -EBUSY;
3048 if (!rv) {
3049 if (rdev->mddev == NULL)
3050 rv = -EBUSY;
3051 else
3052 rv = entry->store(rdev, page, length);
3053 mddev_unlock(mddev);
3054 }
3055 return rv;
3056}
3057
3058static void rdev_free(struct kobject *ko)
3059{
3060 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3061 kfree(rdev);
3062}
3063static const struct sysfs_ops rdev_sysfs_ops = {
3064 .show = rdev_attr_show,
3065 .store = rdev_attr_store,
3066};
3067static struct kobj_type rdev_ktype = {
3068 .release = rdev_free,
3069 .sysfs_ops = &rdev_sysfs_ops,
3070 .default_attrs = rdev_default_attrs,
3071};
3072
3073int md_rdev_init(struct md_rdev *rdev)
3074{
3075 rdev->desc_nr = -1;
3076 rdev->saved_raid_disk = -1;
3077 rdev->raid_disk = -1;
3078 rdev->flags = 0;
3079 rdev->data_offset = 0;
3080 rdev->sb_events = 0;
3081 rdev->last_read_error.tv_sec = 0;
3082 rdev->last_read_error.tv_nsec = 0;
3083 rdev->sb_loaded = 0;
3084 rdev->bb_page = NULL;
3085 atomic_set(&rdev->nr_pending, 0);
3086 atomic_set(&rdev->read_errors, 0);
3087 atomic_set(&rdev->corrected_errors, 0);
3088
3089 INIT_LIST_HEAD(&rdev->same_set);
3090 init_waitqueue_head(&rdev->blocked_wait);
3091
3092
3093
3094
3095
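	/* Reserve space for the bad-block list even on arrays that may
	 * never be able to use it.
	 */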
3096 rdev->badblocks.count = 0;
3097 rdev->badblocks.shift = 0;
3098 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
3099 seqlock_init(&rdev->badblocks.lock);
3100 if (rdev->badblocks.page == NULL)
3101 return -ENOMEM;
3102
3103 return 0;
3104}
3105EXPORT_SYMBOL_GPL(md_rdev_init);
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
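/*
 * Import a device.  If 'super_format' >= 0, sanity-check the superblock.
 * The device is rejected if it has zero size or no valid superblock.
 */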
3116static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3117{
3118 char b[BDEVNAME_SIZE];
3119 int err;
3120 struct md_rdev *rdev;
3121 sector_t size;
3122
3123 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3124 if (!rdev) {
3125 printk(KERN_ERR "md: could not alloc mem for new device!\n");
3126 return ERR_PTR(-ENOMEM);
3127 }
3128
3129 err = md_rdev_init(rdev);
3130 if (err)
3131 goto abort_free;
3132 err = alloc_disk_sb(rdev);
3133 if (err)
3134 goto abort_free;
3135
3136 err = lock_rdev(rdev, newdev, super_format == -2);
3137 if (err)
3138 goto abort_free;
3139
3140 kobject_init(&rdev->kobj, &rdev_ktype);
3141
3142 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3143 if (!size) {
3144 printk(KERN_WARNING
3145 "md: %s has zero or unknown size, marking faulty!\n",
3146 bdevname(rdev->bdev,b));
3147 err = -EINVAL;
3148 goto abort_free;
3149 }
3150
3151 if (super_format >= 0) {
3152 err = super_types[super_format].
3153 load_super(rdev, NULL, super_minor);
3154 if (err == -EINVAL) {
3155 printk(KERN_WARNING
3156 "md: %s does not have a valid v%d.%d "
3157 "superblock, not importing!\n",
3158 bdevname(rdev->bdev,b),
3159 super_format, super_minor);
3160 goto abort_free;
3161 }
3162 if (err < 0) {
3163 printk(KERN_WARNING
3164 "md: could not read %s's sb, not importing!\n",
3165 bdevname(rdev->bdev,b));
3166 goto abort_free;
3167 }
3168 }
3169 if (super_format == -1)
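		/* non-persistent metadata (or 0.90 hot-add): no bad-block support */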
3170
3171 rdev->badblocks.shift = -1;
3172
3173 return rdev;
3174
3175abort_free:
3176 if (rdev->bdev)
3177 unlock_rdev(rdev);
3178 free_disk_sb(rdev);
3179 kfree(rdev->badblocks.page);
3180 kfree(rdev);
3181 return ERR_PTR(err);
3182}
3183
3184
3185
3186
3187
3188
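/*
 * Check a full RAID array for plausibility
 */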
3189static void analyze_sbs(struct mddev * mddev)
3190{
3191 int i;
3192 struct md_rdev *rdev, *freshest, *tmp;
3193 char b[BDEVNAME_SIZE];
3194
3195 freshest = NULL;
3196 rdev_for_each(rdev, tmp, mddev)
3197 switch (super_types[mddev->major_version].
3198 load_super(rdev, freshest, mddev->minor_version)) {
3199 case 1:
3200 freshest = rdev;
3201 break;
3202 case 0:
3203 break;
3204 default:
3205 printk( KERN_ERR \
3206 "md: fatal superblock inconsistency in %s"
3207 " -- removing from array\n",
3208 bdevname(rdev->bdev,b));
3209 kick_rdev_from_array(rdev);
3210 }
3211
3212
3213 super_types[mddev->major_version].
3214 validate_super(mddev, freshest);
3215
3216 i = 0;
3217 rdev_for_each(rdev, tmp, mddev) {
3218 if (mddev->max_disks &&
3219 (rdev->desc_nr >= mddev->max_disks ||
3220 i > mddev->max_disks)) {
3221 printk(KERN_WARNING
3222 "md: %s: %s: only %d devices permitted\n",
3223 mdname(mddev), bdevname(rdev->bdev, b),
3224 mddev->max_disks);
3225 kick_rdev_from_array(rdev);
3226 continue;
3227 }
3228 if (rdev != freshest)
3229 if (super_types[mddev->major_version].
3230 validate_super(mddev, rdev)) {
3231 printk(KERN_WARNING "md: kicking non-fresh %s"
3232 " from array!\n",
3233 bdevname(rdev->bdev,b));
3234 kick_rdev_from_array(rdev);
3235 continue;
3236 }
3237 if (mddev->level == LEVEL_MULTIPATH) {
3238 rdev->desc_nr = i++;
3239 rdev->raid_disk = rdev->desc_nr;
3240 set_bit(In_sync, &rdev->flags);
3241 } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
3242 rdev->raid_disk = -1;
3243 clear_bit(In_sync, &rdev->flags);
3244 }
3245 }
3246}
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
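/* Parse a decimal string such as "1.25", allowing up to 'scale' fractional
 * digits, and return the value multiplied by 10^scale (so "1.25" with
 * scale 3 yields 1250).  Used below for safe_mode_delay.
 */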
3258int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3259{
3260 unsigned long result = 0;
3261 long decimals = -1;
3262 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3263 if (*cp == '.')
3264 decimals = 0;
3265 else if (decimals < scale) {
3266 unsigned int value;
3267 value = *cp - '0';
3268 result = result * 10 + value;
3269 if (decimals >= 0)
3270 decimals++;
3271 }
3272 cp++;
3273 }
3274 if (*cp == '\n')
3275 cp++;
3276 if (*cp)
3277 return -EINVAL;
3278 if (decimals < 0)
3279 decimals = 0;
3280 while (decimals < scale) {
3281 result *= 10;
3282 decimals ++;
3283 }
3284 *res = result;
3285 return 0;
3286}
3287
3288
3289static void md_safemode_timeout(unsigned long data);
3290
3291static ssize_t
3292safe_delay_show(struct mddev *mddev, char *page)
3293{
3294 int msec = (mddev->safemode_delay*1000)/HZ;
3295 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3296}
3297static ssize_t
3298safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3299{
3300 unsigned long msec;
3301
3302 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3303 return -EINVAL;
3304 if (msec == 0)
3305 mddev->safemode_delay = 0;
3306 else {
3307 unsigned long old_delay = mddev->safemode_delay;
3308 mddev->safemode_delay = (msec*HZ)/1000;
3309 if (mddev->safemode_delay == 0)
3310 mddev->safemode_delay = 1;
3311 if (mddev->safemode_delay < old_delay)
3312 md_safemode_timeout((unsigned long)mddev);
3313 }
3314 return len;
3315}
3316static struct md_sysfs_entry md_safe_delay =
3317__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3318
3319static ssize_t
3320level_show(struct mddev *mddev, char *page)
3321{
3322 struct md_personality *p = mddev->pers;
3323 if (p)
3324 return sprintf(page, "%s\n", p->name);
3325 else if (mddev->clevel[0])
3326 return sprintf(page, "%s\n", mddev->clevel);
3327 else if (mddev->level != LEVEL_NONE)
3328 return sprintf(page, "%d\n", mddev->level);
3329 else
3330 return 0;
3331}
3332
3333static ssize_t
3334level_store(struct mddev *mddev, const char *buf, size_t len)
3335{
3336 char clevel[16];
3337 ssize_t rv = len;
3338 struct md_personality *pers;
3339 long level;
3340 void *priv;
3341 struct md_rdev *rdev;
3342
3343 if (mddev->pers == NULL) {
3344 if (len == 0)
3345 return 0;
3346 if (len >= sizeof(mddev->clevel))
3347 return -ENOSPC;
3348 strncpy(mddev->clevel, buf, len);
3349 if (mddev->clevel[len-1] == '\n')
3350 len--;
3351 mddev->clevel[len] = 0;
3352 mddev->level = LEVEL_NONE;
3353 return rv;
3354 }
3355
3356
3357
3358
3359
3360
3361
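	/* Request to change the personality of an active array.  Refuse while
	 * a resync/reshape is in flight or sysfs is busy, and only proceed if
	 * the old personality can be quiesced and the new one can take over.
	 */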
3362 if (mddev->sync_thread ||
3363 mddev->reshape_position != MaxSector ||
3364 mddev->sysfs_active)
3365 return -EBUSY;
3366
3367 if (!mddev->pers->quiesce) {
3368 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3369 mdname(mddev), mddev->pers->name);
3370 return -EINVAL;
3371 }
3372
3373
3374 if (len == 0 || len >= sizeof(clevel))
3375 return -EINVAL;
3376 strncpy(clevel, buf, len);
3377 if (clevel[len-1] == '\n')
3378 len--;
3379 clevel[len] = 0;
3380 if (strict_strtol(clevel, 10, &level))
3381 level = LEVEL_NONE;
3382
3383 if (request_module("md-%s", clevel) != 0)
3384 request_module("md-level-%s", clevel);
3385 spin_lock(&pers_lock);
3386 pers = find_pers(level, clevel);
3387 if (!pers || !try_module_get(pers->owner)) {
3388 spin_unlock(&pers_lock);
3389 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3390 return -EINVAL;
3391 }
3392 spin_unlock(&pers_lock);
3393
3394 if (pers == mddev->pers) {
3395
3396 module_put(pers->owner);
3397 return rv;
3398 }
3399 if (!pers->takeover) {
3400 module_put(pers->owner);
3401 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3402 mdname(mddev), clevel);
3403 return -EINVAL;
3404 }
3405
3406 list_for_each_entry(rdev, &mddev->disks, same_set)
3407 rdev->new_raid_disk = rdev->raid_disk;
3408
3409
3410
3411
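	/* ->takeover must set new_* and/or delta_disks on success, and may
	 * already have modified them on failure, so they are restored below.
	 */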
3412 priv = pers->takeover(mddev);
3413 if (IS_ERR(priv)) {
3414 mddev->new_level = mddev->level;
3415 mddev->new_layout = mddev->layout;
3416 mddev->new_chunk_sectors = mddev->chunk_sectors;
3417 mddev->raid_disks -= mddev->delta_disks;
3418 mddev->delta_disks = 0;
3419 module_put(pers->owner);
3420 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3421 mdname(mddev), clevel);
3422 return PTR_ERR(priv);
3423 }
3424
3425
3426 mddev_suspend(mddev);
3427 mddev->pers->stop(mddev);
3428
3429 if (mddev->pers->sync_request == NULL &&
3430 pers->sync_request != NULL) {
3431
3432 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3433 printk(KERN_WARNING
3434 "md: cannot register extra attributes for %s\n",
3435 mdname(mddev));
3436 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
3437 }
3438 if (mddev->pers->sync_request != NULL &&
3439 pers->sync_request == NULL) {
3440
3441 if (mddev->to_remove == NULL)
3442 mddev->to_remove = &md_redundancy_group;
3443 }
3444
3445 if (mddev->pers->sync_request == NULL &&
3446 mddev->external) {
3447
3448
3449
3450
3451
3452
3453
3454 mddev->in_sync = 0;
3455 mddev->safemode_delay = 0;
3456 mddev->safemode = 0;
3457 }
3458
3459 list_for_each_entry(rdev, &mddev->disks, same_set) {
3460 if (rdev->raid_disk < 0)
3461 continue;
3462 if (rdev->new_raid_disk >= mddev->raid_disks)
3463 rdev->new_raid_disk = -1;
3464 if (rdev->new_raid_disk == rdev->raid_disk)
3465 continue;
3466 sysfs_unlink_rdev(mddev, rdev);
3467 }
3468 list_for_each_entry(rdev, &mddev->disks, same_set) {
3469 if (rdev->raid_disk < 0)
3470 continue;
3471 if (rdev->new_raid_disk == rdev->raid_disk)
3472 continue;
3473 rdev->raid_disk = rdev->new_raid_disk;
3474 if (rdev->raid_disk < 0)
3475 clear_bit(In_sync, &rdev->flags);
3476 else {
3477 if (sysfs_link_rdev(mddev, rdev))
3478 printk(KERN_WARNING "md: cannot register rd%d"
3479 " for %s after level change\n",
3480 rdev->raid_disk, mdname(mddev));
3481 }
3482 }
3483
3484 module_put(mddev->pers->owner);
3485 mddev->pers = pers;
3486 mddev->private = priv;
3487 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3488 mddev->level = mddev->new_level;
3489 mddev->layout = mddev->new_layout;
3490 mddev->chunk_sectors = mddev->new_chunk_sectors;
3491 mddev->delta_disks = 0;
3492 mddev->degraded = 0;
3493 if (mddev->pers->sync_request == NULL) {
3494
3495
3496
3497 mddev->in_sync = 1;
3498 del_timer_sync(&mddev->safemode_timer);
3499 }
3500 pers->run(mddev);
3501 mddev_resume(mddev);
3502 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3503 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3504 md_wakeup_thread(mddev->thread);
3505 sysfs_notify(&mddev->kobj, NULL, "level");
3506 md_new_event(mddev);
3507 return rv;
3508}
3509
3510static struct md_sysfs_entry md_level =
3511__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3512
3513
3514static ssize_t
3515layout_show(struct mddev *mddev, char *page)
3516{
3517
3518 if (mddev->reshape_position != MaxSector &&
3519 mddev->layout != mddev->new_layout)
3520 return sprintf(page, "%d (%d)\n",
3521 mddev->new_layout, mddev->layout);
3522 return sprintf(page, "%d\n", mddev->layout);
3523}
3524
3525static ssize_t
3526layout_store(struct mddev *mddev, const char *buf, size_t len)
3527{
3528 char *e;
3529 unsigned long n = simple_strtoul(buf, &e, 10);
3530
3531 if (!*buf || (*e && *e != '\n'))
3532 return -EINVAL;
3533
3534 if (mddev->pers) {
3535 int err;
3536 if (mddev->pers->check_reshape == NULL)
3537 return -EBUSY;
3538 mddev->new_layout = n;
3539 err = mddev->pers->check_reshape(mddev);
3540 if (err) {
3541 mddev->new_layout = mddev->layout;
3542 return err;
3543 }
3544 } else {
3545 mddev->new_layout = n;
3546 if (mddev->reshape_position == MaxSector)
3547 mddev->layout = n;
3548 }
3549 return len;
3550}
3551static struct md_sysfs_entry md_layout =
3552__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3553
3554
3555static ssize_t
3556raid_disks_show(struct mddev *mddev, char *page)
3557{
3558 if (mddev->raid_disks == 0)
3559 return 0;
3560 if (mddev->reshape_position != MaxSector &&
3561 mddev->delta_disks != 0)
3562 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3563 mddev->raid_disks - mddev->delta_disks);
3564 return sprintf(page, "%d\n", mddev->raid_disks);
3565}
3566
3567static int update_raid_disks(struct mddev *mddev, int raid_disks);
3568
3569static ssize_t
3570raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3571{
3572 char *e;
3573 int rv = 0;
3574 unsigned long n = simple_strtoul(buf, &e, 10);
3575
3576 if (!*buf || (*e && *e != '\n'))
3577 return -EINVAL;
3578
3579 if (mddev->pers)
3580 rv = update_raid_disks(mddev, n);
3581 else if (mddev->reshape_position != MaxSector) {
3582 int olddisks = mddev->raid_disks - mddev->delta_disks;
3583 mddev->delta_disks = n - olddisks;
3584 mddev->raid_disks = n;
3585 } else
3586 mddev->raid_disks = n;
3587 return rv ? rv : len;
3588}
3589static struct md_sysfs_entry md_raid_disks =
3590__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3591
3592static ssize_t
3593chunk_size_show(struct mddev *mddev, char *page)
3594{
3595 if (mddev->reshape_position != MaxSector &&
3596 mddev->chunk_sectors != mddev->new_chunk_sectors)
3597 return sprintf(page, "%d (%d)\n",
3598 mddev->new_chunk_sectors << 9,
3599 mddev->chunk_sectors << 9);
3600 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3601}
3602
3603static ssize_t
3604chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3605{
3606 char *e;
3607 unsigned long n = simple_strtoul(buf, &e, 10);
3608
3609 if (!*buf || (*e && *e != '\n'))
3610 return -EINVAL;
3611
3612 if (mddev->pers) {
3613 int err;
3614 if (mddev->pers->check_reshape == NULL)
3615 return -EBUSY;
3616 mddev->new_chunk_sectors = n >> 9;
3617 err = mddev->pers->check_reshape(mddev);
3618 if (err) {
3619 mddev->new_chunk_sectors = mddev->chunk_sectors;
3620 return err;
3621 }
3622 } else {
3623 mddev->new_chunk_sectors = n >> 9;
3624 if (mddev->reshape_position == MaxSector)
3625 mddev->chunk_sectors = n >> 9;
3626 }
3627 return len;
3628}
3629static struct md_sysfs_entry md_chunk_size =
3630__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3631
3632static ssize_t
3633resync_start_show(struct mddev *mddev, char *page)
3634{
3635 if (mddev->recovery_cp == MaxSector)
3636 return sprintf(page, "none\n");
3637 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3638}
3639
3640static ssize_t
3641resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3642{
3643 char *e;
3644 unsigned long long n = simple_strtoull(buf, &e, 10);
3645
3646 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3647 return -EBUSY;
3648 if (cmd_match(buf, "none"))
3649 n = MaxSector;
3650 else if (!*buf || (*e && *e != '\n'))
3651 return -EINVAL;
3652
3653 mddev->recovery_cp = n;
3654 return len;
3655}
3656static struct md_sysfs_entry md_resync_start =
3657__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
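/*
 * The array state can be:
 *
 *  clear          no devices, no size, no level
 *  inactive       may have settings and devices, but the array is not started
 *  suspended      (not supported yet) all IO blocked, array reconfigurable
 *  readonly       no resync, no superblock writes, write requests fail
 *  read-auto      like readonly, but behaves like 'clean' on a write request
 *  clean          active with no pending writes
 *  active         fully active: IO and resync may be happening
 *  write-pending  clean, but writes are blocked waiting for 'active'
 *  active-idle    like active, but no writes have been seen for a while
 */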
3695enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3696 write_pending, active_idle, bad_word};
3697static char *array_states[] = {
3698 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3699 "write-pending", "active-idle", NULL };
3700
3701static int match_word(const char *word, char **list)
3702{
3703 int n;
3704 for (n=0; list[n]; n++)
3705 if (cmd_match(word, list[n]))
3706 break;
3707 return n;
3708}
3709
3710static ssize_t
3711array_state_show(struct mddev *mddev, char *page)
3712{
3713 enum array_state st = inactive;
3714
3715 if (mddev->pers)
3716 switch(mddev->ro) {
3717 case 1:
3718 st = readonly;
3719 break;
3720 case 2:
3721 st = read_auto;
3722 break;
3723 case 0:
3724 if (mddev->in_sync)
3725 st = clean;
3726 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3727 st = write_pending;
3728 else if (mddev->safemode)
3729 st = active_idle;
3730 else
3731 st = active;
3732 }
3733 else {
3734 if (list_empty(&mddev->disks) &&
3735 mddev->raid_disks == 0 &&
3736 mddev->dev_sectors == 0)
3737 st = clear;
3738 else
3739 st = inactive;
3740 }
3741 return sprintf(page, "%s\n", array_states[st]);
3742}
3743
3744static int do_md_stop(struct mddev * mddev, int ro, int is_open);
3745static int md_set_readonly(struct mddev * mddev, int is_open);
3746static int do_md_run(struct mddev * mddev);
3747static int restart_array(struct mddev *mddev);
3748
3749static ssize_t
3750array_state_store(struct mddev *mddev, const char *buf, size_t len)
3751{
3752 int err = -EINVAL;
3753 enum array_state st = match_word(buf, array_states);
3754 switch(st) {
3755 case bad_word:
3756 break;
3757 case clear:
3758
3759 if (atomic_read(&mddev->openers) > 0)
3760 return -EBUSY;
3761 err = do_md_stop(mddev, 0, 0);
3762 break;
3763 case inactive:
3764
3765 if (mddev->pers) {
3766 if (atomic_read(&mddev->openers) > 0)
3767 return -EBUSY;
3768 err = do_md_stop(mddev, 2, 0);
3769 } else
3770 err = 0;
3771 break;
3772 case suspended:
3773 break;
3774 case readonly:
3775 if (mddev->pers)
3776 err = md_set_readonly(mddev, 0);
3777 else {
3778 mddev->ro = 1;
3779 set_disk_ro(mddev->gendisk, 1);
3780 err = do_md_run(mddev);
3781 }
3782 break;
3783 case read_auto:
3784 if (mddev->pers) {
3785 if (mddev->ro == 0)
3786 err = md_set_readonly(mddev, 0);
3787 else if (mddev->ro == 1)
3788 err = restart_array(mddev);
3789 if (err == 0) {
3790 mddev->ro = 2;
3791 set_disk_ro(mddev->gendisk, 0);
3792 }
3793 } else {
3794 mddev->ro = 2;
3795 err = do_md_run(mddev);
3796 }
3797 break;
3798 case clean:
3799 if (mddev->pers) {
3800 restart_array(mddev);
3801 spin_lock_irq(&mddev->write_lock);
3802 if (atomic_read(&mddev->writes_pending) == 0) {
3803 if (mddev->in_sync == 0) {
3804 mddev->in_sync = 1;
3805 if (mddev->safemode == 1)
3806 mddev->safemode = 0;
3807 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3808 }
3809 err = 0;
3810 } else
3811 err = -EBUSY;
3812 spin_unlock_irq(&mddev->write_lock);
3813 } else
3814 err = -EINVAL;
3815 break;
3816 case active:
3817 if (mddev->pers) {
3818 restart_array(mddev);
3819 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3820 wake_up(&mddev->sb_wait);
3821 err = 0;
3822 } else {
3823 mddev->ro = 0;
3824 set_disk_ro(mddev->gendisk, 0);
3825 err = do_md_run(mddev);
3826 }
3827 break;
3828 case write_pending:
3829 case active_idle:
3830
3831 break;
3832 }
3833 if (err)
3834 return err;
3835 else {
3836 if (mddev->hold_active == UNTIL_IOCTL)
3837 mddev->hold_active = 0;
3838 sysfs_notify_dirent_safe(mddev->sysfs_state);
3839 return len;
3840 }
3841}
3842static struct md_sysfs_entry md_array_state =
3843__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3844
3845static ssize_t
3846max_corrected_read_errors_show(struct mddev *mddev, char *page) {
3847 return sprintf(page, "%d\n",
3848 atomic_read(&mddev->max_corr_read_errors));
3849}
3850
3851static ssize_t
3852max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
3853{
3854 char *e;
3855 unsigned long n = simple_strtoul(buf, &e, 10);
3856
3857 if (*buf && (*e == 0 || *e == '\n')) {
3858 atomic_set(&mddev->max_corr_read_errors, n);
3859 return len;
3860 }
3861 return -EINVAL;
3862}
3863
3864static struct md_sysfs_entry max_corr_read_errors =
3865__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3866 max_corrected_read_errors_store);
3867
3868static ssize_t
3869null_show(struct mddev *mddev, char *page)
3870{
3871 return -EINVAL;
3872}
3873
3874static ssize_t
3875new_dev_store(struct mddev *mddev, const char *buf, size_t len)
3876{
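	/* buf must be "%d:%d\n" giving major and minor numbers.
	 * The new device is added to the array; with persistent metadata its
	 * superblock is read and validated against the existing devices,
	 * otherwise only bind_rdev_to_array() does basic checking.
	 */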
3877
3878
3879
3880
3881
3882
3883
3884 char *e;
3885 int major = simple_strtoul(buf, &e, 10);
3886 int minor;
3887 dev_t dev;
3888 struct md_rdev *rdev;
3889 int err;
3890
3891 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
3892 return -EINVAL;
3893 minor = simple_strtoul(e+1, &e, 10);
3894 if (*e && *e != '\n')
3895 return -EINVAL;
3896 dev = MKDEV(major, minor);
3897 if (major != MAJOR(dev) ||
3898 minor != MINOR(dev))
3899 return -EOVERFLOW;
3900
3901
3902 if (mddev->persistent) {
3903 rdev = md_import_device(dev, mddev->major_version,
3904 mddev->minor_version);
3905 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
3906 struct md_rdev *rdev0
3907 = list_entry(mddev->disks.next,
3908 struct md_rdev, same_set);
3909 err = super_types[mddev->major_version]
3910 .load_super(rdev, rdev0, mddev->minor_version);
3911 if (err < 0)
3912 goto out;
3913 }
3914 } else if (mddev->external)
3915 rdev = md_import_device(dev, -2, -1);
3916 else
3917 rdev = md_import_device(dev, -1, -1);
3918
3919 if (IS_ERR(rdev))
3920 return PTR_ERR(rdev);
3921 err = bind_rdev_to_array(rdev, mddev);
3922 out:
3923 if (err)
3924 export_rdev(rdev);
3925 return err ? err : len;
3926}
3927
3928static struct md_sysfs_entry md_new_device =
3929__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
3930
3931static ssize_t
3932bitmap_store(struct mddev *mddev, const char *buf, size_t len)
3933{
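	/* buf is a whitespace-separated list of chunks, each either a single
	 * chunk number or a "first-last" range; the named bits are dirtied.
	 */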
3934 char *end;
3935 unsigned long chunk, end_chunk;
3936
3937 if (!mddev->bitmap)
3938 goto out;
3939
3940 while (*buf) {
3941 chunk = end_chunk = simple_strtoul(buf, &end, 0);
3942 if (buf == end) break;
3943 if (*end == '-') {
3944 buf = end + 1;
3945 end_chunk = simple_strtoul(buf, &end, 0);
3946 if (buf == end) break;
3947 }
3948 if (*end && !isspace(*end)) break;
3949 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
3950 buf = skip_spaces(end);
3951 }
3952 bitmap_unplug(mddev->bitmap);
3953out:
3954 return len;
3955}
3956
3957static struct md_sysfs_entry md_bitmap =
3958__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
3959
3960static ssize_t
3961size_show(struct mddev *mddev, char *page)
3962{
3963 return sprintf(page, "%llu\n",
3964 (unsigned long long)mddev->dev_sectors / 2);
3965}
3966
3967static int update_size(struct mddev *mddev, sector_t num_sectors);
3968
3969static ssize_t
3970size_store(struct mddev *mddev, const char *buf, size_t len)
3971{
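	/* If the array is inactive, the component size may be reduced but not
	 * increased (except from 0).  If the array is active, attempt an
	 * on-line resize via update_size().
	 */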
3972
3973
3974
3975
3976 sector_t sectors;
3977	int err = strict_blocks_to_sectors(buf, &sectors);
3978
3979 if (err < 0)
3980 return err;
3981 if (mddev->pers) {
3982 err = update_size(mddev, sectors);
3983 md_update_sb(mddev, 1);
3984 } else {
3985 if (mddev->dev_sectors == 0 ||
3986 mddev->dev_sectors > sectors)
3987 mddev->dev_sectors = sectors;
3988 else
3989 err = -ENOSPC;
3990 }
3991 return err ? err : len;
3992}
3993
3994static struct md_sysfs_entry md_size =
3995__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
3996
3997
3998
3999
4000
4001
4002
4003
4004static ssize_t
4005metadata_show(struct mddev *mddev, char *page)
4006{
4007 if (mddev->persistent)
4008 return sprintf(page, "%d.%d\n",
4009 mddev->major_version, mddev->minor_version);
4010 else if (mddev->external)
4011 return sprintf(page, "external:%s\n", mddev->metadata_type);
4012 else
4013 return sprintf(page, "none\n");
4014}
4015
4016static ssize_t
4017metadata_store(struct mddev *mddev, const char *buf, size_t len)
4018{
4019 int major, minor;
4020 char *e;
4021
4022
4023
4024
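	/* Changing the details of 'external' metadata is always permitted;
	 * otherwise there must be no devices attached to the array.
	 */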
4025 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4026 ;
4027 else if (!list_empty(&mddev->disks))
4028 return -EBUSY;
4029
4030 if (cmd_match(buf, "none")) {
4031 mddev->persistent = 0;
4032 mddev->external = 0;
4033 mddev->major_version = 0;
4034 mddev->minor_version = 90;
4035 return len;
4036 }
4037 if (strncmp(buf, "external:", 9) == 0) {
4038 size_t namelen = len-9;
4039 if (namelen >= sizeof(mddev->metadata_type))
4040 namelen = sizeof(mddev->metadata_type)-1;
4041 strncpy(mddev->metadata_type, buf+9, namelen);
4042 mddev->metadata_type[namelen] = 0;
4043 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4044 mddev->metadata_type[--namelen] = 0;
4045 mddev->persistent = 0;
4046 mddev->external = 1;
4047 mddev->major_version = 0;
4048 mddev->minor_version = 90;
4049 return len;
4050 }
4051 major = simple_strtoul(buf, &e, 10);
4052 if (e==buf || *e != '.')
4053 return -EINVAL;
4054 buf = e+1;
4055 minor = simple_strtoul(buf, &e, 10);
4056 if (e==buf || (*e && *e != '\n') )
4057 return -EINVAL;
4058 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4059 return -ENOENT;
4060 mddev->major_version = major;
4061 mddev->minor_version = minor;
4062 mddev->persistent = 1;
4063 mddev->external = 0;
4064 return len;
4065}
4066
4067static struct md_sysfs_entry md_metadata =
4068__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4069
4070static ssize_t
4071action_show(struct mddev *mddev, char *page)
4072{
4073 char *type = "idle";
4074 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4075 type = "frozen";
4076 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4077 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
4078 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4079 type = "reshape";
4080 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4081 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4082 type = "resync";
4083 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
4084 type = "check";
4085 else
4086 type = "repair";
4087 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
4088 type = "recover";
4089 }
4090 return sprintf(page, "%s\n", type);
4091}
4092
4093static void reap_sync_thread(struct mddev *mddev);
4094
4095static ssize_t
4096action_store(struct mddev *mddev, const char *page, size_t len)
4097{
4098 if (!mddev->pers || !mddev->pers->sync_request)
4099 return -EINVAL;
4100
4101 if (cmd_match(page, "frozen"))
4102 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4103 else
4104 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4105
4106 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4107 if (mddev->sync_thread) {
4108 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4109 reap_sync_thread(mddev);
4110 }
4111 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4112 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
4113 return -EBUSY;
4114 else if (cmd_match(page, "resync"))
4115 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4116 else if (cmd_match(page, "recover")) {
4117 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4118 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4119 } else if (cmd_match(page, "reshape")) {
4120 int err;
4121 if (mddev->pers->start_reshape == NULL)
4122 return -EINVAL;
4123 err = mddev->pers->start_reshape(mddev);
4124 if (err)
4125 return err;
4126 sysfs_notify(&mddev->kobj, NULL, "degraded");
4127 } else {
4128 if (cmd_match(page, "check"))
4129 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4130 else if (!cmd_match(page, "repair"))
4131 return -EINVAL;
4132 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4133 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4134 }
4135 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4136 md_wakeup_thread(mddev->thread);
4137 sysfs_notify_dirent_safe(mddev->sysfs_action);
4138 return len;
4139}
4140
4141static ssize_t
4142mismatch_cnt_show(struct mddev *mddev, char *page)
4143{
4144 return sprintf(page, "%llu\n",
4145 (unsigned long long) mddev->resync_mismatches);
4146}
4147
4148static struct md_sysfs_entry md_scan_mode =
4149__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4150
4151
4152static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4153
4154static ssize_t
4155sync_min_show(struct mddev *mddev, char *page)
4156{
4157 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4158 mddev->sync_speed_min ? "local": "system");
4159}
4160
4161static ssize_t
4162sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4163{
4164 int min;
4165 char *e;
4166 if (strncmp(buf, "system", 6)==0) {
4167 mddev->sync_speed_min = 0;
4168 return len;
4169 }
4170 min = simple_strtoul(buf, &e, 10);
4171 if (buf == e || (*e && *e != '\n') || min <= 0)
4172 return -EINVAL;
4173 mddev->sync_speed_min = min;
4174 return len;
4175}
4176
4177static struct md_sysfs_entry md_sync_min =
4178__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4179
4180static ssize_t
4181sync_max_show(struct mddev *mddev, char *page)
4182{
4183 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4184 mddev->sync_speed_max ? "local": "system");
4185}
4186
4187static ssize_t
4188sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4189{
4190 int max;
4191 char *e;
4192 if (strncmp(buf, "system", 6)==0) {
4193 mddev->sync_speed_max = 0;
4194 return len;
4195 }
4196 max = simple_strtoul(buf, &e, 10);
4197 if (buf == e || (*e && *e != '\n') || max <= 0)
4198 return -EINVAL;
4199 mddev->sync_speed_max = max;
4200 return len;
4201}
4202
4203static struct md_sysfs_entry md_sync_max =
4204__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4205
4206static ssize_t
4207degraded_show(struct mddev *mddev, char *page)
4208{
4209 return sprintf(page, "%d\n", mddev->degraded);
4210}
4211static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4212
4213static ssize_t
4214sync_force_parallel_show(struct mddev *mddev, char *page)
4215{
4216 return sprintf(page, "%d\n", mddev->parallel_resync);
4217}
4218
4219static ssize_t
4220sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4221{
4222 long n;
4223
4224 if (strict_strtol(buf, 10, &n))
4225 return -EINVAL;
4226
4227 if (n != 0 && n != 1)
4228 return -EINVAL;
4229
4230 mddev->parallel_resync = n;
4231
4232 if (mddev->sync_thread)
4233 wake_up(&resync_wait);
4234
4235 return len;
4236}
4237
4238
4239static struct md_sysfs_entry md_sync_force_parallel =
4240__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4241 sync_force_parallel_show, sync_force_parallel_store);
4242
4243static ssize_t
4244sync_speed_show(struct mddev *mddev, char *page)
4245{
4246 unsigned long resync, dt, db;
4247 if (mddev->curr_resync == 0)
4248 return sprintf(page, "none\n");
4249 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4250 dt = (jiffies - mddev->resync_mark) / HZ;
4251 if (!dt) dt++;
4252 db = resync - mddev->resync_mark_cnt;
4253 return sprintf(page, "%lu\n", db/dt/2);
4254}
4255
4256static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4257
4258static ssize_t
4259sync_completed_show(struct mddev *mddev, char *page)
4260{
4261 unsigned long long max_sectors, resync;
4262
4263 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4264 return sprintf(page, "none\n");
4265
4266 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
4267 max_sectors = mddev->resync_max_sectors;
4268 else
4269 max_sectors = mddev->dev_sectors;
4270
4271 resync = mddev->curr_resync_completed;
4272 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4273}
4274
4275static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4276
4277static ssize_t
4278min_sync_show(struct mddev *mddev, char *page)
4279{
4280 return sprintf(page, "%llu\n",
4281 (unsigned long long)mddev->resync_min);
4282}
4283static ssize_t
4284min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4285{
4286 unsigned long long min;
4287 if (strict_strtoull(buf, 10, &min))
4288 return -EINVAL;
4289 if (min > mddev->resync_max)
4290 return -EINVAL;
4291 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4292 return -EBUSY;
4293
4294
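	/* Must be a multiple of chunk_size */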
4295 if (mddev->chunk_sectors) {
4296 sector_t temp = min;
4297 if (sector_div(temp, mddev->chunk_sectors))
4298 return -EINVAL;
4299 }
4300 mddev->resync_min = min;
4301
4302 return len;
4303}
4304
4305static struct md_sysfs_entry md_min_sync =
4306__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4307
4308static ssize_t
4309max_sync_show(struct mddev *mddev, char *page)
4310{
4311 if (mddev->resync_max == MaxSector)
4312 return sprintf(page, "max\n");
4313 else
4314 return sprintf(page, "%llu\n",
4315 (unsigned long long)mddev->resync_max);
4316}
4317static ssize_t
4318max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4319{
4320 if (strncmp(buf, "max", 3) == 0)
4321 mddev->resync_max = MaxSector;
4322 else {
4323 unsigned long long max;
4324 if (strict_strtoull(buf, 10, &max))
4325 return -EINVAL;
4326 if (max < mddev->resync_min)
4327 return -EINVAL;
4328 if (max < mddev->resync_max &&
4329 mddev->ro == 0 &&
4330 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4331 return -EBUSY;
4332
4333
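		/* Must be a multiple of chunk_size */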
4334 if (mddev->chunk_sectors) {
4335 sector_t temp = max;
4336 if (sector_div(temp, mddev->chunk_sectors))
4337 return -EINVAL;
4338 }
4339 mddev->resync_max = max;
4340 }
4341 wake_up(&mddev->recovery_wait);
4342 return len;
4343}
4344
4345static struct md_sysfs_entry md_max_sync =
4346__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4347
4348static ssize_t
4349suspend_lo_show(struct mddev *mddev, char *page)
4350{
4351 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4352}
4353
4354static ssize_t
4355suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4356{
4357 char *e;
4358 unsigned long long new = simple_strtoull(buf, &e, 10);
4359 unsigned long long old = mddev->suspend_lo;
4360
4361 if (mddev->pers == NULL ||
4362 mddev->pers->quiesce == NULL)
4363 return -EINVAL;
4364 if (buf == e || (*e && *e != '\n'))
4365 return -EINVAL;
4366
4367 mddev->suspend_lo = new;
4368 if (new >= old)
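		/* Shrinking the suspended region */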
4369
4370 mddev->pers->quiesce(mddev, 2);
4371 else {
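		/* Expanding the suspended region - wait for pending IO first */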
4372
4373 mddev->pers->quiesce(mddev, 1);
4374 mddev->pers->quiesce(mddev, 0);
4375 }
4376 return len;
4377}
4378static struct md_sysfs_entry md_suspend_lo =
4379__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4380
4381
4382static ssize_t
4383suspend_hi_show(struct mddev *mddev, char *page)
4384{
4385 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4386}
4387
4388static ssize_t
4389suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4390{
4391 char *e;
4392 unsigned long long new = simple_strtoull(buf, &e, 10);
4393 unsigned long long old = mddev->suspend_hi;
4394
4395 if (mddev->pers == NULL ||
4396 mddev->pers->quiesce == NULL)
4397 return -EINVAL;
4398 if (buf == e || (*e && *e != '\n'))
4399 return -EINVAL;
4400
4401 mddev->suspend_hi = new;
4402 if (new <= old)
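		/* Shrinking the suspended region */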
4403
4404 mddev->pers->quiesce(mddev, 2);
4405 else {
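		/* Expanding the suspended region - wait for pending IO first */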
4406
4407 mddev->pers->quiesce(mddev, 1);
4408 mddev->pers->quiesce(mddev, 0);
4409 }
4410 return len;
4411}
4412static struct md_sysfs_entry md_suspend_hi =
4413__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4414
4415static ssize_t
4416reshape_position_show(struct mddev *mddev, char *page)
4417{
4418 if (mddev->reshape_position != MaxSector)
4419 return sprintf(page, "%llu\n",
4420 (unsigned long long)mddev->reshape_position);
4421 strcpy(page, "none\n");
4422 return 5;
4423}
4424
4425static ssize_t
4426reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4427{
4428 char *e;
4429 unsigned long long new = simple_strtoull(buf, &e, 10);
4430 if (mddev->pers)
4431 return -EBUSY;
4432 if (buf == e || (*e && *e != '\n'))
4433 return -EINVAL;
4434 mddev->reshape_position = new;
4435 mddev->delta_disks = 0;
4436 mddev->new_level = mddev->level;
4437 mddev->new_layout = mddev->layout;
4438 mddev->new_chunk_sectors = mddev->chunk_sectors;
4439 return len;
4440}
4441
4442static struct md_sysfs_entry md_reshape_position =
4443__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4444 reshape_position_store);
4445
4446static ssize_t
4447array_size_show(struct mddev *mddev, char *page)
4448{
4449 if (mddev->external_size)
4450 return sprintf(page, "%llu\n",
4451 (unsigned long long)mddev->array_sectors/2);
4452 else
4453 return sprintf(page, "default\n");
4454}
4455
4456static ssize_t
4457array_size_store(struct mddev *mddev, const char *buf, size_t len)
4458{
4459 sector_t sectors;
4460
4461 if (strncmp(buf, "default", 7) == 0) {
4462 if (mddev->pers)
4463 sectors = mddev->pers->size(mddev, 0, 0);
4464 else
4465 sectors = mddev->array_sectors;
4466
4467 mddev->external_size = 0;
4468 } else {
4469		if (strict_blocks_to_sectors(buf, &sectors) < 0)
4470 return -EINVAL;
4471 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4472 return -E2BIG;
4473
4474 mddev->external_size = 1;
4475 }
4476
4477 mddev->array_sectors = sectors;
4478 if (mddev->pers) {
4479 set_capacity(mddev->gendisk, mddev->array_sectors);
4480 revalidate_disk(mddev->gendisk);
4481 }
4482 return len;
4483}
4484
4485static struct md_sysfs_entry md_array_size =
4486__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4487 array_size_store);
4488
4489static struct attribute *md_default_attrs[] = {
4490 &md_level.attr,
4491 &md_layout.attr,
4492 &md_raid_disks.attr,
4493 &md_chunk_size.attr,
4494 &md_size.attr,
4495 &md_resync_start.attr,
4496 &md_metadata.attr,
4497 &md_new_device.attr,
4498 &md_safe_delay.attr,
4499 &md_array_state.attr,
4500 &md_reshape_position.attr,
4501 &md_array_size.attr,
4502 &max_corr_read_errors.attr,
4503 NULL,
4504};
4505
4506static struct attribute *md_redundancy_attrs[] = {
4507 &md_scan_mode.attr,
4508 &md_mismatches.attr,
4509 &md_sync_min.attr,
4510 &md_sync_max.attr,
4511 &md_sync_speed.attr,
4512 &md_sync_force_parallel.attr,
4513 &md_sync_completed.attr,
4514 &md_min_sync.attr,
4515 &md_max_sync.attr,
4516 &md_suspend_lo.attr,
4517 &md_suspend_hi.attr,
4518 &md_bitmap.attr,
4519 &md_degraded.attr,
4520 NULL,
4521};
4522static struct attribute_group md_redundancy_group = {
4523 .name = NULL,
4524 .attrs = md_redundancy_attrs,
4525};
4526
4527
4528static ssize_t
4529md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4530{
4531 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4532 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4533 ssize_t rv;
4534
4535 if (!entry->show)
4536 return -EIO;
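	/* An mddev that has been removed from all_mddevs is being
	 * deleted, so refuse the access; otherwise take a reference
	 * before dropping the lock so it cannot disappear under us.
	 */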
4537 spin_lock(&all_mddevs_lock);
4538 if (list_empty(&mddev->all_mddevs)) {
4539 spin_unlock(&all_mddevs_lock);
4540 return -EBUSY;
4541 }
4542 mddev_get(mddev);
4543 spin_unlock(&all_mddevs_lock);
4544
4545 rv = mddev_lock(mddev);
4546 if (!rv) {
4547 rv = entry->show(mddev, page);
4548 mddev_unlock(mddev);
4549 }
4550 mddev_put(mddev);
4551 return rv;
4552}
4553
4554static ssize_t
4555md_attr_store(struct kobject *kobj, struct attribute *attr,
4556 const char *page, size_t length)
4557{
4558 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4559 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4560 ssize_t rv;
4561
4562 if (!entry->store)
4563 return -EIO;
4564 if (!capable(CAP_SYS_ADMIN))
4565 return -EACCES;
4566 spin_lock(&all_mddevs_lock);
4567 if (list_empty(&mddev->all_mddevs)) {
4568 spin_unlock(&all_mddevs_lock);
4569 return -EBUSY;
4570 }
4571 mddev_get(mddev);
4572 spin_unlock(&all_mddevs_lock);
4573 rv = mddev_lock(mddev);
4574 if (!rv) {
4575 rv = entry->store(mddev, page, length);
4576 mddev_unlock(mddev);
4577 }
4578 mddev_put(mddev);
4579 return rv;
4580}
4581
4582static void md_free(struct kobject *ko)
4583{
4584 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4585
4586 if (mddev->sysfs_state)
4587 sysfs_put(mddev->sysfs_state);
4588
4589 if (mddev->gendisk) {
4590 del_gendisk(mddev->gendisk);
4591 put_disk(mddev->gendisk);
4592 }
4593 if (mddev->queue)
4594 blk_cleanup_queue(mddev->queue);
4595
4596 kfree(mddev);
4597}
4598
4599static const struct sysfs_ops md_sysfs_ops = {
4600 .show = md_attr_show,
4601 .store = md_attr_store,
4602};
4603static struct kobj_type md_ktype = {
4604 .release = md_free,
4605 .sysfs_ops = &md_sysfs_ops,
4606 .default_attrs = md_default_attrs,
4607};
4608
4609int mdp_major = 0;
4610
4611static void mddev_delayed_delete(struct work_struct *ws)
4612{
4613 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4614
4615 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4616 kobject_del(&mddev->kobj);
4617 kobject_put(&mddev->kobj);
4618}
4619
4620static int md_alloc(dev_t dev, char *name)
4621{
4622 static DEFINE_MUTEX(disks_mutex);
4623 struct mddev *mddev = mddev_find(dev);
4624 struct gendisk *disk;
4625 int partitioned;
4626 int shift;
4627 int unit;
4628 int error;
4629
4630 if (!mddev)
4631 return -ENODEV;
4632
4633 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
4634 shift = partitioned ? MdpMinorShift : 0;
4635 unit = MINOR(mddev->unit) >> shift;
4636
	/* wait for any previous instance of this device to be
	 * completely removed (mddev_delayed_delete).
	 */
	flush_workqueue(md_misc_wq);
4641
4642 mutex_lock(&disks_mutex);
4643 error = -EEXIST;
4644 if (mddev->gendisk)
4645 goto abort;
4646
	if (name) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
		struct mddev *mddev2;
4651 spin_lock(&all_mddevs_lock);
4652
4653 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
4654 if (mddev2->gendisk &&
4655 strcmp(mddev2->gendisk->disk_name, name) == 0) {
4656 spin_unlock(&all_mddevs_lock);
4657 goto abort;
4658 }
4659 spin_unlock(&all_mddevs_lock);
4660 }
4661
4662 error = -ENOMEM;
4663 mddev->queue = blk_alloc_queue(GFP_KERNEL);
4664 if (!mddev->queue)
4665 goto abort;
4666 mddev->queue->queuedata = mddev;
4667
4668 blk_queue_make_request(mddev->queue, md_make_request);
4669 blk_set_stacking_limits(&mddev->queue->limits);
4670
4671 disk = alloc_disk(1 << shift);
4672 if (!disk) {
4673 blk_cleanup_queue(mddev->queue);
4674 mddev->queue = NULL;
4675 goto abort;
4676 }
4677 disk->major = MAJOR(mddev->unit);
4678 disk->first_minor = unit << shift;
4679 if (name)
4680 strcpy(disk->disk_name, name);
4681 else if (partitioned)
4682 sprintf(disk->disk_name, "md_d%d", unit);
4683 else
4684 sprintf(disk->disk_name, "md%d", unit);
4685 disk->fops = &md_fops;
4686 disk->private_data = mddev;
4687 disk->queue = mddev->queue;
4688 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4689
	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really
	 * remove it now.
	 */
	disk->flags |= GENHD_FL_EXT_DEVT;
4694 mddev->gendisk = disk;
4695
	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far.
	 */
	mutex_lock(&mddev->open_mutex);
4699 add_disk(disk);
4700
4701 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
4702 &disk_to_dev(disk)->kobj, "%s", "md");
4703 if (error) {
		/* This isn't possible, but as kobject_init_and_add is marked
		 * __must_check, we must do something with the result
		 */
4707 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
4708 disk->disk_name);
4709 error = 0;
4710 }
4711 if (mddev->kobj.sd &&
4712 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4713 printk(KERN_DEBUG "pointless warning\n");
4714 mutex_unlock(&mddev->open_mutex);
4715 abort:
4716 mutex_unlock(&disks_mutex);
4717 if (!error && mddev->kobj.sd) {
4718 kobject_uevent(&mddev->kobj, KOBJ_ADD);
4719 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4720 }
4721 mddev_put(mddev);
4722 return error;
4723}
4724
4725static struct kobject *md_probe(dev_t dev, int *part, void *data)
4726{
4727 md_alloc(dev, NULL);
4728 return NULL;
4729}
4730
4731static int add_named_array(const char *val, struct kernel_param *kp)
4732{
	/* val must be "md_something" and not too long.  Trailing
	 * newlines are stripped, then a new array with that name is
	 * allocated via md_alloc().
	 */
4737 int len = strlen(val);
4738 char buf[DISK_NAME_LEN];
4739
4740 while (len && val[len-1] == '\n')
4741 len--;
4742 if (len >= DISK_NAME_LEN)
4743 return -E2BIG;
4744 strlcpy(buf, val, len+1);
4745 if (strncmp(buf, "md_", 3) != 0)
4746 return -EINVAL;
4747 return md_alloc(0, buf);
4748}
4749
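/* Called when the safemode timer expires: if no writes are pending, enter
 * safemode (so the array can be marked clean) and wake the md thread.
 */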
4750static void md_safemode_timeout(unsigned long data)
4751{
4752 struct mddev *mddev = (struct mddev *) data;
4753
4754 if (!atomic_read(&mddev->writes_pending)) {
4755 mddev->safemode = 1;
4756 if (mddev->external)
4757 sysfs_notify_dirent_safe(mddev->sysfs_state);
4758 }
4759 md_wakeup_thread(mddev->thread);
4760}
4761
4762static int start_dirty_degraded;
4763
4764int md_run(struct mddev *mddev)
4765{
4766 int err;
4767 struct md_rdev *rdev;
4768 struct md_personality *pers;
4769
4770 if (list_empty(&mddev->disks))
4771
4772 return -EINVAL;
4773
4774 if (mddev->pers)
4775 return -EBUSY;
4776
4777 if (mddev->sysfs_active)
4778 return -EBUSY;
4779
4780
4781
4782
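	/*
	 * Analyze all RAID superblock(s)
	 */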
4783 if (!mddev->raid_disks) {
4784 if (!mddev->persistent)
4785 return -EINVAL;
4786 analyze_sbs(mddev);
4787 }
4788
4789 if (mddev->level != LEVEL_NONE)
4790 request_module("md-level-%d", mddev->level);
4791 else if (mddev->clevel[0])
4792 request_module("md-%s", mddev->clevel);
4793
4794
4795
4796
4797
4798
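	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */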
4799 list_for_each_entry(rdev, &mddev->disks, same_set) {
4800 if (test_bit(Faulty, &rdev->flags))
4801 continue;
4802 sync_blockdev(rdev->bdev);
4803 invalidate_bdev(rdev->bdev);
4804
4805
4806
4807
4808
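		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled elsewhere.
		 */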
4809 if (rdev->meta_bdev) {
4810 ;
4811 } else if (rdev->data_offset < rdev->sb_start) {
4812 if (mddev->dev_sectors &&
4813 rdev->data_offset + mddev->dev_sectors
4814 > rdev->sb_start) {
4815 printk("md: %s: data overlaps metadata\n",
4816 mdname(mddev));
4817 return -EINVAL;
4818 }
4819 } else {
4820 if (rdev->sb_start + rdev->sb_size/512
4821 > rdev->data_offset) {
4822 printk("md: %s: metadata overlaps data\n",
4823 mdname(mddev));
4824 return -EINVAL;
4825 }
4826 }
4827 sysfs_notify_dirent_safe(rdev->sysfs_state);
4828 }
4829
4830 if (mddev->bio_set == NULL)
4831 mddev->bio_set = bioset_create(BIO_POOL_SIZE,
4832 sizeof(struct mddev *));
4833
4834 spin_lock(&pers_lock);
4835 pers = find_pers(mddev->level, mddev->clevel);
4836 if (!pers || !try_module_get(pers->owner)) {
4837 spin_unlock(&pers_lock);
4838 if (mddev->level != LEVEL_NONE)
4839 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
4840 mddev->level);
4841 else
4842 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
4843 mddev->clevel);
4844 return -EINVAL;
4845 }
4846 mddev->pers = pers;
4847 spin_unlock(&pers_lock);
4848 if (mddev->level != pers->level) {
4849 mddev->level = pers->level;
4850 mddev->new_level = pers->level;
4851 }
4852 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4853
4854 if (mddev->reshape_position != MaxSector &&
4855 pers->start_reshape == NULL) {
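		/* This personality cannot handle reshaping... */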
4856
4857 mddev->pers = NULL;
4858 module_put(pers->owner);
4859 return -EINVAL;
4860 }
4861
4862 if (pers->sync_request) {
4863
4864
4865
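		/* Warn if this is a potentially silly
		 * configuration.
		 */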
4866 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
4867 struct md_rdev *rdev2;
4868 int warned = 0;
4869
4870 list_for_each_entry(rdev, &mddev->disks, same_set)
4871 list_for_each_entry(rdev2, &mddev->disks, same_set) {
4872 if (rdev < rdev2 &&
4873 rdev->bdev->bd_contains ==
4874 rdev2->bdev->bd_contains) {
4875 printk(KERN_WARNING
4876 "%s: WARNING: %s appears to be"
4877 " on the same physical disk as"
4878 " %s.\n",
4879 mdname(mddev),
4880 bdevname(rdev->bdev,b),
4881 bdevname(rdev2->bdev,b2));
4882 warned = 1;
4883 }
4884 }
4885
4886 if (warned)
4887 printk(KERN_WARNING
4888 "True protection against single-disk"
4889 " failure might be compromised.\n");
4890 }
4891
4892 mddev->recovery = 0;
4893
4894 mddev->resync_max_sectors = mddev->dev_sectors;
4895
4896 mddev->ok_start_degraded = start_dirty_degraded;
4897
4898 if (start_readonly && mddev->ro == 0)
4899 mddev->ro = 2;
4900
4901 err = mddev->pers->run(mddev);
4902 if (err)
4903 printk(KERN_ERR "md: pers->run() failed ...\n");
4904 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
4905 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
4906 " but 'external_size' not in effect?\n", __func__);
4907 printk(KERN_ERR
4908 "md: invalid array_size %llu > default size %llu\n",
4909 (unsigned long long)mddev->array_sectors / 2,
4910 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
4911 err = -EINVAL;
4912 mddev->pers->stop(mddev);
4913 }
4914 if (err == 0 && mddev->pers->sync_request) {
4915 err = bitmap_create(mddev);
4916 if (err) {
4917 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
4918 mdname(mddev), err);
4919 mddev->pers->stop(mddev);
4920 }
4921 }
4922 if (err) {
4923 module_put(mddev->pers->owner);
4924 mddev->pers = NULL;
4925 bitmap_destroy(mddev);
4926 return err;
4927 }
4928 if (mddev->pers->sync_request) {
4929 if (mddev->kobj.sd &&
4930 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4931 printk(KERN_WARNING
4932 "md: cannot register extra attributes for %s\n",
4933 mdname(mddev));
4934 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
4935 } else if (mddev->ro == 2)
4936 mddev->ro = 0;
4937
4938 atomic_set(&mddev->writes_pending,0);
4939 atomic_set(&mddev->max_corr_read_errors,
4940 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
4941 mddev->safemode = 0;
4942 mddev->safemode_timer.function = md_safemode_timeout;
4943 mddev->safemode_timer.data = (unsigned long) mddev;
4944 mddev->safemode_delay = (200 * HZ)/1000 +1;
4945 mddev->in_sync = 1;
4946 smp_wmb();
4947 mddev->ready = 1;
4948 list_for_each_entry(rdev, &mddev->disks, same_set)
4949 if (rdev->raid_disk >= 0)
			if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
4952
4953 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4954
4955 if (mddev->flags)
4956 md_update_sb(mddev, 0);
4957
4958 md_new_event(mddev);
4959 sysfs_notify_dirent_safe(mddev->sysfs_state);
4960 sysfs_notify_dirent_safe(mddev->sysfs_action);
4961 sysfs_notify(&mddev->kobj, NULL, "degraded");
4962 return 0;
4963}
4964EXPORT_SYMBOL_GPL(md_run);
4965
4966static int do_md_run(struct mddev *mddev)
4967{
4968 int err;
4969
4970 err = md_run(mddev);
4971 if (err)
4972 goto out;
4973 err = bitmap_load(mddev);
4974 if (err) {
4975 bitmap_destroy(mddev);
4976 goto out;
4977 }
4978
4979 md_wakeup_thread(mddev->thread);
4980 md_wakeup_thread(mddev->sync_thread);
4981
4982 set_capacity(mddev->gendisk, mddev->array_sectors);
4983 revalidate_disk(mddev->gendisk);
4984 mddev->changed = 1;
4985 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4986out:
4987 return err;
4988}
4989
4990static int restart_array(struct mddev *mddev)
4991{
4992 struct gendisk *disk = mddev->gendisk;
4993
4994
4995 if (list_empty(&mddev->disks))
4996 return -ENXIO;
4997 if (!mddev->pers)
4998 return -EINVAL;
4999 if (!mddev->ro)
5000 return -EBUSY;
5001 mddev->safemode = 0;
5002 mddev->ro = 0;
5003 set_disk_ro(disk, 0);
5004 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5005 mdname(mddev));
5006
5007 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5008 md_wakeup_thread(mddev->thread);
5009 md_wakeup_thread(mddev->sync_thread);
5010 sysfs_notify_dirent_safe(mddev->sysfs_state);
5011 return 0;
5012}
5013
5014
5015
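/* Refuse further write access to the bitmap file: fail with -ETXTBSY if
 * somebody else already has it open for writing, otherwise mark the inode
 * so that subsequent attempts to open it for writing are rejected.
 */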
5016static int deny_bitmap_write_access(struct file * file)
5017{
5018 struct inode *inode = file->f_mapping->host;
5019
5020 spin_lock(&inode->i_lock);
5021 if (atomic_read(&inode->i_writecount) > 1) {
5022 spin_unlock(&inode->i_lock);
5023 return -ETXTBSY;
5024 }
5025 atomic_set(&inode->i_writecount, -1);
5026 spin_unlock(&inode->i_lock);
5027
5028 return 0;
5029}
5030
5031void restore_bitmap_write_access(struct file *file)
5032{
5033 struct inode *inode = file->f_mapping->host;
5034
5035 spin_lock(&inode->i_lock);
5036 atomic_set(&inode->i_writecount, 1);
5037 spin_unlock(&inode->i_lock);
5038}
5039
5040static void md_clean(struct mddev *mddev)
5041{
5042 mddev->array_sectors = 0;
5043 mddev->external_size = 0;
5044 mddev->dev_sectors = 0;
5045 mddev->raid_disks = 0;
5046 mddev->recovery_cp = 0;
5047 mddev->resync_min = 0;
5048 mddev->resync_max = MaxSector;
5049 mddev->reshape_position = MaxSector;
5050 mddev->external = 0;
5051 mddev->persistent = 0;
5052 mddev->level = LEVEL_NONE;
5053 mddev->clevel[0] = 0;
5054 mddev->flags = 0;
5055 mddev->ro = 0;
5056 mddev->metadata_type[0] = 0;
5057 mddev->chunk_sectors = 0;
5058 mddev->ctime = mddev->utime = 0;
5059 mddev->layout = 0;
5060 mddev->max_disks = 0;
5061 mddev->events = 0;
5062 mddev->can_decrease_events = 0;
5063 mddev->delta_disks = 0;
5064 mddev->new_level = LEVEL_NONE;
5065 mddev->new_layout = 0;
5066 mddev->new_chunk_sectors = 0;
5067 mddev->curr_resync = 0;
5068 mddev->resync_mismatches = 0;
5069 mddev->suspend_lo = mddev->suspend_hi = 0;
5070 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5071 mddev->recovery = 0;
5072 mddev->in_sync = 0;
5073 mddev->changed = 0;
5074 mddev->degraded = 0;
5075 mddev->safemode = 0;
5076 mddev->bitmap_info.offset = 0;
5077 mddev->bitmap_info.default_offset = 0;
5078 mddev->bitmap_info.chunksize = 0;
5079 mddev->bitmap_info.daemon_sleep = 0;
5080 mddev->bitmap_info.max_write_behind = 0;
5081}
5082
5083static void __md_stop_writes(struct mddev *mddev)
5084{
5085 if (mddev->sync_thread) {
5086 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5087 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5088 reap_sync_thread(mddev);
5089 }
5090
5091 del_timer_sync(&mddev->safemode_timer);
5092
5093 bitmap_flush(mddev);
5094 md_super_wait(mddev);
5095
5096 if (!mddev->in_sync || mddev->flags) {
		/* mark array as shutdown cleanly */
5098 mddev->in_sync = 1;
5099 md_update_sb(mddev, 1);
5100 }
5101}
5102
5103void md_stop_writes(struct mddev *mddev)
5104{
5105 mddev_lock(mddev);
5106 __md_stop_writes(mddev);
5107 mddev_unlock(mddev);
5108}
5109EXPORT_SYMBOL_GPL(md_stop_writes);
5110
5111void md_stop(struct mddev *mddev)
5112{
5113 mddev->ready = 0;
5114 mddev->pers->stop(mddev);
5115 if (mddev->pers->sync_request && mddev->to_remove == NULL)
5116 mddev->to_remove = &md_redundancy_group;
5117 module_put(mddev->pers->owner);
5118 mddev->pers = NULL;
5119 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5120}
5121EXPORT_SYMBOL_GPL(md_stop);
5122
5123static int md_set_readonly(struct mddev *mddev, int is_open)
5124{
5125 int err = 0;
5126 mutex_lock(&mddev->open_mutex);
5127 if (atomic_read(&mddev->openers) > is_open) {
5128 printk("md: %s still in use.\n",mdname(mddev));
5129 err = -EBUSY;
5130 goto out;
5131 }
5132 if (mddev->pers) {
5133 __md_stop_writes(mddev);
5134
5135 err = -ENXIO;
5136 if (mddev->ro==1)
5137 goto out;
5138 mddev->ro = 1;
5139 set_disk_ro(mddev->gendisk, 1);
5140 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5141 sysfs_notify_dirent_safe(mddev->sysfs_state);
5142 err = 0;
5143 }
5144out:
5145 mutex_unlock(&mddev->open_mutex);
5146 return err;
5147}
5148
5149
5150
5151
5152
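/* mode:
 *   0 - completely stop and dis-assemble array
 *   2 - stop but do not disassemble array
 */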
5153static int do_md_stop(struct mddev * mddev, int mode, int is_open)
5154{
5155 struct gendisk *disk = mddev->gendisk;
5156 struct md_rdev *rdev;
5157
5158 mutex_lock(&mddev->open_mutex);
5159 if (atomic_read(&mddev->openers) > is_open ||
5160 mddev->sysfs_active) {
5161 printk("md: %s still in use.\n",mdname(mddev));
5162 mutex_unlock(&mddev->open_mutex);
5163 return -EBUSY;
5164 }
5165
5166 if (mddev->pers) {
5167 if (mddev->ro)
5168 set_disk_ro(disk, 0);
5169
5170 __md_stop_writes(mddev);
5171 md_stop(mddev);
5172 mddev->queue->merge_bvec_fn = NULL;
5173 mddev->queue->backing_dev_info.congested_fn = NULL;
5174
5175
5176 sysfs_notify_dirent_safe(mddev->sysfs_state);
5177
5178 list_for_each_entry(rdev, &mddev->disks, same_set)
5179 if (rdev->raid_disk >= 0)
5180 sysfs_unlink_rdev(mddev, rdev);
5181
5182 set_capacity(disk, 0);
5183 mutex_unlock(&mddev->open_mutex);
5184 mddev->changed = 1;
5185 revalidate_disk(disk);
5186
5187 if (mddev->ro)
5188 mddev->ro = 0;
5189 } else
5190 mutex_unlock(&mddev->open_mutex);
5191
5192
5193
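	/*
	 * Free resources if final stop
	 */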
5194 if (mode == 0) {
5195 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5196
5197 bitmap_destroy(mddev);
5198 if (mddev->bitmap_info.file) {
5199 restore_bitmap_write_access(mddev->bitmap_info.file);
5200 fput(mddev->bitmap_info.file);
5201 mddev->bitmap_info.file = NULL;
5202 }
5203 mddev->bitmap_info.offset = 0;
5204
5205 export_array(mddev);
5206
5207 md_clean(mddev);
5208 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5209 if (mddev->hold_active == UNTIL_STOP)
5210 mddev->hold_active = 0;
5211 }
5212 blk_integrity_unregister(disk);
5213 md_new_event(mddev);
5214 sysfs_notify_dirent_safe(mddev->sysfs_state);
5215 return 0;
5216}
5217
5218#ifndef MODULE
5219static void autorun_array(struct mddev *mddev)
5220{
5221 struct md_rdev *rdev;
5222 int err;
5223
5224 if (list_empty(&mddev->disks))
5225 return;
5226
5227 printk(KERN_INFO "md: running: ");
5228
5229 list_for_each_entry(rdev, &mddev->disks, same_set) {
5230 char b[BDEVNAME_SIZE];
5231 printk("<%s>", bdevname(rdev->bdev,b));
5232 }
5233 printk("\n");
5234
5235 err = do_md_run(mddev);
5236 if (err) {
5237 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5238 do_md_stop(mddev, 0, 0);
5239 }
5240}
5241
/*
 * Try to run arrays based on all the disks that have arrived so far
 * (those are on the pending_raid_disks list).
 *
 * The method: pick the first pending disk, collect every other pending
 * disk whose superblock says it belongs to the same array onto a
 * 'candidates' list, create (or find) the matching mddev, bind the
 * candidate rdevs to it and try to run the array.  Any disks left over
 * are exported again.
 */
5254static void autorun_devices(int part)
5255{
5256 struct md_rdev *rdev0, *rdev, *tmp;
5257 struct mddev *mddev;
5258 char b[BDEVNAME_SIZE];
5259
5260 printk(KERN_INFO "md: autorun ...\n");
5261 while (!list_empty(&pending_raid_disks)) {
5262 int unit;
5263 dev_t dev;
5264 LIST_HEAD(candidates);
5265 rdev0 = list_entry(pending_raid_disks.next,
5266 struct md_rdev, same_set);
5267
5268 printk(KERN_INFO "md: considering %s ...\n",
5269 bdevname(rdev0->bdev,b));
5270 INIT_LIST_HEAD(&candidates);
5271 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5272 if (super_90_load(rdev, rdev0, 0) >= 0) {
5273 printk(KERN_INFO "md: adding %s ...\n",
5274 bdevname(rdev->bdev,b));
5275 list_move(&rdev->same_set, &candidates);
5276 }
5277
		/*
		 * Now we have a set of devices with matching superblocks -
		 * time to allocate (or look up) the mddev.
		 */
5282 if (part) {
5283 dev = MKDEV(mdp_major,
5284 rdev0->preferred_minor << MdpMinorShift);
5285 unit = MINOR(dev) >> MdpMinorShift;
5286 } else {
5287 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5288 unit = MINOR(dev);
5289 }
5290 if (rdev0->preferred_minor != unit) {
5291 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5292 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5293 break;
5294 }
5295
5296 md_probe(dev, NULL, NULL);
5297 mddev = mddev_find(dev);
5298 if (!mddev || !mddev->gendisk) {
5299 if (mddev)
5300 mddev_put(mddev);
5301 printk(KERN_ERR
5302 "md: cannot allocate memory for md drive.\n");
5303 break;
5304 }
5305 if (mddev_lock(mddev))
5306 printk(KERN_WARNING "md: %s locked, cannot run\n",
5307 mdname(mddev));
5308 else if (mddev->raid_disks || mddev->major_version
5309 || !list_empty(&mddev->disks)) {
5310 printk(KERN_WARNING
5311 "md: %s already running, cannot run %s\n",
5312 mdname(mddev), bdevname(rdev0->bdev,b));
5313 mddev_unlock(mddev);
5314 } else {
5315 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5316 mddev->persistent = 1;
5317 rdev_for_each_list(rdev, tmp, &candidates) {
5318 list_del_init(&rdev->same_set);
5319 if (bind_rdev_to_array(rdev, mddev))
5320 export_rdev(rdev);
5321 }
5322 autorun_array(mddev);
5323 mddev_unlock(mddev);
5324 }
5325
		/* on success, candidates will be empty; on error
		 * it won't, so export whatever is left over.
		 */
5328 rdev_for_each_list(rdev, tmp, &candidates) {
5329 list_del_init(&rdev->same_set);
5330 export_rdev(rdev);
5331 }
5332 mddev_put(mddev);
5333 }
5334 printk(KERN_INFO "md: ... autorun DONE.\n");
5335}
5336#endif
5337
5338static int get_version(void __user * arg)
5339{
5340 mdu_version_t ver;
5341
5342 ver.major = MD_MAJOR_VERSION;
5343 ver.minor = MD_MINOR_VERSION;
5344 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5345
5346 if (copy_to_user(arg, &ver, sizeof(ver)))
5347 return -EFAULT;
5348
5349 return 0;
5350}
5351
5352static int get_array_info(struct mddev * mddev, void __user * arg)
5353{
5354 mdu_array_info_t info;
5355 int nr,working,insync,failed,spare;
5356 struct md_rdev *rdev;
5357
5358 nr=working=insync=failed=spare=0;
5359 list_for_each_entry(rdev, &mddev->disks, same_set) {
5360 nr++;
5361 if (test_bit(Faulty, &rdev->flags))
5362 failed++;
5363 else {
5364 working++;
5365 if (test_bit(In_sync, &rdev->flags))
5366 insync++;
5367 else
5368 spare++;
5369 }
5370 }
5371
5372 info.major_version = mddev->major_version;
5373 info.minor_version = mddev->minor_version;
5374 info.patch_version = MD_PATCHLEVEL_VERSION;
5375 info.ctime = mddev->ctime;
5376 info.level = mddev->level;
5377 info.size = mddev->dev_sectors / 2;
5378 if (info.size != mddev->dev_sectors / 2)
5379 info.size = -1;
5380 info.nr_disks = nr;
5381 info.raid_disks = mddev->raid_disks;
5382 info.md_minor = mddev->md_minor;
5383 info.not_persistent= !mddev->persistent;
5384
5385 info.utime = mddev->utime;
5386 info.state = 0;
5387 if (mddev->in_sync)
5388 info.state = (1<<MD_SB_CLEAN);
5389 if (mddev->bitmap && mddev->bitmap_info.offset)
		info.state |= (1<<MD_SB_BITMAP_PRESENT);
5391 info.active_disks = insync;
5392 info.working_disks = working;
5393 info.failed_disks = failed;
5394 info.spare_disks = spare;
5395
5396 info.layout = mddev->layout;
5397 info.chunk_size = mddev->chunk_sectors << 9;
5398
5399 if (copy_to_user(arg, &info, sizeof(info)))
5400 return -EFAULT;
5401
5402 return 0;
5403}
5404
5405static int get_bitmap_file(struct mddev * mddev, void __user * arg)
5406{
5407 mdu_bitmap_file_t *file = NULL;
5408 char *ptr, *buf = NULL;
5409 int err = -ENOMEM;
5410
5411 if (md_allow_write(mddev))
5412 file = kmalloc(sizeof(*file), GFP_NOIO);
5413 else
5414 file = kmalloc(sizeof(*file), GFP_KERNEL);
5415
5416 if (!file)
5417 goto out;
5418
	/* bitmap disabled, zero the first byte and copy out */
5420 if (!mddev->bitmap || !mddev->bitmap->file) {
5421 file->pathname[0] = '\0';
5422 goto copy_out;
5423 }
5424
5425 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
5426 if (!buf)
5427 goto out;
5428
5429 ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
5430 if (IS_ERR(ptr))
5431 goto out;
5432
5433 strcpy(file->pathname, ptr);
5434
5435copy_out:
5436 err = 0;
5437 if (copy_to_user(arg, file, sizeof(*file)))
5438 err = -EFAULT;
5439out:
5440 kfree(buf);
5441 kfree(file);
5442 return err;
5443}
5444
5445static int get_disk_info(struct mddev * mddev, void __user * arg)
5446{
5447 mdu_disk_info_t info;
5448 struct md_rdev *rdev;
5449
5450 if (copy_from_user(&info, arg, sizeof(info)))
5451 return -EFAULT;
5452
5453 rdev = find_rdev_nr(mddev, info.number);
5454 if (rdev) {
5455 info.major = MAJOR(rdev->bdev->bd_dev);
5456 info.minor = MINOR(rdev->bdev->bd_dev);
5457 info.raid_disk = rdev->raid_disk;
5458 info.state = 0;
5459 if (test_bit(Faulty, &rdev->flags))
5460 info.state |= (1<<MD_DISK_FAULTY);
5461 else if (test_bit(In_sync, &rdev->flags)) {
5462 info.state |= (1<<MD_DISK_ACTIVE);
5463 info.state |= (1<<MD_DISK_SYNC);
5464 }
5465 if (test_bit(WriteMostly, &rdev->flags))
5466 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5467 } else {
5468 info.major = info.minor = 0;
5469 info.raid_disk = -1;
5470 info.state = (1<<MD_DISK_REMOVED);
5471 }
5472
5473 if (copy_to_user(arg, &info, sizeof(info)))
5474 return -EFAULT;
5475
5476 return 0;
5477}
5478
5479static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5480{
5481 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5482 struct md_rdev *rdev;
5483 dev_t dev = MKDEV(info->major,info->minor);
5484
5485 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5486 return -EOVERFLOW;
5487
5488 if (!mddev->raid_disks) {
5489 int err;
5490
5491 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5492 if (IS_ERR(rdev)) {
5493 printk(KERN_WARNING
5494 "md: md_import_device returned %ld\n",
5495 PTR_ERR(rdev));
5496 return PTR_ERR(rdev);
5497 }
5498 if (!list_empty(&mddev->disks)) {
5499 struct md_rdev *rdev0
5500 = list_entry(mddev->disks.next,
5501 struct md_rdev, same_set);
5502 err = super_types[mddev->major_version]
5503 .load_super(rdev, rdev0, mddev->minor_version);
5504 if (err < 0) {
5505 printk(KERN_WARNING
5506 "md: %s has different UUID to %s\n",
5507 bdevname(rdev->bdev,b),
5508 bdevname(rdev0->bdev,b2));
5509 export_rdev(rdev);
5510 return -EINVAL;
5511 }
5512 }
5513 err = bind_rdev_to_array(rdev, mddev);
5514 if (err)
5515 export_rdev(rdev);
5516 return err;
5517 }
5518
5519
5520
5521
5522
5523
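	/*
	 * add_new_disk can be used once the array is assembled
	 * to add "hot spares".  They must already have a superblock
	 * written
	 */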
5524 if (mddev->pers) {
5525 int err;
5526 if (!mddev->pers->hot_add_disk) {
5527 printk(KERN_WARNING
5528 "%s: personality does not support diskops!\n",
5529 mdname(mddev));
5530 return -EINVAL;
5531 }
5532 if (mddev->persistent)
5533 rdev = md_import_device(dev, mddev->major_version,
5534 mddev->minor_version);
5535 else
5536 rdev = md_import_device(dev, -1, -1);
5537 if (IS_ERR(rdev)) {
5538 printk(KERN_WARNING
5539 "md: md_import_device returned %ld\n",
5540 PTR_ERR(rdev));
5541 return PTR_ERR(rdev);
5542 }
5543
5544 if (!mddev->persistent) {
5545 if (info->state & (1<<MD_DISK_SYNC) &&
5546 info->raid_disk < mddev->raid_disks) {
5547 rdev->raid_disk = info->raid_disk;
5548 set_bit(In_sync, &rdev->flags);
5549 } else
5550 rdev->raid_disk = -1;
5551 } else
5552 super_types[mddev->major_version].
5553 validate_super(mddev, rdev);
5554 if ((info->state & (1<<MD_DISK_SYNC)) &&
5555 (!test_bit(In_sync, &rdev->flags) ||
5556 rdev->raid_disk != info->raid_disk)) {
			/* This was a hot-add request, but events
			 * don't match, so reject it.
			 */
5560 export_rdev(rdev);
5561 return -EINVAL;
5562 }
5563
5564 if (test_bit(In_sync, &rdev->flags))
5565 rdev->saved_raid_disk = rdev->raid_disk;
5566 else
5567 rdev->saved_raid_disk = -1;
5568
5569 clear_bit(In_sync, &rdev->flags);
5570 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5571 set_bit(WriteMostly, &rdev->flags);
5572 else
5573 clear_bit(WriteMostly, &rdev->flags);
5574
5575 rdev->raid_disk = -1;
5576 err = bind_rdev_to_array(rdev, mddev);
5577 if (!err && !mddev->pers->hot_remove_disk) {
			/* If there is hot_add_disk but no hot_remove_disk
			 * then added disks are for geometry changes,
			 * and should be added immediately.
			 */
5582 super_types[mddev->major_version].
5583 validate_super(mddev, rdev);
5584 err = mddev->pers->hot_add_disk(mddev, rdev);
5585 if (err)
5586 unbind_rdev_from_array(rdev);
5587 }
5588 if (err)
5589 export_rdev(rdev);
5590 else
5591 sysfs_notify_dirent_safe(rdev->sysfs_state);
5592
5593 md_update_sb(mddev, 1);
5594 if (mddev->degraded)
5595 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5596 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5597 if (!err)
5598 md_new_event(mddev);
5599 md_wakeup_thread(mddev->thread);
5600 return err;
5601 }
5602
5603
5604
5605
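	/* otherwise, add_new_disk is only allowed
	 * for major_version==0 superblocks
	 */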
5606 if (mddev->major_version != 0) {
5607 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
5608 mdname(mddev));
5609 return -EINVAL;
5610 }
5611
5612 if (!(info->state & (1<<MD_DISK_FAULTY))) {
5613 int err;
5614 rdev = md_import_device(dev, -1, 0);
5615 if (IS_ERR(rdev)) {
5616 printk(KERN_WARNING
5617 "md: error, md_import_device() returned %ld\n",
5618 PTR_ERR(rdev));
5619 return PTR_ERR(rdev);
5620 }
5621 rdev->desc_nr = info->number;
5622 if (info->raid_disk < mddev->raid_disks)
5623 rdev->raid_disk = info->raid_disk;
5624 else
5625 rdev->raid_disk = -1;
5626
5627 if (rdev->raid_disk < mddev->raid_disks)
5628 if (info->state & (1<<MD_DISK_SYNC))
5629 set_bit(In_sync, &rdev->flags);
5630
5631 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5632 set_bit(WriteMostly, &rdev->flags);
5633
5634 if (!mddev->persistent) {
5635 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5636 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5637 } else
5638 rdev->sb_start = calc_dev_sboffset(rdev);
5639 rdev->sectors = rdev->sb_start;
5640
5641 err = bind_rdev_to_array(rdev, mddev);
5642 if (err) {
5643 export_rdev(rdev);
5644 return err;
5645 }
5646 }
5647
5648 return 0;
5649}
5650
5651static int hot_remove_disk(struct mddev * mddev, dev_t dev)
5652{
5653 char b[BDEVNAME_SIZE];
5654 struct md_rdev *rdev;
5655
5656 rdev = find_rdev(mddev, dev);
5657 if (!rdev)
5658 return -ENXIO;
5659
5660 if (rdev->raid_disk >= 0)
5661 goto busy;
5662
5663 kick_rdev_from_array(rdev);
5664 md_update_sb(mddev, 1);
5665 md_new_event(mddev);
5666
5667 return 0;
5668busy:
5669 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
5670 bdevname(rdev->bdev,b), mdname(mddev));
5671 return -EBUSY;
5672}
5673
5674static int hot_add_disk(struct mddev * mddev, dev_t dev)
5675{
5676 char b[BDEVNAME_SIZE];
5677 int err;
5678 struct md_rdev *rdev;
5679
5680 if (!mddev->pers)
5681 return -ENODEV;
5682
5683 if (mddev->major_version != 0) {
5684 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
5685 " version-0 superblocks.\n",
5686 mdname(mddev));
5687 return -EINVAL;
5688 }
5689 if (!mddev->pers->hot_add_disk) {
5690 printk(KERN_WARNING
5691 "%s: personality does not support diskops!\n",
5692 mdname(mddev));
5693 return -EINVAL;
5694 }
5695
5696 rdev = md_import_device(dev, -1, 0);
5697 if (IS_ERR(rdev)) {
5698 printk(KERN_WARNING
5699 "md: error, md_import_device() returned %ld\n",
5700 PTR_ERR(rdev));
5701 return -EINVAL;
5702 }
5703
5704 if (mddev->persistent)
5705 rdev->sb_start = calc_dev_sboffset(rdev);
5706 else
5707 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5708
5709 rdev->sectors = rdev->sb_start;
5710
5711 if (test_bit(Faulty, &rdev->flags)) {
5712 printk(KERN_WARNING
5713 "md: can not hot-add faulty %s disk to %s!\n",
5714 bdevname(rdev->bdev,b), mdname(mddev));
5715 err = -EINVAL;
5716 goto abort_export;
5717 }
5718 clear_bit(In_sync, &rdev->flags);
5719 rdev->desc_nr = -1;
5720 rdev->saved_raid_disk = -1;
5721 err = bind_rdev_to_array(rdev, mddev);
5722 if (err)
5723 goto abort_export;
5724
5725
5726
5727
5728
5729
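	/*
	 * The disk goes in as a spare: recovery, kicked below, decides
	 * where (and whether) it is actually used.
	 */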
5730 rdev->raid_disk = -1;
5731
5732 md_update_sb(mddev, 1);
5733
5734
5735
5736
5737
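	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */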
5738 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5739 md_wakeup_thread(mddev->thread);
5740 md_new_event(mddev);
5741 return 0;
5742
5743abort_export:
5744 export_rdev(rdev);
5745 return err;
5746}
5747
5748static int set_bitmap_file(struct mddev *mddev, int fd)
5749{
5750 int err;
5751
5752 if (mddev->pers) {
5753 if (!mddev->pers->quiesce)
5754 return -EBUSY;
5755 if (mddev->recovery || mddev->sync_thread)
5756 return -EBUSY;
5757
5758 }
5759
5760
5761 if (fd >= 0) {
5762 if (mddev->bitmap)
5763 return -EEXIST;
5764 mddev->bitmap_info.file = fget(fd);
5765
5766 if (mddev->bitmap_info.file == NULL) {
5767 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5768 mdname(mddev));
5769 return -EBADF;
5770 }
5771
5772 err = deny_bitmap_write_access(mddev->bitmap_info.file);
5773 if (err) {
5774 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
5775 mdname(mddev));
5776 fput(mddev->bitmap_info.file);
5777 mddev->bitmap_info.file = NULL;
5778 return err;
5779 }
5780 mddev->bitmap_info.offset = 0;
5781 } else if (mddev->bitmap == NULL)
5782 return -ENOENT;
5783 err = 0;
5784 if (mddev->pers) {
5785 mddev->pers->quiesce(mddev, 1);
5786 if (fd >= 0) {
5787 err = bitmap_create(mddev);
5788 if (!err)
5789 err = bitmap_load(mddev);
5790 }
5791 if (fd < 0 || err) {
5792 bitmap_destroy(mddev);
5793 fd = -1;
5794 }
5795 mddev->pers->quiesce(mddev, 0);
5796 }
5797 if (fd < 0) {
5798 if (mddev->bitmap_info.file) {
5799 restore_bitmap_write_access(mddev->bitmap_info.file);
5800 fput(mddev->bitmap_info.file);
5801 }
5802 mddev->bitmap_info.file = NULL;
5803 }
5804
5805 return err;
5806}
5807
/*
 * set_array_info is used two different ways
 * The original usage is when creating a new array.
 * In this usage, raid_disks is > 0 and it together with
 *  level, size, not_persistent, layout, chunksize determine the
 *  shape of the array.
 *  This will always create an array with a type-0.90.0 superblock.
 * The newer usage is when assembling an array.
 *  In this case raid_disks will be 0, and the major_version field is
 *  used to determine which style superblocks are to be found on the devices.
 *  The minor and patch _version numbers are also kept in case the
 *  super_block handler wishes to interpret them.
 */
5821static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
5822{
5823
5824 if (info->raid_disks == 0) {
5825
5826 if (info->major_version < 0 ||
5827 info->major_version >= ARRAY_SIZE(super_types) ||
5828 super_types[info->major_version].name == NULL) {
5829
5830 printk(KERN_INFO
5831 "md: superblock version %d not known\n",
5832 info->major_version);
5833 return -EINVAL;
5834 }
5835 mddev->major_version = info->major_version;
5836 mddev->minor_version = info->minor_version;
5837 mddev->patch_version = info->patch_version;
5838 mddev->persistent = !info->not_persistent;
5839
5840
5841
5842 mddev->ctime = get_seconds();
5843 return 0;
5844 }
5845 mddev->major_version = MD_MAJOR_VERSION;
5846 mddev->minor_version = MD_MINOR_VERSION;
5847 mddev->patch_version = MD_PATCHLEVEL_VERSION;
5848 mddev->ctime = get_seconds();
5849
5850 mddev->level = info->level;
5851 mddev->clevel[0] = 0;
5852 mddev->dev_sectors = 2 * (sector_t)info->size;
5853 mddev->raid_disks = info->raid_disks;
5854
5855
5856
5857 if (info->state & (1<<MD_SB_CLEAN))
5858 mddev->recovery_cp = MaxSector;
5859 else
5860 mddev->recovery_cp = 0;
5861 mddev->persistent = ! info->not_persistent;
5862 mddev->external = 0;
5863
5864 mddev->layout = info->layout;
5865 mddev->chunk_sectors = info->chunk_size >> 9;
5866
5867 mddev->max_disks = MD_SB_DISKS;
5868
5869 if (mddev->persistent)
5870 mddev->flags = 0;
5871 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5872
5873 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
5874 mddev->bitmap_info.offset = 0;
5875
5876 mddev->reshape_position = MaxSector;
5877
5878
5879
5880
5881 get_random_bytes(mddev->uuid, 16);
5882
5883 mddev->new_level = mddev->level;
5884 mddev->new_chunk_sectors = mddev->chunk_sectors;
5885 mddev->new_layout = mddev->layout;
5886 mddev->delta_disks = 0;
5887
5888 return 0;
5889}
5890
5891void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
5892{
5893 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
5894
5895 if (mddev->external_size)
5896 return;
5897
5898 mddev->array_sectors = array_sectors;
5899}
5900EXPORT_SYMBOL(md_set_array_sectors);
5901
5902static int update_size(struct mddev *mddev, sector_t num_sectors)
5903{
5904 struct md_rdev *rdev;
5905 int rv;
5906 int fit = (num_sectors == 0);
5907
5908 if (mddev->pers->resize == NULL)
5909 return -EINVAL;
5910
	/* The "num_sectors" is the number of sectors of each device that
	 * is used.  This can only make sense for arrays with redundancy.
	 * linear and raid0 always use whatever space is available.  We can only
	 * consider changing this number if no resync or reconstruction is
	 * happening, and if the new size is acceptable.  It must fit before the
	 * sb_start or, if that is < data_offset, it must fit before the size
	 * of each device.  If num_sectors is zero, we find the largest size
	 * that fits.
	 */
	if (mddev->sync_thread)
		return -EBUSY;
	if (mddev->bitmap)
		/* Sorry, cannot grow a bitmap yet, just remove it,
		 * grow, and re-add.
		 */
		return -EBUSY;
5926 list_for_each_entry(rdev, &mddev->disks, same_set) {
5927 sector_t avail = rdev->sectors;
5928
5929 if (fit && (num_sectors == 0 || num_sectors > avail))
5930 num_sectors = avail;
5931 if (avail < num_sectors)
5932 return -ENOSPC;
5933 }
5934 rv = mddev->pers->resize(mddev, num_sectors);
5935 if (!rv)
5936 revalidate_disk(mddev->gendisk);
5937 return rv;
5938}
5939
5940static int update_raid_disks(struct mddev *mddev, int raid_disks)
5941{
5942 int rv;
5943
5944 if (mddev->pers->check_reshape == NULL)
5945 return -EINVAL;
5946 if (raid_disks <= 0 ||
5947 (mddev->max_disks && raid_disks >= mddev->max_disks))
5948 return -EINVAL;
5949 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
5950 return -EBUSY;
5951 mddev->delta_disks = raid_disks - mddev->raid_disks;
5952
5953 rv = mddev->pers->check_reshape(mddev);
5954 if (rv < 0)
5955 mddev->delta_disks = 0;
5956 return rv;
5957}
5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
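/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * Only one change may be requested at a time: the size, the number of
 * raid disks, the layout, or the presence of the write-intent bitmap.
 * Any other difference from the current configuration is rejected.
 */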
5968static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
5969{
5970 int rv = 0;
5971 int cnt = 0;
5972 int state = 0;
5973
	/* calculate expected state, ignoring low bits */
	if (mddev->bitmap && mddev->bitmap_info.offset)
		state |= (1 << MD_SB_BITMAP_PRESENT);

	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
	    mddev->ctime != info->ctime ||
	    mddev->level != info->level ||
	    !mddev->persistent != info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore low-order state bits, and allow SB_BITMAP_PRESENT to change */
	    ((state ^ info->state) & 0xfffffe00))
		return -EINVAL;
5990
5991 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
5992 cnt++;
5993 if (mddev->raid_disks != info->raid_disks)
5994 cnt++;
5995 if (mddev->layout != info->layout)
5996 cnt++;
5997 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
5998 cnt++;
5999 if (cnt == 0)
6000 return 0;
6001 if (cnt > 1)
6002 return -EINVAL;
6003
6004 if (mddev->layout != info->layout) {
6005
6006
6007
6008
6009 if (mddev->pers->check_reshape == NULL)
6010 return -EINVAL;
6011 else {
6012 mddev->new_layout = info->layout;
6013 rv = mddev->pers->check_reshape(mddev);
6014 if (rv)
6015 mddev->new_layout = mddev->layout;
6016 return rv;
6017 }
6018 }
6019 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6020 rv = update_size(mddev, (sector_t)info->size * 2);
6021
6022 if (mddev->raid_disks != info->raid_disks)
6023 rv = update_raid_disks(mddev, info->raid_disks);
6024
6025 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6026 if (mddev->pers->quiesce == NULL)
6027 return -EINVAL;
6028 if (mddev->recovery || mddev->sync_thread)
6029 return -EBUSY;
6030 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6031
6032 if (mddev->bitmap)
6033 return -EEXIST;
6034 if (mddev->bitmap_info.default_offset == 0)
6035 return -EINVAL;
6036 mddev->bitmap_info.offset =
6037 mddev->bitmap_info.default_offset;
6038 mddev->pers->quiesce(mddev, 1);
6039 rv = bitmap_create(mddev);
6040 if (!rv)
6041 rv = bitmap_load(mddev);
6042 if (rv)
6043 bitmap_destroy(mddev);
6044 mddev->pers->quiesce(mddev, 0);
6045 } else {
6046
6047 if (!mddev->bitmap)
6048 return -ENOENT;
6049 if (mddev->bitmap->file)
6050 return -EINVAL;
6051 mddev->pers->quiesce(mddev, 1);
6052 bitmap_destroy(mddev);
6053 mddev->pers->quiesce(mddev, 0);
6054 mddev->bitmap_info.offset = 0;
6055 }
6056 }
6057 md_update_sb(mddev, 1);
6058 return rv;
6059}
6060
6061static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6062{
6063 struct md_rdev *rdev;
6064
6065 if (mddev->pers == NULL)
6066 return -ENODEV;
6067
6068 rdev = find_rdev(mddev, dev);
6069 if (!rdev)
6070 return -ENODEV;
6071
6072 md_error(mddev, rdev);
6073 if (!test_bit(Faulty, &rdev->flags))
6074 return -EBUSY;
6075 return 0;
6076}
6077
6078
6079
6080
6081
6082
6083
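/*
 * We have a problem here : there is no easy way to give a CHS
 * virtual geometry. We currently pretend that we have a 2 heads
 * 4 sectors (with a BIG number of cylinders...). This drives
 * dosfs just mad... ;-)
 */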
6084static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6085{
6086 struct mddev *mddev = bdev->bd_disk->private_data;
6087
6088 geo->heads = 2;
6089 geo->sectors = 4;
6090 geo->cylinders = mddev->array_sectors / 8;
6091 return 0;
6092}
6093
6094static int md_ioctl(struct block_device *bdev, fmode_t mode,
6095 unsigned int cmd, unsigned long arg)
6096{
6097 int err = 0;
6098 void __user *argp = (void __user *)arg;
6099 struct mddev *mddev = NULL;
6100 int ro;
6101
6102 switch (cmd) {
6103 case RAID_VERSION:
6104 case GET_ARRAY_INFO:
6105 case GET_DISK_INFO:
6106 break;
6107 default:
6108 if (!capable(CAP_SYS_ADMIN))
6109 return -EACCES;
6110 }
6111
6112
6113
6114
6115
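	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */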
6116 switch (cmd)
6117 {
6118 case RAID_VERSION:
6119 err = get_version(argp);
6120 goto done;
6121
6122 case PRINT_RAID_DEBUG:
6123 err = 0;
6124 md_print_devices();
6125 goto done;
6126
6127#ifndef MODULE
6128 case RAID_AUTORUN:
6129 err = 0;
6130 autostart_arrays(arg);
6131 goto done;
6132#endif
6133 default:;
6134 }
6135
6136
6137
6138
6139
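	/*
	 * Commands creating/starting a new array:
	 */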
6140 mddev = bdev->bd_disk->private_data;
6141
6142 if (!mddev) {
6143 BUG();
6144 goto abort;
6145 }
6146
6147 err = mddev_lock(mddev);
6148 if (err) {
6149 printk(KERN_INFO
6150 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6151 err, cmd);
6152 goto abort;
6153 }
6154
6155 switch (cmd)
6156 {
6157 case SET_ARRAY_INFO:
6158 {
6159 mdu_array_info_t info;
6160 if (!arg)
6161 memset(&info, 0, sizeof(info));
6162 else if (copy_from_user(&info, argp, sizeof(info))) {
6163 err = -EFAULT;
6164 goto abort_unlock;
6165 }
6166 if (mddev->pers) {
6167 err = update_array_info(mddev, &info);
6168 if (err) {
6169 printk(KERN_WARNING "md: couldn't update"
6170 " array info. %d\n", err);
6171 goto abort_unlock;
6172 }
6173 goto done_unlock;
6174 }
6175 if (!list_empty(&mddev->disks)) {
6176 printk(KERN_WARNING
6177 "md: array %s already has disks!\n",
6178 mdname(mddev));
6179 err = -EBUSY;
6180 goto abort_unlock;
6181 }
6182 if (mddev->raid_disks) {
6183 printk(KERN_WARNING
6184 "md: array %s already initialised!\n",
6185 mdname(mddev));
6186 err = -EBUSY;
6187 goto abort_unlock;
6188 }
6189 err = set_array_info(mddev, &info);
6190 if (err) {
6191 printk(KERN_WARNING "md: couldn't set"
6192 " array info. %d\n", err);
6193 goto abort_unlock;
6194 }
6195 }
6196 goto done_unlock;
6197
6198 default:;
6199 }
6200
6201
6202
6203
6204
6205
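	/*
	 * Commands querying/configuring an existing array:
	 */
	/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */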
6206 if ((!mddev->raid_disks && !mddev->external)
6207 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6208 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6209 && cmd != GET_BITMAP_FILE) {
6210 err = -ENODEV;
6211 goto abort_unlock;
6212 }
6213
6214
6215
6216
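	/*
	 * Commands even a read-only array can execute:
	 */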
6217 switch (cmd)
6218 {
6219 case GET_ARRAY_INFO:
6220 err = get_array_info(mddev, argp);
6221 goto done_unlock;
6222
6223 case GET_BITMAP_FILE:
6224 err = get_bitmap_file(mddev, argp);
6225 goto done_unlock;
6226
6227 case GET_DISK_INFO:
6228 err = get_disk_info(mddev, argp);
6229 goto done_unlock;
6230
6231 case RESTART_ARRAY_RW:
6232 err = restart_array(mddev);
6233 goto done_unlock;
6234
6235 case STOP_ARRAY:
6236 err = do_md_stop(mddev, 0, 1);
6237 goto done_unlock;
6238
6239 case STOP_ARRAY_RO:
6240 err = md_set_readonly(mddev, 1);
6241 goto done_unlock;
6242
6243 case BLKROSET:
6244 if (get_user(ro, (int __user *)(arg))) {
6245 err = -EFAULT;
6246 goto done_unlock;
6247 }
6248 err = -EINVAL;
		/* if the bdev is going readonly the value of mddev->ro
		 * does not matter, no writes are coming
		 */
		if (ro)
			goto done_unlock;

		/* are we already prepared for writes? */
		if (mddev->ro != 1)
			goto done_unlock;

		/* transitioning to read-auto need only happen for
		 * arrays that call md_write_start
		 */
6263 if (mddev->pers) {
6264 err = restart_array(mddev);
6265 if (err == 0) {
6266 mddev->ro = 2;
6267 set_disk_ro(mddev->gendisk, 0);
6268 }
6269 }
6270 goto done_unlock;
6271 }
6272
6273
6274
6275
6276
6277
6278
6279
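	/* The remaining ioctls change the state of the array, so are not
	 * allowed on read-only arrays.  Non-MD ioctls still pass through
	 * to the 'default' case below, so only 'md' ioctls are refused;
	 * an array started auto-read-only (ro == 2) is switched to
	 * read-write instead.
	 */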
6280 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
6281 if (mddev->ro == 2) {
6282 mddev->ro = 0;
6283 sysfs_notify_dirent_safe(mddev->sysfs_state);
6284 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6285 md_wakeup_thread(mddev->thread);
6286 } else {
6287 err = -EROFS;
6288 goto abort_unlock;
6289 }
6290 }
6291
6292 switch (cmd)
6293 {
6294 case ADD_NEW_DISK:
6295 {
6296 mdu_disk_info_t info;
6297 if (copy_from_user(&info, argp, sizeof(info)))
6298 err = -EFAULT;
6299 else
6300 err = add_new_disk(mddev, &info);
6301 goto done_unlock;
6302 }
6303
6304 case HOT_REMOVE_DISK:
6305 err = hot_remove_disk(mddev, new_decode_dev(arg));
6306 goto done_unlock;
6307
6308 case HOT_ADD_DISK:
6309 err = hot_add_disk(mddev, new_decode_dev(arg));
6310 goto done_unlock;
6311
6312 case SET_DISK_FAULTY:
6313 err = set_disk_faulty(mddev, new_decode_dev(arg));
6314 goto done_unlock;
6315
6316 case RUN_ARRAY:
6317 err = do_md_run(mddev);
6318 goto done_unlock;
6319
6320 case SET_BITMAP_FILE:
6321 err = set_bitmap_file(mddev, (int)arg);
6322 goto done_unlock;
6323
6324 default:
6325 err = -EINVAL;
6326 goto abort_unlock;
6327 }
6328
6329done_unlock:
6330abort_unlock:
6331 if (mddev->hold_active == UNTIL_IOCTL &&
6332 err != -EINVAL)
6333 mddev->hold_active = 0;
6334 mddev_unlock(mddev);
6335
6336 return err;
6337done:
6338 if (err)
6339 MD_BUG();
6340abort:
6341 return err;
6342}
6343#ifdef CONFIG_COMPAT
6344static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
6345 unsigned int cmd, unsigned long arg)
6346{
6347 switch (cmd) {
6348 case HOT_REMOVE_DISK:
6349 case HOT_ADD_DISK:
6350 case SET_DISK_FAULTY:
6351 case SET_BITMAP_FILE:
		/* These take in integer arg, do not convert */
6353 break;
6354 default:
6355 arg = (unsigned long)compat_ptr(arg);
6356 break;
6357 }
6358
6359 return md_ioctl(bdev, mode, cmd, arg);
6360}
6361#endif
6362
6363static int md_open(struct block_device *bdev, fmode_t mode)
6364{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */
6369 struct mddev *mddev = mddev_find(bdev->bd_dev);
6370 int err;
6371
6372 if (mddev->gendisk != bdev->bd_disk) {
		/* we are racing with mddev_put which is discarding this
		 * bd_disk.
		 */
		mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
		flush_workqueue(md_misc_wq);
		/* Then retry the open from the top */
		return -ERESTARTSYS;
6381 }
6382 BUG_ON(mddev != bdev->bd_disk->private_data);
6383
6384 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
6385 goto out;
6386
6387 err = 0;
6388 atomic_inc(&mddev->openers);
6389 mutex_unlock(&mddev->open_mutex);
6390
6391 check_disk_change(bdev);
6392 out:
6393 return err;
6394}
6395
6396static int md_release(struct gendisk *disk, fmode_t mode)
6397{
6398 struct mddev *mddev = disk->private_data;
6399
6400 BUG_ON(!mddev);
6401 atomic_dec(&mddev->openers);
6402 mddev_put(mddev);
6403
6404 return 0;
6405}
6406
6407static int md_media_changed(struct gendisk *disk)
6408{
6409 struct mddev *mddev = disk->private_data;
6410
6411 return mddev->changed;
6412}
6413
6414static int md_revalidate(struct gendisk *disk)
6415{
6416 struct mddev *mddev = disk->private_data;
6417
6418 mddev->changed = 0;
6419 return 0;
6420}
6421static const struct block_device_operations md_fops =
6422{
6423 .owner = THIS_MODULE,
6424 .open = md_open,
6425 .release = md_release,
6426 .ioctl = md_ioctl,
6427#ifdef CONFIG_COMPAT
6428 .compat_ioctl = md_compat_ioctl,
6429#endif
6430 .getgeo = md_getgeo,
6431 .media_changed = md_media_changed,
6432 .revalidate_disk= md_revalidate,
6433};
6434
6435static int md_thread(void * arg)
6436{
6437 struct md_thread *thread = arg;
6438
	/*
	 * An md thread simply sleeps until it is woken via
	 * md_wakeup_thread() (or its timeout expires) and then calls the
	 * run() routine it was registered with.  All the real work -
	 * resync, recovery, superblock updates - happens in run().
	 */
6451 allow_signal(SIGKILL);
6452 while (!kthread_should_stop()) {
		/* We need to wait INTERRUPTIBLE so that
		 * we don't add to the load-average.
		 * That means we need to be sure no signals are
		 * pending
		 */
6459 if (signal_pending(current))
6460 flush_signals(current);
6461
6462 wait_event_interruptible_timeout
6463 (thread->wqueue,
6464 test_bit(THREAD_WAKEUP, &thread->flags)
6465 || kthread_should_stop(),
6466 thread->timeout);
6467
6468 clear_bit(THREAD_WAKEUP, &thread->flags);
6469 if (!kthread_should_stop())
6470 thread->run(thread->mddev);
6471 }
6472
6473 return 0;
6474}
6475
6476void md_wakeup_thread(struct md_thread *thread)
6477{
6478 if (thread) {
6479 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6480 set_bit(THREAD_WAKEUP, &thread->flags);
6481 wake_up(&thread->wqueue);
6482 }
6483}
6484
6485struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev *mddev,
6486 const char *name)
6487{
6488 struct md_thread *thread;
6489
6490 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
6491 if (!thread)
6492 return NULL;
6493
6494 init_waitqueue_head(&thread->wqueue);
6495
6496 thread->run = run;
6497 thread->mddev = mddev;
6498 thread->timeout = MAX_SCHEDULE_TIMEOUT;
6499 thread->tsk = kthread_run(md_thread, thread,
6500 "%s_%s",
6501 mdname(thread->mddev),
6502 name ?: mddev->pers->name);
6503 if (IS_ERR(thread->tsk)) {
6504 kfree(thread);
6505 return NULL;
6506 }
6507 return thread;
6508}
6509
6510void md_unregister_thread(struct md_thread **threadp)
6511{
6512 struct md_thread *thread = *threadp;
6513 if (!thread)
6514 return;
6515 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/* Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread
	 */
6519 spin_lock(&pers_lock);
6520 *threadp = NULL;
6521 spin_unlock(&pers_lock);
6522
6523 kthread_stop(thread->tsk);
6524 kfree(thread);
6525}
6526
6527void md_error(struct mddev *mddev, struct md_rdev *rdev)
6528{
6529 if (!mddev) {
6530 MD_BUG();
6531 return;
6532 }
6533
6534 if (!rdev || test_bit(Faulty, &rdev->flags))
6535 return;
6536
6537 if (!mddev->pers || !mddev->pers->error_handler)
6538 return;
6539 mddev->pers->error_handler(mddev,rdev);
6540 if (mddev->degraded)
6541 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6542 sysfs_notify_dirent_safe(rdev->sysfs_state);
6543 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6544 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6545 md_wakeup_thread(mddev->thread);
6546 if (mddev->event_work.func)
6547 queue_work(md_misc_wq, &mddev->event_work);
6548 md_new_event_inintr(mddev);
6549}
6550
6551
6552
6553static void status_unused(struct seq_file *seq)
6554{
6555 int i = 0;
6556 struct md_rdev *rdev;
6557
6558 seq_printf(seq, "unused devices: ");
6559
6560 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
6561 char b[BDEVNAME_SIZE];
6562 i++;
6563 seq_printf(seq, "%s ",
6564 bdevname(rdev->bdev,b));
6565 }
6566 if (!i)
6567 seq_printf(seq, "<none>");
6568
6569 seq_printf(seq, "\n");
6570}
6571
6572
6573static void status_resync(struct seq_file *seq, struct mddev * mddev)
6574{
6575 sector_t max_sectors, resync, res;
6576 unsigned long dt, db;
6577 sector_t rt;
6578 int scale;
6579 unsigned int per_milli;
6580
6581 resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
6582
6583 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
6584 max_sectors = mddev->resync_max_sectors;
6585 else
6586 max_sectors = mddev->dev_sectors;
6587
	/*
	 * Should not happen.
	 */
6591 if (!max_sectors) {
6592 MD_BUG();
6593 return;
6594 }
6595
	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	 * in a sector_t, and (max_sectors>>scale) will fit in a
	 * u32, as those are the requirements for sector_div.
	 * Thus 'scale' must be at least 10.
	 */
6600 scale = 10;
6601 if (sizeof(sector_t) > sizeof(unsigned long)) {
6602 while ( max_sectors/2 > (1ULL<<(scale+32)))
6603 scale++;
6604 }
6605 res = (resync>>scale)*1000;
6606 sector_div(res, (u32)((max_sectors>>scale)+1));
6607
6608 per_milli = res;
6609 {
6610 int i, x = per_milli/50, y = 20-x;
6611 seq_printf(seq, "[");
6612 for (i = 0; i < x; i++)
6613 seq_printf(seq, "=");
6614 seq_printf(seq, ">");
6615 for (i = 0; i < y; i++)
6616 seq_printf(seq, ".");
6617 seq_printf(seq, "] ");
6618 }
6619 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
6620 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
6621 "reshape" :
6622 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
6623 "check" :
6624 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
6625 "resync" : "recovery"))),
6626 per_milli/10, per_milli % 10,
6627 (unsigned long long) resync/2,
6628 (unsigned long long) max_sectors/2);
6629
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, so we divide before multiplying in case it
	 * is 32bit and close to the limit.  The divisor (db) is scaled
	 * by 32 to avoid losing precision near the end of the resync,
	 * and rt is shifted down by 5 afterwards to compensate.  The
	 * '+1' avoids division by zero if db is very small.
	 */
6644 dt = ((jiffies - mddev->resync_mark) / HZ);
6645 if (!dt) dt++;
6646 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
6647 - mddev->resync_mark_cnt;
6648
6649 rt = max_sectors - resync;
6650 sector_div(rt, db/32+1);
6651 rt *= dt;
6652 rt >>= 5;
6653
6654 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
6655 ((unsigned long)rt % 60)/6);
6656
6657 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
6658}
6659
6660static void *md_seq_start(struct seq_file *seq, loff_t *pos)
6661{
6662 struct list_head *tmp;
6663 loff_t l = *pos;
6664 struct mddev *mddev;
6665
6666 if (l >= 0x10000)
6667 return NULL;
6668 if (!l--)
6669
6670 return (void*)1;
6671
6672 spin_lock(&all_mddevs_lock);
6673 list_for_each(tmp,&all_mddevs)
6674 if (!l--) {
6675 mddev = list_entry(tmp, struct mddev, all_mddevs);
6676 mddev_get(mddev);
6677 spin_unlock(&all_mddevs_lock);
6678 return mddev;
6679 }
6680 spin_unlock(&all_mddevs_lock);
6681 if (!l--)
6682 return (void*)2;
6683 return NULL;
6684}
6685
6686static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
6687{
6688 struct list_head *tmp;
6689 struct mddev *next_mddev, *mddev = v;
6690
6691 ++*pos;
6692 if (v == (void*)2)
6693 return NULL;
6694
6695 spin_lock(&all_mddevs_lock);
6696 if (v == (void*)1)
6697 tmp = all_mddevs.next;
6698 else
6699 tmp = mddev->all_mddevs.next;
6700 if (tmp != &all_mddevs)
6701 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
6702 else {
6703 next_mddev = (void*)2;
6704 *pos = 0x10000;
6705 }
6706 spin_unlock(&all_mddevs_lock);
6707
6708 if (v != (void*)1)
6709 mddev_put(mddev);
6710 return next_mddev;
6711
6712}
6713
6714static void md_seq_stop(struct seq_file *seq, void *v)
6715{
6716 struct mddev *mddev = v;
6717
6718 if (mddev && v != (void*)1 && v != (void*)2)
6719 mddev_put(mddev);
6720}
6721
6722static int md_seq_show(struct seq_file *seq, void *v)
6723{
6724 struct mddev *mddev = v;
6725 sector_t sectors;
6726 struct md_rdev *rdev;
6727 struct bitmap *bitmap;
6728
6729 if (v == (void*)1) {
6730 struct md_personality *pers;
6731 seq_printf(seq, "Personalities : ");
6732 spin_lock(&pers_lock);
6733 list_for_each_entry(pers, &pers_list, list)
6734 seq_printf(seq, "[%s] ", pers->name);
6735
6736 spin_unlock(&pers_lock);
6737 seq_printf(seq, "\n");
6738 seq->poll_event = atomic_read(&md_event_count);
6739 return 0;
6740 }
6741 if (v == (void*)2) {
6742 status_unused(seq);
6743 return 0;
6744 }
6745
6746 if (mddev_lock(mddev) < 0)
6747 return -EINTR;
6748
6749 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
6750 seq_printf(seq, "%s : %sactive", mdname(mddev),
6751 mddev->pers ? "" : "in");
6752 if (mddev->pers) {
6753 if (mddev->ro==1)
6754 seq_printf(seq, " (read-only)");
6755 if (mddev->ro==2)
6756 seq_printf(seq, " (auto-read-only)");
6757 seq_printf(seq, " %s", mddev->pers->name);
6758 }
6759
6760 sectors = 0;
6761 list_for_each_entry(rdev, &mddev->disks, same_set) {
6762 char b[BDEVNAME_SIZE];
6763 seq_printf(seq, " %s[%d]",
6764 bdevname(rdev->bdev,b), rdev->desc_nr);
6765 if (test_bit(WriteMostly, &rdev->flags))
6766 seq_printf(seq, "(W)");
6767 if (test_bit(Faulty, &rdev->flags)) {
6768 seq_printf(seq, "(F)");
6769 continue;
6770 }
6771 if (rdev->raid_disk < 0)
6772 seq_printf(seq, "(S)");
6773 if (test_bit(Replacement, &rdev->flags))
6774 seq_printf(seq, "(R)");
6775 sectors += rdev->sectors;
6776 }
6777
6778 if (!list_empty(&mddev->disks)) {
6779 if (mddev->pers)
6780 seq_printf(seq, "\n %llu blocks",
6781 (unsigned long long)
6782 mddev->array_sectors / 2);
6783 else
6784 seq_printf(seq, "\n %llu blocks",
6785 (unsigned long long)sectors / 2);
6786 }
6787 if (mddev->persistent) {
6788 if (mddev->major_version != 0 ||
6789 mddev->minor_version != 90) {
6790 seq_printf(seq," super %d.%d",
6791 mddev->major_version,
6792 mddev->minor_version);
6793 }
6794 } else if (mddev->external)
6795 seq_printf(seq, " super external:%s",
6796 mddev->metadata_type);
6797 else
6798 seq_printf(seq, " super non-persistent");
6799
6800 if (mddev->pers) {
6801 mddev->pers->status(seq, mddev);
6802 seq_printf(seq, "\n ");
6803 if (mddev->pers->sync_request) {
6804 if (mddev->curr_resync > 2) {
6805 status_resync(seq, mddev);
6806 seq_printf(seq, "\n ");
6807 } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
6808 seq_printf(seq, "\tresync=DELAYED\n ");
6809 else if (mddev->recovery_cp < MaxSector)
6810 seq_printf(seq, "\tresync=PENDING\n ");
6811 }
6812 } else
6813 seq_printf(seq, "\n ");
6814
6815 if ((bitmap = mddev->bitmap)) {
6816 unsigned long chunk_kb;
6817 unsigned long flags;
6818 spin_lock_irqsave(&bitmap->lock, flags);
6819 chunk_kb = mddev->bitmap_info.chunksize >> 10;
6820 seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
6821 "%lu%s chunk",
6822 bitmap->pages - bitmap->missing_pages,
6823 bitmap->pages,
6824 (bitmap->pages - bitmap->missing_pages)
6825 << (PAGE_SHIFT - 10),
6826 chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
6827 chunk_kb ? "KB" : "B");
6828 if (bitmap->file) {
6829 seq_printf(seq, ", file: ");
6830 seq_path(seq, &bitmap->file->f_path, " \t\n");
6831 }
6832
6833 seq_printf(seq, "\n");
6834 spin_unlock_irqrestore(&bitmap->lock, flags);
6835 }
6836
6837 seq_printf(seq, "\n");
6838 }
6839 mddev_unlock(mddev);
6840
6841 return 0;
6842}
6843
6844static const struct seq_operations md_seq_ops = {
6845 .start = md_seq_start,
6846 .next = md_seq_next,
6847 .stop = md_seq_stop,
6848 .show = md_seq_show,
6849};
6850
6851static int md_seq_open(struct inode *inode, struct file *file)
6852{
6853 struct seq_file *seq;
6854 int error;
6855
6856 error = seq_open(file, &md_seq_ops);
6857 if (error)
6858 return error;
6859
6860 seq = file->private_data;
6861 seq->poll_event = atomic_read(&md_event_count);
6862 return error;
6863}
6864
6865static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
6866{
6867 struct seq_file *seq = filp->private_data;
6868 int mask;
6869
6870 poll_wait(filp, &md_event_waiters, wait);
6871
6872
6873 mask = POLLIN | POLLRDNORM;
6874
6875 if (seq->poll_event != atomic_read(&md_event_count))
6876 mask |= POLLERR | POLLPRI;
6877 return mask;
6878}
6879
6880static const struct file_operations md_seq_fops = {
6881 .owner = THIS_MODULE,
6882 .open = md_seq_open,
6883 .read = seq_read,
6884 .llseek = seq_lseek,
6885 .release = seq_release_private,
6886 .poll = mdstat_poll,
6887};
6888
6889int register_md_personality(struct md_personality *p)
6890{
6891 spin_lock(&pers_lock);
6892 list_add_tail(&p->list, &pers_list);
6893 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
6894 spin_unlock(&pers_lock);
6895 return 0;
6896}
6897
6898int unregister_md_personality(struct md_personality *p)
6899{
6900 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
6901 spin_lock(&pers_lock);
6902 list_del_init(&p->list);
6903 spin_unlock(&pers_lock);
6904 return 0;
6905}
6906
6907static int is_mddev_idle(struct mddev *mddev, int init)
6908{
6909 struct md_rdev * rdev;
6910 int idle;
6911 int curr_events;
6912
6913 idle = 1;
6914 rcu_read_lock();
6915 rdev_for_each_rcu(rdev, mddev) {
6916 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
6917 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
6918 (int)part_stat_read(&disk->part0, sectors[1]) -
6919 atomic_read(&disk->sync_io);
6941
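/* A sketch of the idle heuristic used here: compare the total I/O on the
 * whole device (read + write sectors) with the I/O md itself issued for
 * resync (disk->sync_io); the difference is non-sync activity.  If it has
 * advanced by more than a small fudge factor (64 sectors) since the last
 * check, the device is considered busy and resync should back off.
 * 'init' just (re)initialises the counters and reports not-idle.
 */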
6942 if (init || curr_events - rdev->last_events > 64) {
6943 rdev->last_events = curr_events;
6944 idle = 0;
6945 }
6946 }
6947 rcu_read_unlock();
6948 return idle;
6949}
6950
6951void md_done_sync(struct mddev *mddev, int blocks, int ok)
6952{
6953
6954 atomic_sub(blocks, &mddev->recovery_active);
6955 wake_up(&mddev->recovery_wait);
6956 if (!ok) {
6957 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6958 md_wakeup_thread(mddev->thread);
6959
6960 }
6961}
6962
6963
6964
6965
6966
6967
6968
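/* md_write_start(mddev, bi)
 * If array metadata needs to change before this write can proceed
 * (e.g. the array must be marked active/dirty in the superblock),
 * schedule the superblock update and wait for it to complete.
 */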
6969void md_write_start(struct mddev *mddev, struct bio *bi)
6970{
6971 int did_change = 0;
6972 if (bio_data_dir(bi) != WRITE)
6973 return;
6974
6975 BUG_ON(mddev->ro == 1);
6976 if (mddev->ro == 2) {
/* need to switch to read/write */
6978 mddev->ro = 0;
6979 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6980 md_wakeup_thread(mddev->thread);
6981 md_wakeup_thread(mddev->sync_thread);
6982 did_change = 1;
6983 }
6984 atomic_inc(&mddev->writes_pending);
6985 if (mddev->safemode == 1)
6986 mddev->safemode = 0;
6987 if (mddev->in_sync) {
6988 spin_lock_irq(&mddev->write_lock);
6989 if (mddev->in_sync) {
6990 mddev->in_sync = 0;
6991 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6992 set_bit(MD_CHANGE_PENDING, &mddev->flags);
6993 md_wakeup_thread(mddev->thread);
6994 did_change = 1;
6995 }
6996 spin_unlock_irq(&mddev->write_lock);
6997 }
6998 if (did_change)
6999 sysfs_notify_dirent_safe(mddev->sysfs_state);
7000 wait_event(mddev->sb_wait,
7001 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7002}
7003
7004void md_write_end(struct mddev *mddev)
7005{
7006 if (atomic_dec_and_test(&mddev->writes_pending)) {
7007 if (mddev->safemode == 2)
7008 md_wakeup_thread(mddev->thread);
7009 else if (mddev->safemode_delay)
7010 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7011 }
7012}
7013
7014
7015
7016
7017
7018
7019
7020
7021
7022
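/* md_allow_write(mddev)
 * Mark the array as not-in-sync and push the superblock out so that
 * subsequent writes can proceed without blocking on a metadata update.
 * Typically called, with the mddev lock held, before memory allocations
 * that might indirectly wait on writes to the array.
 * Returns -EAGAIN if the metadata update is still pending (e.g. when
 * metadata is managed externally), otherwise 0.
 */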
7023int md_allow_write(struct mddev *mddev)
7024{
7025 if (!mddev->pers)
7026 return 0;
7027 if (mddev->ro)
7028 return 0;
7029 if (!mddev->pers->sync_request)
7030 return 0;
7031
7032 spin_lock_irq(&mddev->write_lock);
7033 if (mddev->in_sync) {
7034 mddev->in_sync = 0;
7035 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7036 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7037 if (mddev->safemode_delay &&
7038 mddev->safemode == 0)
7039 mddev->safemode = 1;
7040 spin_unlock_irq(&mddev->write_lock);
7041 md_update_sb(mddev, 0);
7042 sysfs_notify_dirent_safe(mddev->sysfs_state);
7043 } else
7044 spin_unlock_irq(&mddev->write_lock);
7045
7046 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7047 return -EAGAIN;
7048 else
7049 return 0;
7050}
7051EXPORT_SYMBOL_GPL(md_allow_write);
7052
7053#define SYNC_MARKS 10
7054#define SYNC_MARK_STEP (3*HZ)
7055void md_do_sync(struct mddev *mddev)
7056{
7057 struct mddev *mddev2;
7058 unsigned int currspeed = 0,
7059 window;
7060 sector_t max_sectors,j, io_sectors;
7061 unsigned long mark[SYNC_MARKS];
7062 sector_t mark_cnt[SYNC_MARKS];
7063 int last_mark,m;
7064 struct list_head *tmp;
7065 sector_t last_check;
7066 int skipped = 0;
7067 struct md_rdev *rdev;
7068 char *desc;
7069
7070
7071 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7072 return;
7073 if (mddev->ro)
7074 return;
7075
7076 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7077 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
7078 desc = "data-check";
7079 else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7080 desc = "requested-resync";
7081 else
7082 desc = "resync";
7083 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7084 desc = "reshape";
7085 else
7086 desc = "recovery";
7087
7103
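/* curr_resync is overloaded while we decide whether to start:
 *   0     == not engaged in resync at all
 *   2     == checking that there is no conflict with another sync
 *   1     == like 2, but we have yielded to a conflicting resync
 *   other == actively syncing, this many sectors completed
 * We set curr_resync to 2, then check every array that shares physical
 * devices with us; if one of them has curr_resync >= ours we wait on
 * resync_wait, yielding (dropping to 1) or not based on the relative
 * addresses of the two mddev structures.
 */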
7104 do {
7105 mddev->curr_resync = 2;
7106
7107 try_again:
7108 if (kthread_should_stop())
7109 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7110
7111 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7112 goto skip;
7113 for_each_mddev(mddev2, tmp) {
7114 if (mddev2 == mddev)
7115 continue;
7116 if (!mddev->parallel_resync
7117 && mddev2->curr_resync
7118 && match_mddev_units(mddev, mddev2)) {
7119 DEFINE_WAIT(wq);
7120 if (mddev < mddev2 && mddev->curr_resync == 2) {
7121
7122 mddev->curr_resync = 1;
7123 wake_up(&resync_wait);
7124 }
7125 if (mddev > mddev2 && mddev->curr_resync == 1)
/* no need to wait here, we can wait the next
 * time 'round when curr_resync == 2
 */
7129 continue;
/* We wait interruptibly so that we do not contribute to the
 * load average and are not flagged as hung while blocked
 * behind the other resync.
 */
7134 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7135 if (!kthread_should_stop() &&
7136 mddev2->curr_resync >= mddev->curr_resync) {
7137 printk(KERN_INFO "md: delaying %s of %s"
7138 " until %s has finished (they"
7139 " share one or more physical units)\n",
7140 desc, mdname(mddev), mdname(mddev2));
7141 mddev_put(mddev2);
7142 if (signal_pending(current))
7143 flush_signals(current);
7144 schedule();
7145 finish_wait(&resync_wait, &wq);
7146 goto try_again;
7147 }
7148 finish_wait(&resync_wait, &wq);
7149 }
7150 }
7151 } while (mddev->curr_resync < 2);
7152
7153 j = 0;
7154 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
/* resync follows the size requested by the personality,
 * which defaults to the physical size but may be a virtual size
 */
7158 max_sectors = mddev->resync_max_sectors;
7159 mddev->resync_mismatches = 0;
7160
7161 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7162 j = mddev->resync_min;
7163 else if (!mddev->bitmap)
7164 j = mddev->recovery_cp;
7165
7166 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7167 max_sectors = mddev->dev_sectors;
7168 else {
/* recovery follows the physical size of the devices */
7170 max_sectors = mddev->dev_sectors;
7171 j = MaxSector;
7172 rcu_read_lock();
7173 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
7174 if (rdev->raid_disk >= 0 &&
7175 !test_bit(Faulty, &rdev->flags) &&
7176 !test_bit(In_sync, &rdev->flags) &&
7177 rdev->recovery_offset < j)
7178 j = rdev->recovery_offset;
7179 rcu_read_unlock();
7180 }
7181
7182 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7183 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
7184 " %d KB/sec/disk.\n", speed_min(mddev));
7185 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
7186 "(but not more than %d KB/sec) for %s.\n",
7187 speed_max(mddev), desc);
7188
7189 is_mddev_idle(mddev, 1);
7190
7191 io_sectors = 0;
7192 for (m = 0; m < SYNC_MARKS; m++) {
7193 mark[m] = jiffies;
7194 mark_cnt[m] = io_sectors;
7195 }
7196 last_mark = 0;
7197 mddev->resync_mark = mark[last_mark];
7198 mddev->resync_mark_cnt = mark_cnt[last_mark];
7199
7200
7201
7202
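/* The speed marks and throttling checks further down only run once
 * per 'window' of completed resync I/O, not after every request.
 */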
7203 window = 32*(PAGE_SIZE/512);
7204 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
7205 window/2, (unsigned long long)max_sectors/2);
7206
7207 atomic_set(&mddev->recovery_active, 0);
7208 last_check = 0;
7209
7210 if (j>2) {
7211 printk(KERN_INFO
7212 "md: resuming %s of %s from checkpoint.\n",
7213 desc, mdname(mddev));
7214 mddev->curr_resync = j;
7215 }
7216 mddev->curr_resync_completed = j;
7217
7218 while (j < max_sectors) {
7219 sector_t sectors;
7220
7221 skipped = 0;
7222
7223 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7224 ((mddev->curr_resync > mddev->curr_resync_completed &&
7225 (mddev->curr_resync - mddev->curr_resync_completed)
7226 > (max_sectors >> 4)) ||
7227 (j - mddev->curr_resync_completed)*2
7228 >= mddev->resync_max - mddev->curr_resync_completed
7229 )) {
7230
7231 wait_event(mddev->recovery_wait,
7232 atomic_read(&mddev->recovery_active) == 0);
7233 mddev->curr_resync_completed = j;
7234 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7235 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7236 }
7237
7238 while (j >= mddev->resync_max && !kthread_should_stop()) {
7239
7240
7241
7242
7243 flush_signals(current);
7244 wait_event_interruptible(mddev->recovery_wait,
7245 mddev->resync_max > j
7246 || kthread_should_stop());
7247 }
7248
7249 if (kthread_should_stop())
7250 goto interrupted;
7251
7252 sectors = mddev->pers->sync_request(mddev, j, &skipped,
7253 currspeed < speed_min(mddev));
7254 if (sectors == 0) {
7255 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7256 goto out;
7257 }
7258
7259 if (!skipped) {
7260 io_sectors += sectors;
7261 atomic_add(sectors, &mddev->recovery_active);
7262 }
7263
7264 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7265 break;
7266
7267 j += sectors;
7268 if (j>1) mddev->curr_resync = j;
7269 mddev->curr_mark_cnt = io_sectors;
7270 if (last_check == 0)
/* this is the earliest point at which the rebuild will be
 * visible in /proc/mdstat
 */
7274 md_new_event(mddev);
7275
7276 if (last_check + window > io_sectors || j == max_sectors)
7277 continue;
7278
7279 last_check = io_sectors;
7280 repeat:
7281 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
7282
7283 int next = (last_mark+1) % SYNC_MARKS;
7284
7285 mddev->resync_mark = mark[next];
7286 mddev->resync_mark_cnt = mark_cnt[next];
7287 mark[next] = jiffies;
7288 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
7289 last_mark = next;
7290 }
7291
7292
7293 if (kthread_should_stop())
7294 goto interrupted;
7295
7296
7297
7298
7299
7300
7301
7302
7303
7304
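/* Throttle: if we are running faster than the guaranteed minimum
 * speed and either exceed the maximum speed or the array is not
 * idle, sleep briefly and re-check before issuing more resync I/O.
 */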
7305 cond_resched();
7306
7307 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
7308 /((jiffies-mddev->resync_mark)/HZ +1) +1;
7309
7310 if (currspeed > speed_min(mddev)) {
7311 if ((currspeed > speed_max(mddev)) ||
7312 !is_mddev_idle(mddev, 0)) {
7313 msleep(500);
7314 goto repeat;
7315 }
7316 }
7317 }
7318 printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
7319
7320
7321
7322 out:
7323 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
7324
7325
7326 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
7327
7328 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
7329 mddev->curr_resync > 2) {
7330 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7331 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7332 if (mddev->curr_resync >= mddev->recovery_cp) {
7333 printk(KERN_INFO
7334 "md: checkpointing %s of %s.\n",
7335 desc, mdname(mddev));
7336 mddev->recovery_cp =
7337 mddev->curr_resync_completed;
7338 }
7339 } else
7340 mddev->recovery_cp = MaxSector;
7341 } else {
7342 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7343 mddev->curr_resync = MaxSector;
7344 rcu_read_lock();
7345 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
7346 if (rdev->raid_disk >= 0 &&
7347 mddev->delta_disks >= 0 &&
7348 !test_bit(Faulty, &rdev->flags) &&
7349 !test_bit(In_sync, &rdev->flags) &&
7350 rdev->recovery_offset < mddev->curr_resync)
7351 rdev->recovery_offset = mddev->curr_resync;
7352 rcu_read_unlock();
7353 }
7354 }
7355 skip:
7356 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7357
7358 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7359
7360 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7361 mddev->resync_min = 0;
7362 mddev->resync_max = MaxSector;
7363 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7364 mddev->resync_min = mddev->curr_resync_completed;
7365 mddev->curr_resync = 0;
7366 wake_up(&resync_wait);
7367 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
7368 md_wakeup_thread(mddev->thread);
7369 return;
7370
7371 interrupted:
7372
7373
7374
7375 printk(KERN_INFO
7376 "md: md_do_sync() got signal ... exiting\n");
7377 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7378 goto out;
7379
7380}
7381EXPORT_SYMBOL_GPL(md_do_sync);
7382
7383static int remove_and_add_spares(struct mddev *mddev)
7384{
7385 struct md_rdev *rdev;
7386 int spares = 0;
7387 int removed = 0;
7388
7389 mddev->curr_resync_completed = 0;
7390
7391 list_for_each_entry(rdev, &mddev->disks, same_set)
7392 if (rdev->raid_disk >= 0 &&
7393 !test_bit(Blocked, &rdev->flags) &&
7394 (test_bit(Faulty, &rdev->flags) ||
7395 ! test_bit(In_sync, &rdev->flags)) &&
7396 atomic_read(&rdev->nr_pending)==0) {
7397 if (mddev->pers->hot_remove_disk(
7398 mddev, rdev) == 0) {
7399 sysfs_unlink_rdev(mddev, rdev);
7400 rdev->raid_disk = -1;
7401 removed++;
7402 }
7403 }
7404 if (removed)
7405 sysfs_notify(&mddev->kobj, NULL,
7406 "degraded");
7407
7408
7409 list_for_each_entry(rdev, &mddev->disks, same_set) {
7410 if (rdev->raid_disk >= 0 &&
7411 !test_bit(In_sync, &rdev->flags) &&
7412 !test_bit(Faulty, &rdev->flags))
7413 spares++;
7414 if (rdev->raid_disk < 0
7415 && !test_bit(Faulty, &rdev->flags)) {
7416 rdev->recovery_offset = 0;
7417 if (mddev->pers->
7418 hot_add_disk(mddev, rdev) == 0) {
if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
7421 spares++;
7422 md_new_event(mddev);
7423 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7424 }
7425 }
7426 }
7427 return spares;
7428}
7429
7430static void reap_sync_thread(struct mddev *mddev)
7431{
7432 struct md_rdev *rdev;
7433
7434
7435 md_unregister_thread(&mddev->sync_thread);
7436 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7437 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7438
7439
7440 if (mddev->pers->spare_active(mddev))
7441 sysfs_notify(&mddev->kobj, NULL,
7442 "degraded");
7443 }
7444 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7445 mddev->pers->finish_reshape)
7446 mddev->pers->finish_reshape(mddev);
7447
7448
7449
7450
7451
7452
7453
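/* Recovery has finished: forget the remembered slot of every device
 * that is now in_sync (or of all devices if the array is no longer
 * degraded), so it is not later treated as a re-add to its old slot.
 */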
7454 list_for_each_entry(rdev, &mddev->disks, same_set)
7455 if (!mddev->degraded ||
7456 test_bit(In_sync, &rdev->flags))
7457 rdev->saved_raid_disk = -1;
7458
7459 md_update_sb(mddev, 1);
7460 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7461 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7462 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7463 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7464 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7465
7466 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7467 sysfs_notify_dirent_safe(mddev->sysfs_action);
7468 md_new_event(mddev);
7469 if (mddev->event_work.func)
7470 queue_work(md_misc_wq, &mddev->event_work);
7471}
7472
7494
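/* This routine is intended to be called regularly by the per-array
 * daemon thread to handle generic housekeeping: superblock updates,
 * switching between 'clean' and 'active', removing failed devices,
 * adding spares, and starting or reaping the resync/recovery thread.
 * It never resyncs anything itself; when resync or recovery is needed
 * it sets MD_RECOVERY_RUNNING and registers a separate ->sync_thread
 * running md_do_sync().  That thread sets MD_RECOVERY_DONE when it
 * finishes and wakes us up again so it can be reaped here.
 */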
7495void md_check_recovery(struct mddev *mddev)
7496{
7497 if (mddev->suspended)
7498 return;
7499
7500 if (mddev->bitmap)
7501 bitmap_daemon_work(mddev);
7502
7503 if (signal_pending(current)) {
7504 if (mddev->pers->sync_request && !mddev->external) {
7505 printk(KERN_INFO "md: %s in immediate safe mode\n",
7506 mdname(mddev));
7507 mddev->safemode = 2;
7508 }
7509 flush_signals(current);
7510 }
7511
7512 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
7513 return;
7514 if ( ! (
7515 (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) ||
7516 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7517 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
7518 (mddev->external == 0 && mddev->safemode == 1) ||
7519 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
7520 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
7521 ))
7522 return;
7523
7524 if (mddev_trylock(mddev)) {
7525 int spares = 0;
7526
7527 if (mddev->ro) {
/* The only maintenance we do on a read-only array is to
 * remove failed devices that have no pending I/O.
 */
7531 struct md_rdev *rdev;
7532 list_for_each_entry(rdev, &mddev->disks, same_set)
7533 if (rdev->raid_disk >= 0 &&
7534 !test_bit(Blocked, &rdev->flags) &&
7535 test_bit(Faulty, &rdev->flags) &&
7536 atomic_read(&rdev->nr_pending)==0) {
7537 if (mddev->pers->hot_remove_disk(
7538 mddev, rdev) == 0) {
7539 sysfs_unlink_rdev(mddev, rdev);
7540 rdev->raid_disk = -1;
7541 }
7542 }
7543 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7544 goto unlock;
7545 }
7546
7547 if (!mddev->external) {
7548 int did_change = 0;
7549 spin_lock_irq(&mddev->write_lock);
7550 if (mddev->safemode &&
7551 !atomic_read(&mddev->writes_pending) &&
7552 !mddev->in_sync &&
7553 mddev->recovery_cp == MaxSector) {
7554 mddev->in_sync = 1;
7555 did_change = 1;
7556 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7557 }
7558 if (mddev->safemode == 1)
7559 mddev->safemode = 0;
7560 spin_unlock_irq(&mddev->write_lock);
7561 if (did_change)
7562 sysfs_notify_dirent_safe(mddev->sysfs_state);
7563 }
7564
7565 if (mddev->flags)
7566 md_update_sb(mddev, 0);
7567
7568 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
7569 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
7570
7571 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7572 goto unlock;
7573 }
7574 if (mddev->sync_thread) {
7575 reap_sync_thread(mddev);
7576 goto unlock;
7577 }
7578
7579
7580
7581 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7582 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7583
7584
7585
7586 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
7587 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
7588
7589 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
7590 goto unlock;
7591
7592
7593
7594
7595
7596
7597
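/* No recovery is running and nothing is frozen.  Decide what, if
 * anything, to start: continue an interrupted reshape, recover onto
 * newly added spares, resync an array whose recovery_cp shows it is
 * not fully in sync, or nothing at all.
 */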
7598 if (mddev->reshape_position != MaxSector) {
7599 if (mddev->pers->check_reshape == NULL ||
7600 mddev->pers->check_reshape(mddev) != 0)
7601
7602 goto unlock;
7603 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7604 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7605 } else if ((spares = remove_and_add_spares(mddev))) {
7606 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7607 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7608 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7609 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7610 } else if (mddev->recovery_cp < MaxSector) {
7611 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7612 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7613 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
7614
7615 goto unlock;
7616
7617 if (mddev->pers->sync_request) {
7618 if (spares && mddev->bitmap && ! mddev->bitmap->file) {
7619
7620
7621
7622
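/* We are adding spares to an array whose bitmap lives on every
 * member device, so make sure all bitmap pages get written out;
 * the newly added devices do not have a valid copy yet.
 */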
7623 bitmap_write_all(mddev->bitmap);
7624 }
7625 mddev->sync_thread = md_register_thread(md_do_sync,
7626 mddev,
7627 "resync");
7628 if (!mddev->sync_thread) {
7629 printk(KERN_ERR "%s: could not start resync"
7630 " thread...\n",
7631 mdname(mddev));
7632
7633 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7634 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7635 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7636 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7637 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7638 } else
7639 md_wakeup_thread(mddev->sync_thread);
7640 sysfs_notify_dirent_safe(mddev->sysfs_action);
7641 md_new_event(mddev);
7642 }
7643 unlock:
7644 if (!mddev->sync_thread) {
7645 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7646 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7647 &mddev->recovery))
7648 if (mddev->sysfs_action)
7649 sysfs_notify_dirent_safe(mddev->sysfs_action);
7650 }
7651 mddev_unlock(mddev);
7652 }
7653}
7654
7655void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
7656{
7657 sysfs_notify_dirent_safe(rdev->sysfs_state);
7658 wait_event_timeout(rdev->blocked_wait,
7659 !test_bit(Blocked, &rdev->flags) &&
7660 !test_bit(BlockedBadBlocks, &rdev->flags),
7661 msecs_to_jiffies(5000));
7662 rdev_dec_pending(rdev, mddev);
7663}
7664EXPORT_SYMBOL(md_wait_for_blocked_rdev);
7692
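/* Bad block management.
 * Each rdev can record ranges of sectors that are known to be bad, so
 * that just those ranges, rather than the whole device, need to fail.
 * Table entries are 64 bits wide: a start offset, a length of up to
 * BB_MAX_LEN units, and an 'acknowledged' flag saying the range is
 * safely recorded in the metadata.  bb->shift sets the granularity so
 * very large devices can still be covered.  The table is protected by
 * a seqlock, so readers may occasionally have to retry.
 *
 * md_is_badblock() returns:
 *   0  if no known bad blocks intersect the range,
 *   1  if bad blocks intersect it and all of them are acknowledged,
 *  -1  if any intersecting bad block is not yet acknowledged,
 * and fills *first_bad / *bad_sectors with the first overlapping range.
 */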
7693int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
7694 sector_t *first_bad, int *bad_sectors)
7695{
7696 int hi;
7697 int lo = 0;
7698 u64 *p = bb->page;
7699 int rv = 0;
7700 sector_t target = s + sectors;
7701 unsigned seq;
7702
7703 if (bb->shift > 0) {
/* round the start down, and the end up */
7705 s >>= bb->shift;
7706 target += (1<<bb->shift) - 1;
7707 target >>= bb->shift;
7708 sectors = target - s;
7709 }
7710
7711
7712retry:
7713 seq = read_seqbegin(&bb->lock);
7714
7715 hi = bb->count;
7716
7717
7718
7719
7720
7721
7722
7723
7724
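/* Binary search for the last entry that starts before 'target':
 * 'lo' always indexes a range known to start before target (or 0 if
 * none has been found yet) and 'hi' the first range that does not.
 */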
7725 while (hi - lo > 1) {
7726 int mid = (lo + hi) / 2;
7727 sector_t a = BB_OFFSET(p[mid]);
7728 if (a < target)
7729
7730
7731 lo = mid;
7732 else
7733
7734 hi = mid;
7735 }
7736
7737 if (hi > lo) {
7738
7739
7740
7741 while (lo >= 0 &&
7742 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
7743 if (BB_OFFSET(p[lo]) < target) {
7744
7745
7746
7747 if (rv != -1 && BB_ACK(p[lo]))
7748 rv = 1;
7749 else
7750 rv = -1;
7751 *first_bad = BB_OFFSET(p[lo]);
7752 *bad_sectors = BB_LEN(p[lo]);
7753 }
7754 lo--;
7755 }
7756 }
7757
7758 if (read_seqretry(&bb->lock, seq))
7759 goto retry;
7760
7761 return rv;
7762}
7763EXPORT_SYMBOL_GPL(md_is_badblock);
7764
7765
7766
7767
7768
7769
7770
7771
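/* Add a range of bad blocks to the table.
 * This might extend the table, or might contract it if two adjacent
 * ranges can be merged.  We binary-search for the insertion point and
 * then merge with the preceding and following entries where possible.
 * Returns 1 on success, or 0 if the table is full and the range could
 * not be recorded completely.
 */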
7772static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
7773 int acknowledged)
7774{
7775 u64 *p;
7776 int lo, hi;
7777 int rv = 1;
7778
7779 if (bb->shift < 0)
/* bad block tracking is disabled for this device */
7781 return 0;
7782
7783 if (bb->shift) {
/* round the start down, and the end up */
7785 sector_t next = s + sectors;
7786 s >>= bb->shift;
7787 next += (1<<bb->shift) - 1;
7788 next >>= bb->shift;
7789 sectors = next - s;
7790 }
7791
7792 write_seqlock_irq(&bb->lock);
7793
7794 p = bb->page;
7795 lo = 0;
7796 hi = bb->count;
7797
7798 while (hi - lo > 1) {
7799 int mid = (lo + hi) / 2;
7800 sector_t a = BB_OFFSET(p[mid]);
7801 if (a <= s)
7802 lo = mid;
7803 else
7804 hi = mid;
7805 }
7806 if (hi > lo && BB_OFFSET(p[lo]) > s)
7807 hi = lo;
7808
7809 if (hi > lo) {
7810
7811
7812
7813 sector_t a = BB_OFFSET(p[lo]);
7814 sector_t e = a + BB_LEN(p[lo]);
7815 int ack = BB_ACK(p[lo]);
7816 if (e >= s) {
7817
7818 if (s == a && s + sectors >= e)
7819
7820 ack = acknowledged;
7821 else
7822 ack = ack && acknowledged;
7823
7824 if (e < s + sectors)
7825 e = s + sectors;
7826 if (e - a <= BB_MAX_LEN) {
7827 p[lo] = BB_MAKE(a, e-a, ack);
7828 s = e;
7829 } else {
7830
7831
7832
7833 if (BB_LEN(p[lo]) != BB_MAX_LEN)
7834 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
7835 s = a + BB_MAX_LEN;
7836 }
7837 sectors = e - s;
7838 }
7839 }
7840 if (sectors && hi < bb->count) {
7841
7842
7843 sector_t a = BB_OFFSET(p[hi]);
7844 sector_t e = a + BB_LEN(p[hi]);
7845 int ack = BB_ACK(p[hi]);
7846 if (a <= s + sectors) {
7847
7848 if (e <= s + sectors) {
7849
7850 e = s + sectors;
7851 ack = acknowledged;
7852 } else
7853 ack = ack && acknowledged;
7854
7855 a = s;
7856 if (e - a <= BB_MAX_LEN) {
7857 p[hi] = BB_MAKE(a, e-a, ack);
7858 s = e;
7859 } else {
7860 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
7861 s = a + BB_MAX_LEN;
7862 }
7863 sectors = e - s;
7864 lo = hi;
7865 hi++;
7866 }
7867 }
7868 if (sectors == 0 && hi < bb->count) {
7869
7870
7871 sector_t a = BB_OFFSET(p[hi]);
7872 int lolen = BB_LEN(p[lo]);
7873 int hilen = BB_LEN(p[hi]);
7874 int newlen = lolen + hilen - (s - a);
7875 if (s >= a && newlen < BB_MAX_LEN) {
7876
7877 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
7878 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
7879 memmove(p + hi, p + hi + 1,
7880 (bb->count - hi - 1) * 8);
7881 bb->count--;
7882 }
7883 }
7884 while (sectors) {
7885
7886
7887 if (bb->count >= MD_MAX_BADBLOCKS) {
7888
7889 rv = 0;
7890 break;
7891 } else {
7892 int this_sectors = sectors;
7893 memmove(p + hi + 1, p + hi,
7894 (bb->count - hi) * 8);
7895 bb->count++;
7896
7897 if (this_sectors > BB_MAX_LEN)
7898 this_sectors = BB_MAX_LEN;
7899 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
7900 sectors -= this_sectors;
7901 s += this_sectors;
7902 }
7903 }
7904
7905 bb->changed = 1;
7906 if (!acknowledged)
7907 bb->unacked_exist = 1;
7908 write_sequnlock_irq(&bb->lock);
7909
7910 return rv;
7911}
7912
7913int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
7914 int acknowledged)
7915{
7916 int rv = md_set_badblocks(&rdev->badblocks,
7917 s + rdev->data_offset, sectors, acknowledged);
7918 if (rv) {
7919
7920 sysfs_notify_dirent_safe(rdev->sysfs_state);
7921 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
7922 md_wakeup_thread(rdev->mddev->thread);
7923 }
7924 return rv;
7925}
7926EXPORT_SYMBOL_GPL(rdev_set_badblocks);
7927
7928
7929
7930
7931
7932
7933
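/* Remove a range of bad blocks from the table.
 * This may require splitting an existing entry; if the table is
 * already full the split cannot be made and the remainder of the
 * clear request is simply dropped.
 */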
7934static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
7935{
7936 u64 *p;
7937 int lo, hi;
7938 sector_t target = s + sectors;
7939 int rv = 0;
7940
7941 if (bb->shift > 0) {
7942
7943
7944
7945
7946
7947
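/* When clearing we round the start up and the end down, so we never
 * clear a sector that was not explicitly requested: better to keep
 * thinking a block is bad than to wrongly mark it good.
 */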
7948 s += (1<<bb->shift) - 1;
7949 s >>= bb->shift;
7950 target >>= bb->shift;
7951 sectors = target - s;
7952 }
7953
7954 write_seqlock_irq(&bb->lock);
7955
7956 p = bb->page;
7957 lo = 0;
7958 hi = bb->count;
7959
7960 while (hi - lo > 1) {
7961 int mid = (lo + hi) / 2;
7962 sector_t a = BB_OFFSET(p[mid]);
7963 if (a < target)
7964 lo = mid;
7965 else
7966 hi = mid;
7967 }
7968 if (hi > lo) {
7969
7970
7971
7972
7973 if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
7974
7975 int ack = BB_ACK(p[lo]);
7976 sector_t a = BB_OFFSET(p[lo]);
7977 sector_t end = a + BB_LEN(p[lo]);
7978
7979 if (a < s) {
7980
7981 if (bb->count >= MD_MAX_BADBLOCKS) {
7982 rv = 0;
7983 goto out;
7984 }
7985 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
7986 bb->count++;
7987 p[lo] = BB_MAKE(a, s-a, ack);
7988 lo++;
7989 }
7990 p[lo] = BB_MAKE(target, end - target, ack);
7991
7992 hi = lo;
7993 lo--;
7994 }
7995 while (lo >= 0 &&
7996 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
7997
7998 if (BB_OFFSET(p[lo]) < s) {
7999
8000 int ack = BB_ACK(p[lo]);
8001 sector_t start = BB_OFFSET(p[lo]);
8002 p[lo] = BB_MAKE(start, s - start, ack);
8003
8004 break;
8005 }
8006 lo--;
8007 }
8008
8009
8010
8011 if (hi - lo > 1) {
8012 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
8013 bb->count -= (hi - lo - 1);
8014 }
8015 }
8016
8017 bb->changed = 1;
8018out:
8019 write_sequnlock_irq(&bb->lock);
8020 return rv;
8021}
8022
8023int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors)
8024{
8025 return md_clear_badblocks(&rdev->badblocks,
8026 s + rdev->data_offset,
8027 sectors);
8028}
8029EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8030
8031
8032
8033
8034
8035
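/* Acknowledge every bad block in the table.
 * This only succeeds if ->changed is clear, i.e. there are no recorded
 * changes still waiting to be written out to the metadata.
 */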
8036void md_ack_all_badblocks(struct badblocks *bb)
8037{
8038 if (bb->page == NULL || bb->changed)
/* cannot acknowledge while there is no table or changes are pending */
8040 return;
8041 write_seqlock_irq(&bb->lock);
8042
8043 if (bb->changed == 0) {
8044 u64 *p = bb->page;
8045 int i;
8046 for (i = 0; i < bb->count ; i++) {
8047 if (!BB_ACK(p[i])) {
8048 sector_t start = BB_OFFSET(p[i]);
8049 int len = BB_LEN(p[i]);
8050 p[i] = BB_MAKE(start, len, 1);
8051 }
8052 }
8053 bb->unacked_exist = 0;
8054 }
8055 write_sequnlock_irq(&bb->lock);
8056}
8057EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
8058
8070
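/* sysfs access to the bad-blocks list: badblocks_show() formats the
 * table as "sector length" pairs, truncated to one page; with 'unack'
 * set, only ranges that have not yet been acknowledged are listed.
 * badblocks_store() parses "sector length" and records a new range,
 * acknowledged or not depending on 'unack'.
 */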
8071static ssize_t
8072badblocks_show(struct badblocks *bb, char *page, int unack)
8073{
8074 size_t len;
8075 int i;
8076 u64 *p = bb->page;
8077 unsigned seq;
8078
8079 if (bb->shift < 0)
8080 return 0;
8081
8082retry:
8083 seq = read_seqbegin(&bb->lock);
8084
8085 len = 0;
8086 i = 0;
8087
8088 while (len < PAGE_SIZE && i < bb->count) {
8089 sector_t s = BB_OFFSET(p[i]);
8090 unsigned int length = BB_LEN(p[i]);
8091 int ack = BB_ACK(p[i]);
8092 i++;
8093
8094 if (unack && ack)
8095 continue;
8096
8097 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
8098 (unsigned long long)s << bb->shift,
8099 length << bb->shift);
8100 }
8101 if (unack && len == 0)
8102 bb->unacked_exist = 0;
8103
8104 if (read_seqretry(&bb->lock, seq))
8105 goto retry;
8106
8107 return len;
8108}
8109
8110#define DO_DEBUG 1
8111
8112static ssize_t
8113badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
8114{
8115 unsigned long long sector;
8116 int length;
8117 char newline;
8118#ifdef DO_DEBUG
8119
8120
8121
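/* For testing and debugging only: a leading '-' in the written
 * string clears the given range instead of adding it.
 */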
8122 int clear = 0;
8123 if (page[0] == '-') {
8124 clear = 1;
8125 page++;
8126 }
8127#endif
8128
switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
8130 case 3:
8131 if (newline != '\n')
8132 return -EINVAL;
8133 case 2:
8134 if (length <= 0)
8135 return -EINVAL;
8136 break;
8137 default:
8138 return -EINVAL;
8139 }
8140
8141#ifdef DO_DEBUG
8142 if (clear) {
8143 md_clear_badblocks(bb, sector, length);
8144 return len;
8145 }
8146#endif
8147 if (md_set_badblocks(bb, sector, length, !unack))
8148 return len;
8149 else
8150 return -ENOSPC;
8151}
8152
8153static int md_notify_reboot(struct notifier_block *this,
8154 unsigned long code, void *x)
8155{
8156 struct list_head *tmp;
8157 struct mddev *mddev;
8158 int need_delay = 0;
8159
8160 if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
8161
8162 printk(KERN_INFO "md: stopping all md devices.\n");
8163
8164 for_each_mddev(mddev, tmp) {
8165 if (mddev_trylock(mddev)) {
8166
8167
8168
8169
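/* Force a switch to read-only even though the array may still
 * appear to be in use; the generous open count of 100 lets
 * md_set_readonly() proceed while the device is still open.
 */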
8170 md_set_readonly(mddev, 100);
8171 mddev_unlock(mddev);
8172 }
8173 need_delay = 1;
8174 }
8175
8176
8177
8178
8179
8180
8181 if (need_delay)
8182 mdelay(1000*1);
8183 }
8184 return NOTIFY_DONE;
8185}
8186
8187static struct notifier_block md_notifier = {
8188 .notifier_call = md_notify_reboot,
8189 .next = NULL,
8190 .priority = INT_MAX,
8191};
8192
8193static void md_geninit(void)
8194{
8195 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8196
8197 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8198}
8199
8200static int __init md_init(void)
8201{
8202 int ret = -ENOMEM;
8203
8204 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8205 if (!md_wq)
8206 goto err_wq;
8207
8208 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8209 if (!md_misc_wq)
8210 goto err_misc_wq;
8211
8212 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8213 goto err_md;
8214
8215 if ((ret = register_blkdev(0, "mdp")) < 0)
8216 goto err_mdp;
8217 mdp_major = ret;
8218
8219 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
8220 md_probe, NULL, NULL);
8221 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8222 md_probe, NULL, NULL);
8223
8224 register_reboot_notifier(&md_notifier);
8225 raid_table_header = register_sysctl_table(raid_root_table);
8226
8227 md_geninit();
8228 return 0;
8229
8230err_mdp:
8231 unregister_blkdev(MD_MAJOR, "md");
8232err_md:
8233 destroy_workqueue(md_misc_wq);
8234err_misc_wq:
8235 destroy_workqueue(md_wq);
8236err_wq:
8237 return ret;
8238}
8239
8240#ifndef MODULE
8241
8242
8243
8244
8245
8246
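/* Boot-time autodetection (non-modular builds only).
 * md_autodetect_dev() is called, typically by the partition-scanning
 * code for partitions marked for RAID autodetect, to queue candidate
 * devices on this list; autostart_arrays() later imports them and
 * assembles whatever arrays it can.
 */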
8247static LIST_HEAD(all_detected_devices);
8248struct detected_devices_node {
8249 struct list_head list;
8250 dev_t dev;
8251};
8252
8253void md_autodetect_dev(dev_t dev)
8254{
8255 struct detected_devices_node *node_detected_dev;
8256
8257 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8258 if (node_detected_dev) {
8259 node_detected_dev->dev = dev;
8260 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8261 } else {
8262 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8263 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8264 }
8265}
8266
8267
8268static void autostart_arrays(int part)
8269{
8270 struct md_rdev *rdev;
8271 struct detected_devices_node *node_detected_dev;
8272 dev_t dev;
8273 int i_scanned, i_passed;
8274
8275 i_scanned = 0;
8276 i_passed = 0;
8277
8278 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8279
8280 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8281 i_scanned++;
8282 node_detected_dev = list_entry(all_detected_devices.next,
8283 struct detected_devices_node, list);
8284 list_del(&node_detected_dev->list);
8285 dev = node_detected_dev->dev;
8286 kfree(node_detected_dev);
8287 rdev = md_import_device(dev,0, 90);
8288 if (IS_ERR(rdev))
8289 continue;
8290
8291 if (test_bit(Faulty, &rdev->flags)) {
8292 MD_BUG();
8293 continue;
8294 }
8295 set_bit(AutoDetected, &rdev->flags);
8296 list_add(&rdev->same_set, &pending_raid_disks);
8297 i_passed++;
8298 }
8299
8300 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8301 i_scanned, i_passed);
8302
8303 autorun_devices(part);
8304}
8305
8306#endif
8307
8308static __exit void md_exit(void)
8309{
8310 struct mddev *mddev;
8311 struct list_head *tmp;
8312
8313 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
8314 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8315
8316 unregister_blkdev(MD_MAJOR,"md");
8317 unregister_blkdev(mdp_major, "mdp");
8318 unregister_reboot_notifier(&md_notifier);
8319 unregister_sysctl_table(raid_table_header);
8320 remove_proc_entry("mdstat", NULL);
8321 for_each_mddev(mddev, tmp) {
8322 export_array(mddev);
8323 mddev->hold_active = 0;
8324 }
8325 destroy_workqueue(md_misc_wq);
8326 destroy_workqueue(md_wq);
8327}
8328
8329subsys_initcall(md_init);
8330module_exit(md_exit)
8331
8332static int get_ro(char *buffer, struct kernel_param *kp)
8333{
8334 return sprintf(buffer, "%d", start_readonly);
8335}
8336static int set_ro(const char *val, struct kernel_param *kp)
8337{
8338 char *e;
8339 int num = simple_strtoul(val, &e, 10);
8340 if (*val && (*e == '\0' || *e == '\n')) {
8341 start_readonly = num;
8342 return 0;
8343 }
8344 return -EINVAL;
8345}
8346
8347module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8348module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8349
8350module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8351
8352EXPORT_SYMBOL(register_md_personality);
8353EXPORT_SYMBOL(unregister_md_personality);
8354EXPORT_SYMBOL(md_error);
8355EXPORT_SYMBOL(md_done_sync);
8356EXPORT_SYMBOL(md_write_start);
8357EXPORT_SYMBOL(md_write_end);
8358EXPORT_SYMBOL(md_register_thread);
8359EXPORT_SYMBOL(md_unregister_thread);
8360EXPORT_SYMBOL(md_wakeup_thread);
8361EXPORT_SYMBOL(md_check_recovery);
8362MODULE_LICENSE("GPL");
8363MODULE_DESCRIPTION("MD RAID framework");
8364MODULE_ALIAS("md");
8365MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8366