/*
   md.c : Multiple Devices driver for Linux
	  Copyright (C) 1998, 1999, 2000 Ingo Molnar

     completely rewritten, based on the MD driver code from Marc Zyngier

   Changes:

   - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar
   - RAID-6 extensions by H. Peter Anvin <hpa@zytor.com>
   - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net>
   - kerneld support by Boris Tobotras <boris@xtalk.msk.su>
   - kmod support by: Cyrus Durgin
   - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com>
   - Devfs support by Richard Gooch <rgooch@atnf.csiro.au>

   - lots of fixes and improvements to the RAID1/RAID5 and generic
     RAID code (such as request based resynchronization):

     Neil Brown <neilb@cse.unsw.edu.au>.

   - persistent bitmap code
     Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
#include <linux/buffer_head.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include "md.h"
#include "bitmap.h"

#define DEBUG 0
#define dprintk(x...) ((void)(DEBUG && printk(x)))

#ifndef MODULE
static void autostart_arrays(int part);
#endif

static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static void md_print_devices(void);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array.  We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed.  Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle.  There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change the limits via /proc/sys/dev/raid/speed_limit_min
 * and speed_limit_max, or per-array via the sysfs files
 * sync_speed_{min,max}.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(mddev_t *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(mddev_t *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static struct ctl_table_header *raid_table_header;

static ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static const struct block_device_operations md_fops;

static int start_readonly;

/*
 * Bios for this mddev come from a private bio_set that reserves room
 * in front of each bio to stash the owning mddev pointer; the
 * destructor below pulls that pointer back out so the bio is freed
 * into the right pool.
 */
static void mddev_bio_destructor(struct bio *bio)
{
	mddev_t *mddev, **mddevp;

	mddevp = (void*)bio;
	mddev = mddevp[-1];

	bio_free(bio, mddev->bio_set);
}

struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    mddev_t *mddev)
{
	struct bio *b;
	mddev_t **mddevp;

	if (!mddev || !mddev->bio_set)
		return bio_alloc(gfp_mask, nr_iovecs);

	b = bio_alloc_bioset(gfp_mask, nr_iovecs,
			     mddev->bio_set);
	if (!b)
		return NULL;
	mddevp = (void*)b;
	mddevp[-1] = mddev;
	b->bi_destructor = mddev_bio_destructor;
	return b;
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);

struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
			    mddev_t *mddev)
{
	struct bio *b;
	mddev_t **mddevp;

	if (!mddev || !mddev->bio_set)
		return bio_clone(bio, gfp_mask);

	b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
			     mddev->bio_set);
	if (!b)
		return NULL;
	mddevp = (void*)b;
	mddevp[-1] = mddev;
	b->bi_destructor = mddev_bio_destructor;
	__bio_clone(b, bio);
	if (bio_integrity(bio)) {
		int ret;

		ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);

		if (ret < 0) {
			bio_put(b);
			return NULL;
		}
	}

	return b;
}
EXPORT_SYMBOL_GPL(bio_clone_mddev);

/*
 * We have a system wide 'event count' that is incremented
 * on all interesting events, and some more for not so interesting.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(mddev_t *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);

/* Alternate version that can be called from interrupts
 * when calling sysfs_notify isn't needed.
 */
static void md_new_event_inintr(mddev_t *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}

/*
 * Enables to iterate over all existing md arrays
 * all_mddevs_lock protects this list.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * Iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop still owns a reference
 * to the current mddev and must mddev_put it.
 */
#define for_each_mddev(mddev,tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		tmp = all_mddevs.next;					\
		mddev = NULL;});					\
	     ({ if (tmp != &all_mddevs)					\
			mddev_get(list_entry(tmp, mddev_t, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (mddev) mddev_put(mddev);				\
		mddev = list_entry(tmp, mddev_t, all_mddevs);		\
		tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		tmp = tmp->next;})					\
		)

/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending removal of conflicting devices etc.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
static int md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	mddev_t *mddev = q->queuedata;
	int rv;
	int cpu;
	unsigned int sectors;

	if (mddev == NULL || mddev->pers == NULL
	    || !mddev->ready) {
		bio_io_error(bio);
		return 0;
	}
	smp_rmb();
	rcu_read_lock();
	if (mddev->suspended) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!mddev->suspended)
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
	rv = mddev->pers->make_request(mddev, bio);

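	/* account this request against the whole-array gendisk stats */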
	cpu = part_stat_lock();
	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
	part_stat_unlock();

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);

	return rv;
}

/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once ->stop is called and completes, the module will be completely
 * gone.
 */
void mddev_suspend(mddev_t *mddev)
{
	BUG_ON(mddev->suspended);
	mddev->suspended = 1;
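	/* let every md_make_request() that sampled ->suspended == 0
	 * finish its RCU read-side section, so that active_io below
	 * cannot be incremented after we start waiting on it */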
	synchronize_rcu();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(mddev_t *mddev)
{
	mddev->suspended = 0;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread);
}
EXPORT_SYMBOL_GPL(mddev_resume);

int mddev_congested(mddev_t *mddev, int bits)
{
	return mddev->suspended;
}
EXPORT_SYMBOL(mddev_congested);

/*
 * Generic flush handling for md
 */
static void md_end_flush(struct bio *bio, int err)
{
	mdk_rdev_t *rdev = bio->bi_private;
	mddev_t *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);

static void submit_flushes(struct work_struct *ws)
{
	mddev_t *mddev = container_of(ws, mddev_t, flush_work);
	mdk_rdev_t *rdev;

	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
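	/* start at 1 so the final dec_and_test below fires the
	 * completion work exactly once, even if no bios get issued */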
	rcu_read_lock();
	list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bi->bi_bdev = rdev->bdev;
			atomic_inc(&mddev->flush_pending);
			submit_bio(WRITE_FLUSH, bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	mddev_t *mddev = container_of(ws, mddev_t, flush_work);
	struct bio *bio = mddev->flush_bio;

	if (bio->bi_size == 0)
		/* an empty barrier - all done */
		bio_endio(bio, 0);
	else {
		bio->bi_rw &= ~REQ_FLUSH;
		if (mddev->pers->make_request(mddev, bio))
			generic_make_request(bio);
	}

	mddev->flush_bio = NULL;
	wake_up(&mddev->sb_wait);
}

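/* Flushes proceed in three stages: md_flush_request() parks the
 * incoming REQ_FLUSH bio in ->flush_bio, submit_flushes() sends an
 * empty flush to every active component device, and once those all
 * complete md_submit_flush_data() passes the original bio (with the
 * flush flag cleared) down to the personality.
 */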
void md_flush_request(mddev_t *mddev, struct bio *bio)
{
	spin_lock_irq(&mddev->write_lock);
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio,
			    mddev->write_lock, /*nothing*/);
	mddev->flush_bio = bio;
	spin_unlock_irq(&mddev->write_lock);

	INIT_WORK(&mddev->flush_work, submit_flushes);
	queue_work(md_wq, &mddev->flush_work);
}
EXPORT_SYMBOL(md_flush_request);

/* Support for plugging.
 * This mirrors the plugging support in request_queue, but does not
 * require having a whole queue or request structures.
 * We allocate an md_plug_cb for each md device and each thread it
 * gets plugged on.  mddev->plug_cnt counts the outstanding plugs so
 * other code can see if a plug is active.
 */
struct md_plug_cb {
	struct blk_plug_cb cb;
	mddev_t *mddev;
};

static void plugger_unplug(struct blk_plug_cb *cb)
{
	struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb);
	if (atomic_dec_and_test(&mdcb->mddev->plug_cnt))
		md_wakeup_thread(mdcb->mddev->thread);
	kfree(mdcb);
}

/* Check that an unplug wakeup will come shortly.
 * If not, wakeup the md thread immediately
 */
int mddev_check_plugged(mddev_t *mddev)
{
	struct blk_plug *plug = current->plug;
	struct md_plug_cb *mdcb;

	if (!plug)
		return 0;

	list_for_each_entry(mdcb, &plug->cb_list, cb.list) {
		if (mdcb->cb.callback == plugger_unplug &&
		    mdcb->mddev == mddev) {
			/* Already on the list, move to top */
			if (mdcb != list_first_entry(&plug->cb_list,
						     struct md_plug_cb,
						     cb.list))
				list_move(&mdcb->cb.list, &plug->cb_list);
			return 1;
		}
	}
	/* Not currently on the callback list */
	mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC);
	if (!mdcb)
		return 0;

	mdcb->mddev = mddev;
	mdcb->cb.callback = plugger_unplug;
	atomic_inc(&mddev->plug_cnt);
	list_add(&mdcb->cb.list, &plug->cb_list);
	return 1;
}
EXPORT_SYMBOL_GPL(mddev_check_plugged);

static inline mddev_t *mddev_get(mddev_t *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(mddev_t *mddev)
{
	struct bio_set *bs = NULL;

	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
		list_del(&mddev->all_mddevs);
		bs = mddev->bio_set;
		mddev->bio_set = NULL;
		if (mddev->gendisk) {
			/* We did a probe so need to clean up.
			 * Call queue_work inside the spinlock
			 * so that flush_workqueue() after
			 * mddev_find will succeed in waiting
			 * for the work to be done.
			 */
			INIT_WORK(&mddev->del_work, mddev_delayed_delete);
			queue_work(md_misc_wq, &mddev->del_work);
		} else
			kfree(mddev);
	}
	spin_unlock(&all_mddevs_lock);
	if (bs)
		bioset_free(bs);
}

void mddev_init(mddev_t *mddev)
{
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	init_timer(&mddev->safemode_timer);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	atomic_set(&mddev->plug_cnt, 0);
	spin_lock_init(&mddev->write_lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);

static mddev_t * mddev_find(dev_t unit)
{
	mddev_t *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1<<MdpMinorShift)-1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		/* find an unused unit number */
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				/* Oh dear, all in use. */
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}

static inline int mddev_lock(mddev_t * mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
}

static inline int mddev_is_locked(mddev_t *mddev)
{
	return mutex_is_locked(&mddev->reconfig_mutex);
}

static inline int mddev_trylock(mddev_t * mddev)
{
	return mutex_trylock(&mddev->reconfig_mutex);
}

static struct attribute_group md_redundancy_group;

static void mddev_unlock(mddev_t * mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active set while the remove is happening,
		 * and anything else which might set ->to_remove or
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * check it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				mddev->sysfs_action = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	md_wakeup_thread(mddev->thread);
}

static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
{
	mdk_rdev_t *rdev;

	list_for_each_entry(rdev, &mddev->disks, same_set)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}

static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
{
	mdk_rdev_t *rdev;

	list_for_each_entry(rdev, &mddev->disks, same_set)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

static struct mdk_personality *find_pers(int level, char *clevel)
{
	struct mdk_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel)==0)
			return pers;
	}
	return NULL;
}

/* return the offset of the super block in 512byte sectors */
static inline sector_t calc_dev_sboffset(mdk_rdev_t *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(mdk_rdev_t * rdev)
{
	if (rdev->sb_page)
		MD_BUG();

	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page) {
		printk(KERN_ALERT "md: out of memory.\n");
		return -ENOMEM;
	}

	return 0;
}

static void free_disk_sb(mdk_rdev_t * rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
}

static void super_written(struct bio *bio, int error)
{
	mdk_rdev_t *rdev = bio->bi_private;
	mddev_t *mddev = rdev->mddev;

	if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
		printk("md: super_written gets error=%d, uptodate=%d\n",
		       error, test_bit(BIO_UPTODATE, &bio->bi_flags));
		WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
		md_error(mddev, rdev);
	}

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	bio_put(bio);
}

void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
		    sector_t sector, int size, struct page *page)
{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);

	bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
	bio->bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

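	/* superblock updates must be durable: flush any cached data
	 * first (REQ_FLUSH) and force this write itself to stable
	 * media (REQ_FUA) */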
	atomic_inc(&mddev->pending_writes);
	submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
}

void md_super_wait(mddev_t *mddev)
{
	/* wait for all superblock writes that were scheduled to complete */
	DEFINE_WAIT(wq);
	for(;;) {
		prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&mddev->pending_writes)==0)
			break;
		schedule();
	}
	finish_wait(&mddev->sb_wait, &wq);
}

static void bi_complete(struct bio *bio, int error)
{
	complete((struct completion*)bio->bi_private);
}

int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
		 struct page *page, int rw, bool metadata_op)
{
	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
	struct completion event;
	int ret;

	rw |= REQ_SYNC;

	bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
		rdev->meta_bdev : rdev->bdev;
	if (metadata_op)
		bio->bi_sector = sector + rdev->sb_start;
	else
		bio->bi_sector = sector + rdev->data_offset;
	bio_add_page(bio, page, size, 0);
	init_completion(&event);
	bio->bi_private = &event;
	bio->bi_end_io = bi_complete;
	submit_bio(rw, bio);
	wait_for_completion(&event);

	ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(sync_page_io);

static int read_disk_sb(mdk_rdev_t * rdev, int size)
{
	char b[BDEVNAME_SIZE];
	if (!rdev->sb_page) {
		MD_BUG();
		return -EINVAL;
	}
	if (rdev->sb_loaded)
		return 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
		goto fail;
	rdev->sb_loaded = 1;
	return 0;

fail:
	printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
	       bdevname(rdev->bdev,b));
	return -EINVAL;
}

static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	return	sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3;
}

static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	int ret;
	mdp_super_t *tmp1, *tmp2;

	tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
	tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);

	if (!tmp1 || !tmp2) {
		ret = 0;
		printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
		goto abort;
	}

	*tmp1 = *sb1;
	*tmp2 = *sb2;

	/*
	 * nr_disks is not constant
	 */
	tmp1->nr_disks = 0;
	tmp2->nr_disks = 0;

	ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
	kfree(tmp1);
	kfree(tmp2);
	return ret;
}

static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}

static unsigned int calc_sb_csum(mdp_super_t * sb)
{
	u64 newcsum = 0;
	u32 *sb32 = (u32*)sb;
	int i;
	unsigned int disk_csum, csum;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;

	for (i = 0; i < MD_SB_BYTES/4 ; i++)
		newcsum += sb32[i];
	csum = (newcsum & 0xffffffff) + (newcsum>>32);

#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * using it), but it should be consistent.  However, csum_fold
	 * was broken on alpha, so we keep the historic (wrong but
	 * consistent) value here.
	 */
	sb->sb_csum = md_csum_fold(disk_csum);
#else
	sb->sb_csum = disk_csum;
#endif
	return csum;
}

/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface for them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(mdk_rdev_t *dev, mdk_rdev_t *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(mddev_t *mddev, mdk_rdev_t *dev)
 *      Verify that dev is acceptable into mddev.
 *       The first time, mddev->raid_disks will be 0, and data from
 *       dev should be merged in.  Subsequent calls check that dev
 *       is new enough.  Return 0 or -EINVAL
 *
 *   void sync_super(mddev_t *mddev, mdk_rdev_t *dev)
 *     Update the superblock for rdev with data in mddev
 *     This does not write to disc.
 *
 */
struct super_type {
	char		    *name;
	struct module	    *owner;
	int		    (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev,
					  int minor_version);
	int		    (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
	void		    (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
	unsigned long long  (*rdev_size_change)(mdk_rdev_t *rdev,
						sector_t num_sectors);
};

/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do
 * not support bitmaps.  It prints an error message and returns non-zero if
 * mddev has a bitmap.  Otherwise, it returns 0.
 *
 */
int md_check_no_bitmap(mddev_t *mddev)
{
	if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
		return 0;
	printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
	       mdname(mddev), mddev->pers->name);
	return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);

/*
 * load_super for 0.90.0
 */
static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret) return ret;

	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = (mdp_super_t*)page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
		       b);
		goto abort;
	}

	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		printk(KERN_WARNING "Bad version number %d.%d on %s\n",
			sb->major_version, sb->minor_version,
			b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
			b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;

	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
		if (!uuid_equal(refsb, sb)) {
			printk(KERN_WARNING "md: %s has different UUID to %s\n",
				b, bdevname(refdev->bdev,b2));
			goto abort;
		}
		if (!sb_equal(refsb, sb)) {
			printk(KERN_WARNING "md: %s has same UUID"
			       " but different superblock to %s\n",
			       b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		ev1 = md_event(sb);
		ev2 = md_event(refsb);
		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->sectors = rdev->sb_start;

	if (rdev->sectors < sb->size * 2 && sb->level > 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;

 abort:
	return ret;
}

/*
 * validate_super for 0.90.0
 */
static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->dev_sectors = sb->size * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;

		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL)
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}

/*
 * sync_super for 0.90.0
 */
static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{
	mdp_super_t *sb;
	mdk_rdev_t *rdev2;
	int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active=0, working=0,failed=0,spare=0,nr_disks=0;

	rdev->sb_size = MD_SB_BYTES;

	sb = (mdp_super_t*)page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words = 0;
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12,4);

	sb->ctime = mddev->ctime;
	sb->level = mddev->level;
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = mddev->utime;
	sb->state = 0;
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync) {
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	list_for_each_entry(rdev2, &mddev->disks, same_set) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
	}
	/* now set the "removed" and "faulty" bits on any missing devices */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}

/*
 * rdev_size_change for 0.90.0
 */
static unsigned long long
super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
{
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->mddev->bitmap_info.offset)
		return 0; /* can't move bitmap */
	rdev->sb_start = calc_dev_sboffset(rdev);
	if (!num_sectors || num_sectors > rdev->sb_start)
		num_sectors = rdev->sb_start;
	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
		       rdev->sb_page);
	md_super_wait(rdev->mddev);
	return num_sectors;
}


/*
 * version 1 superblock
 */

static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;
	__le32 *isuper = (__le32*)sb;
	int i;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (i=0; size>=4; size -= 4 )
		newcsum += le32_to_cpu(*isuper++);

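	/* dev_roles entries are 16 bits each, so an odd device count
	 * leaves a trailing half-word to fold into the checksum */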
	if (size == 2)
		newcsum += le16_to_cpu(*(__le16*) isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return cpu_to_le32(csum);
}

static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;

	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/* superblock is rarely larger than 1K, but it can be larger,
	 * and it is safe to read 4k, so we do that
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;

	sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);

	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		printk("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		printk("md: data_size too small on %s\n",
		       bdevname(rdev->bdev,b));
		return -EINVAL;
	}

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

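	/* the on-disk superblock is 256 bytes plus 2 bytes per device;
	 * round the I/O size up to the bdev's logical block size */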
	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb =
			(struct mdp_superblock_1*)page_address(refdev->sb_page);

		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			printk(KERN_WARNING "md: %s has strangely different"
				" superblock to %s\n",
				bdevname(rdev->bdev,b),
				bdevname(refdev->bdev,b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	if (minor_version)
		rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
			le64_to_cpu(sb->data_offset);
	else
		rdev->sectors = rdev->sb_start;
	if (rdev->sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	if (le64_to_cpu(sb->size) > rdev->sectors)
		return -EINVAL;
	return ret;
}

static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
{
	struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
		mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.default_offset = 1024 >> 9;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks = (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL )
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = 0xffff;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case 0xffff: /* spare */
			break;
		case 0xfffe: /* faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		default:
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET))
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
			else
				set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}

static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{
	struct mdp_superblock_1 *sb;
	mdk_rdev_t *rdev2;
	int max_dev, i;
	/* make rdev->sb match mddev and rdev data. */

	sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad1, 0, sizeof(sb->pad1));
	memset(sb->pad2, 0, sizeof(sb->pad2));
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);

	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	if (rdev->raid_disk >= 0 &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
	}

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
	}

	max_dev = 0;
	list_for_each_entry(rdev2, &mddev->disks, same_set)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(0xfffe);

	list_for_each_entry(rdev2, &mddev->disks, same_set) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(0xfffe);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(0xffff);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}

static unsigned long long
super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
{
	struct mdp_superblock_1 *sb;
	sector_t max_sectors;
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
		sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);
		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
		rdev->sb_start = sb_start;
	}
	sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
	sb->sb_csum = calc_sb_1_csum(sb);
	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
		       rdev->sb_page);
	md_super_wait(rdev->mddev);
	return num_sectors;
}

static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
	},
};

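/* sync_super: use the per-array override if one is set, otherwise
 * dispatch through super_types[], indexed by the metadata major
 * version. */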
static void sync_super(mddev_t *mddev, mdk_rdev_t *rdev)
{
	if (mddev->sync_super) {
		mddev->sync_super(mddev, rdev);
		return;
	}

	BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));

	super_types[mddev->major_version].sync_super(mddev, rdev);
}

static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
{
	mdk_rdev_t *rdev, *rdev2;

	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev1)
		rdev_for_each_rcu(rdev2, mddev2)
			if (rdev->bdev->bd_contains ==
			    rdev2->bdev->bd_contains) {
				rcu_read_unlock();
				return 1;
			}
	rcu_read_unlock();
	return 0;
}

static LIST_HEAD(pending_raid_disks);

/*
 * Try to register data integrity profile for an mddev
 *
 * All component devices must expose the same integrity profile,
 * which is then registered for the array as a whole.
 */
int md_integrity_register(mddev_t *mddev)
{
	mdk_rdev_t *rdev, *reference = NULL;

	if (list_empty(&mddev->disks))
		return 0; /* nothing to do */
	if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
		return 0; /* shouldn't register, or already is */
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		/* skip spares and non-functional disks */
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (rdev->raid_disk < 0)
			continue;
		if (!reference) {
			/* Use the first rdev as the reference */
			reference = rdev;
			continue;
		}
		/* does this rdev's profile match the reference profile? */
		if (blk_integrity_compare(reference->bdev->bd_disk,
				rdev->bdev->bd_disk) < 0)
			return -EINVAL;
	}
	if (!reference || !bdev_get_integrity(reference->bdev))
		return 0;
	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
	if (blk_integrity_register(mddev->gendisk,
			bdev_get_integrity(reference->bdev)) != 0) {
		printk(KERN_ERR "md: failed to register integrity for %s\n",
			mdname(mddev));
		return -EINVAL;
	}
	printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
	if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
		printk(KERN_ERR "md: failed to create integrity pool for %s\n",
		       mdname(mddev));
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(md_integrity_register);

/* Disable data integrity if non-capable/non-matching disk is being added */
void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
{
	struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
	struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);

	if (!bi_mddev) /* nothing to do */
		return;
	if (rdev->raid_disk < 0) /* skip spares */
		return;
	if (bi_rdev && blk_integrity_compare(mddev->gendisk,
					     rdev->bdev->bd_disk) >= 0)
		return;
	printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
	blk_integrity_unregister(mddev->gendisk);
}
EXPORT_SYMBOL(md_integrity_add_rdev);

static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
{
	char b[BDEVNAME_SIZE];
	struct kobject *ko;
	char *s;
	int err;

	if (rdev->mddev) {
		MD_BUG();
		return -EINVAL;
	}

	/* prevent duplicates */
	if (find_rdev(mddev, rdev->bdev->bd_dev))
		return -EEXIST;

	/* make sure rdev->sectors exceeds mddev->dev_sectors */
	if (rdev->sectors && (mddev->dev_sectors == 0 ||
			rdev->sectors < mddev->dev_sectors)) {
		if (mddev->pers) {
			/* Cannot change size, so fail
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
			if (mddev->level > 0)
				return -ENOSPC;
		} else
			mddev->dev_sectors = rdev->sectors;
	}

	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check number is not in use
	 */
	if (rdev->desc_nr < 0) {
		int choice = 0;
		if (mddev->pers) choice = mddev->raid_disks;
		while (find_rdev_nr(mddev, choice))
			choice++;
		rdev->desc_nr = choice;
	} else {
		if (find_rdev_nr(mddev, rdev->desc_nr))
			return -EBUSY;
	}
	if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
		printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
		       mdname(mddev), mddev->max_disks);
		return -EBUSY;
	}
	bdevname(rdev->bdev,b);
	while ( (s=strchr(b, '/')) != NULL)
		*s = '!';

	rdev->mddev = mddev;
	printk(KERN_INFO "md: bind<%s>\n", b);

	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
		goto fail;

	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
	rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");

	list_add_rcu(&rdev->same_set, &mddev->disks);
	bd_link_disk_holder(rdev->bdev, mddev->gendisk);

	/* May as well allow recovery to be retried once */
	mddev->recovery_disabled = 0;

	return 0;

 fail:
	printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
	       b, mdname(mddev));
	return err;
}
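/* Deleting the rdev kobject synchronously from unbind_rdev_from_array()
 * could deadlock (e.g. when the unbind was triggered by a write to the
 * rdev's own sysfs 'state' file), so the final kobject_del is punted
 * to the md_misc_wq workqueue. */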
static void md_delayed_delete(struct work_struct *ws)
{
	mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
	kobject_del(&rdev->kobj);
	kobject_put(&rdev->kobj);
}

static void unbind_rdev_from_array(mdk_rdev_t * rdev)
{
	char b[BDEVNAME_SIZE];
	if (!rdev->mddev) {
		MD_BUG();
		return;
	}
	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
	list_del_rcu(&rdev->same_set);
	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
	rdev->mddev = NULL;
	sysfs_remove_link(&rdev->kobj, "block");
	sysfs_put(rdev->sysfs_state);
	rdev->sysfs_state = NULL;
	/* We need to delay this, otherwise we can deadlock when
	 * writing to 'remove' to "dev/state".  We also need
	 * to delay it due to rcu usage.
	 */
	synchronize_rcu();
	INIT_WORK(&rdev->del_work, md_delayed_delete);
	kobject_get(&rdev->kobj);
	queue_work(md_misc_wq, &rdev->del_work);
}

/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */
static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
{
	int err = 0;
	struct block_device *bdev;
	char b[BDEVNAME_SIZE];

	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				 shared ? (mdk_rdev_t *)lock_rdev : rdev);
	if (IS_ERR(bdev)) {
		printk(KERN_ERR "md: could not open %s.\n",
			__bdevname(dev, b));
		return PTR_ERR(bdev);
	}
	rdev->bdev = bdev;
	return err;
}

static void unlock_rdev(mdk_rdev_t *rdev)
{
	struct block_device *bdev = rdev->bdev;
	rdev->bdev = NULL;
	if (!bdev)
		MD_BUG();
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

void md_autodetect_dev(dev_t dev);

static void export_rdev(mdk_rdev_t * rdev)
{
	char b[BDEVNAME_SIZE];
	printk(KERN_INFO "md: export_rdev(%s)\n",
		bdevname(rdev->bdev,b));
	if (rdev->mddev)
		MD_BUG();
	free_disk_sb(rdev);
#ifndef MODULE
	if (test_bit(AutoDetected, &rdev->flags))
		md_autodetect_dev(rdev->bdev->bd_dev);
#endif
	unlock_rdev(rdev);
	kobject_put(&rdev->kobj);
}

static void kick_rdev_from_array(mdk_rdev_t * rdev)
{
	unbind_rdev_from_array(rdev);
	export_rdev(rdev);
}

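/* export_array: detach and release every component device, leaving the
 * mddev empty and unconfigured. */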
static void export_array(mddev_t *mddev)
{
	mdk_rdev_t *rdev, *tmp;

	rdev_for_each(rdev, tmp, mddev) {
		if (!rdev->mddev) {
			MD_BUG();
			continue;
		}
		kick_rdev_from_array(rdev);
	}
	if (!list_empty(&mddev->disks))
		MD_BUG();
	mddev->raid_disks = 0;
	mddev->major_version = 0;
}

static void print_desc(mdp_disk_t *desc)
{
	printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
		desc->major,desc->minor,desc->raid_disk,desc->state);
}

static void print_sb_90(mdp_super_t *sb)
{
	int i;

	printk(KERN_INFO
		"md:  SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
		sb->major_version, sb->minor_version, sb->patch_version,
		sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
		sb->ctime);
	printk(KERN_INFO "md:     L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
		sb->level, sb->size, sb->nr_disks, sb->raid_disks,
		sb->md_minor, sb->layout, sb->chunk_size);
	printk(KERN_INFO "md:     UT:%08x ST:%d AD:%d WD:%d"
		" FD:%d SD:%d CSUM:%08x E:%08lx\n",
		sb->utime, sb->state, sb->active_disks, sb->working_disks,
		sb->failed_disks, sb->spare_disks,
		sb->sb_csum, (unsigned long)sb->events_lo);

	printk(KERN_INFO);
	for (i = 0; i < MD_SB_DISKS; i++) {
		mdp_disk_t *desc;

		desc = sb->disks + i;
		if (desc->number || desc->major || desc->minor ||
		    desc->raid_disk || (desc->state && (desc->state != 4))) {
			printk("     D %2d: ", i);
			print_desc(desc);
		}
	}
	printk(KERN_INFO "md:     THIS: ");
	print_desc(&sb->this_disk);
}

static void print_sb_1(struct mdp_superblock_1 *sb)
{
	__u8 *uuid;

	uuid = sb->set_uuid;
	printk(KERN_INFO
	       "md:  SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
	       "md:    Name: \"%s\" CT:%llu\n",
		le32_to_cpu(sb->major_version),
		le32_to_cpu(sb->feature_map),
		uuid,
		sb->set_name,
		(unsigned long long)le64_to_cpu(sb->ctime)
		       & MD_SUPERBLOCK_1_TIME_SEC_MASK);

	uuid = sb->device_uuid;
	printk(KERN_INFO
	       "md:       L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
			" RO:%llu\n"
	       "md:     Dev:%08x UUID: %pU\n"
	       "md:       (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
	       "md:         (MaxDev:%u) \n",
		le32_to_cpu(sb->level),
		(unsigned long long)le64_to_cpu(sb->size),
		le32_to_cpu(sb->raid_disks),
		le32_to_cpu(sb->layout),
		le32_to_cpu(sb->chunksize),
		(unsigned long long)le64_to_cpu(sb->data_offset),
		(unsigned long long)le64_to_cpu(sb->data_size),
		(unsigned long long)le64_to_cpu(sb->super_offset),
		(unsigned long long)le64_to_cpu(sb->recovery_offset),
		le32_to_cpu(sb->dev_number),
		uuid,
		sb->devflags,
		(unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
		(unsigned long long)le64_to_cpu(sb->events),
		(unsigned long long)le64_to_cpu(sb->resync_offset),
		le32_to_cpu(sb->sb_csum),
		le32_to_cpu(sb->max_dev)
		);
}

static void print_rdev(mdk_rdev_t *rdev, int major_version)
{
	char b[BDEVNAME_SIZE];
	printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
		bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
		test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
		rdev->desc_nr);
	if (rdev->sb_loaded) {
		printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
		switch (major_version) {
		case 0:
			print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
			break;
		case 1:
			print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
			break;
		}
	} else
		printk(KERN_INFO "md: no rdev superblock!\n");
}

static void md_print_devices(void)
{
	struct list_head *tmp;
	mdk_rdev_t *rdev;
	mddev_t *mddev;
	char b[BDEVNAME_SIZE];

	printk("\n");
	printk("md:	**********************************\n");
	printk("md:	* <COMPLETE RAID STATE PRINTOUT> *\n");
	printk("md:	**********************************\n");
	for_each_mddev(mddev, tmp) {

		if (mddev->bitmap)
			bitmap_print_sb(mddev->bitmap);
		else
			printk("%s: ", mdname(mddev));
		list_for_each_entry(rdev, &mddev->disks, same_set)
			printk("<%s>", bdevname(rdev->bdev,b));
		printk("\n");

		list_for_each_entry(rdev, &mddev->disks, same_set)
			print_rdev(rdev, mddev->major_version);
	}
	printk("md:	**********************************\n");
	printk("\n");
}


static void sync_sbs(mddev_t * mddev, int nospares)
{
	/* Update each superblock (in-memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
	mdk_rdev_t *rdev;
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		if (rdev->sb_events == mddev->events ||
		    (nospares &&
		     rdev->raid_disk < 0 &&
		     rdev->sb_events+1 == mddev->events)) {
			/* Don't update this superblock */
			rdev->sb_loaded = 2;
		} else {
			sync_super(mddev, rdev);
			rdev->sb_loaded = 1;
		}
	}
}

static void md_update_sb(mddev_t * mddev, int force_change)
{
	mdk_rdev_t *rdev;
	int sync_req;
	int nospares = 0;

repeat:
	/* First make sure individual recovery_offsets are correct */
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		if (rdev->raid_disk >= 0 &&
		    mddev->delta_disks >= 0 &&
		    !test_bit(In_sync, &rdev->flags) &&
		    mddev->curr_resync_completed > rdev->recovery_offset)
			rdev->recovery_offset = mddev->curr_resync_completed;

	}
	if (!mddev->persistent) {
		clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
		clear_bit(MD_CHANGE_DEVS, &mddev->flags);
		if (!mddev->external)
			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
		wake_up(&mddev->sb_wait);
		return;
	}

	spin_lock_irq(&mddev->write_lock);

	mddev->utime = get_seconds();

	if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
		force_change = 1;
	if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/* just a clean<-> dirty transition, possibly leave spares alone,
		 * though if events isn't the right even/odd, we will have to do
		 * spares after all
		 */
		nospares = 1;
	if (force_change)
		nospares = 0;
	if (mddev->degraded)
		/* If the array is degraded, then skipping spares is both
		 * dangerous and fairly pointless.
		 * Dangerous because a device that was removed from the array
		 * might have an event_count that still looks up-to-date,
		 * so it can be re-added without a resync.
		 * Pointless because if there are any spares to skip,
		 * then a recovery will happen and soon that array won't
		 * be degraded any more and the spare can go back to sleep then.
		 */
		nospares = 0;

	sync_req = mddev->in_sync;

	/* If this is just a dirty<->clean transition, and the array is clean
	 * and 'events' is odd, we can roll back to the previous clean state */
	if (nospares
	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
	    && mddev->can_decrease_events
	    && mddev->events != 1) {
		mddev->events--;
		mddev->can_decrease_events = 0;
	} else {
		/* otherwise we have to go forward and ... */
		mddev->events ++;
		mddev->can_decrease_events = nospares;
	}

	if (!mddev->events) {
		/*
		 * oops, this 64-bit counter should never wrap.
		 * Either we are in around ~1 trillion A.C., assuming
		 * 1 reboot per second, or we have a bug:
		 */
		MD_BUG();
		mddev->events --;
	}
	sync_sbs(mddev, nospares);
	spin_unlock_irq(&mddev->write_lock);

	dprintk(KERN_INFO
		"md: updating %s RAID superblock on device (in sync %d)\n",
		mdname(mddev),mddev->in_sync);

	bitmap_update_sb(mddev->bitmap);
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		char b[BDEVNAME_SIZE];
		dprintk(KERN_INFO "md: ");
		if (rdev->sb_loaded != 1)
			continue; /* no noise on spare devices */
		if (test_bit(Faulty, &rdev->flags))
			dprintk("(skipping faulty ");

		dprintk("%s ", bdevname(rdev->bdev,b));
		if (!test_bit(Faulty, &rdev->flags)) {
			md_super_write(mddev,rdev,
				       rdev->sb_start, rdev->sb_size,
				       rdev->sb_page);
			dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
				bdevname(rdev->bdev,b),
				(unsigned long long)rdev->sb_start);
			rdev->sb_events = mddev->events;

		} else
			dprintk(")\n");
		if (mddev->level == LEVEL_MULTIPATH)
			/* only need to write one superblock... */
			break;
	}
	md_super_wait(mddev);

	/* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
	spin_lock_irq(&mddev->write_lock);
	if (mddev->in_sync != sync_req ||
	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
		/* have to write it out again */
		spin_unlock_irq(&mddev->write_lock);
		goto repeat;
	}
	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
	spin_unlock_irq(&mddev->write_lock);
	wake_up(&mddev->sb_wait);
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		sysfs_notify(&mddev->kobj, NULL, "sync_completed");

}

/* words written to sysfs files may, or may not, be \n terminated.
 * We want to accept with case. For this we use cmd_match.
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* See if cmd, written into a sysfs file, matches
	 * str.  They must either be the same, or cmd can
	 * have a trailing newline
	 */
	while (*cmd && *str && *cmd == *str) {
		cmd++;
		str++;
	}
	if (*cmd == '\n')
		cmd++;
	if (*str || *cmd)
		return 0;
	return 1;
}

struct rdev_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(mdk_rdev_t *, char *);
	ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
};

2344static ssize_t
2345state_show(mdk_rdev_t *rdev, char *page)
2346{
2347 char *sep = "";
2348 size_t len = 0;
2349
2350 if (test_bit(Faulty, &rdev->flags)) {
		len += sprintf(page+len, "%sfaulty",sep);
2352 sep = ",";
2353 }
2354 if (test_bit(In_sync, &rdev->flags)) {
2355 len += sprintf(page+len, "%sin_sync",sep);
2356 sep = ",";
2357 }
2358 if (test_bit(WriteMostly, &rdev->flags)) {
2359 len += sprintf(page+len, "%swrite_mostly",sep);
2360 sep = ",";
2361 }
2362 if (test_bit(Blocked, &rdev->flags)) {
2363 len += sprintf(page+len, "%sblocked", sep);
2364 sep = ",";
2365 }
2366 if (!test_bit(Faulty, &rdev->flags) &&
2367 !test_bit(In_sync, &rdev->flags)) {
2368 len += sprintf(page+len, "%sspare", sep);
2369 sep = ",";
2370 }
2371 return len+sprintf(page+len, "\n");
2372}
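
/* Illustrative example: a healthy spare that is also marked write-mostly
 * reads back from this file as "write_mostly,spare\n".
 */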
2373
2374static ssize_t
2375state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2376{
	/* can write
	 *  faulty  - simulates an error
	 *  remove  - disconnects the device
	 *  writemostly - sets write_mostly
	 *  -writemostly - clears write_mostly
	 *  blocked - sets the Blocked flag
	 *  -blocked - clears the Blocked flag
	 *  insync - sets Insync providing device isn't active
	 */
2386 int err = -EINVAL;
2387 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2388 md_error(rdev->mddev, rdev);
2389 err = 0;
2390 } else if (cmd_match(buf, "remove")) {
2391 if (rdev->raid_disk >= 0)
2392 err = -EBUSY;
2393 else {
2394 mddev_t *mddev = rdev->mddev;
2395 kick_rdev_from_array(rdev);
2396 if (mddev->pers)
2397 md_update_sb(mddev, 1);
2398 md_new_event(mddev);
2399 err = 0;
2400 }
2401 } else if (cmd_match(buf, "writemostly")) {
2402 set_bit(WriteMostly, &rdev->flags);
2403 err = 0;
2404 } else if (cmd_match(buf, "-writemostly")) {
2405 clear_bit(WriteMostly, &rdev->flags);
2406 err = 0;
2407 } else if (cmd_match(buf, "blocked")) {
2408 set_bit(Blocked, &rdev->flags);
2409 err = 0;
2410 } else if (cmd_match(buf, "-blocked")) {
2411 clear_bit(Blocked, &rdev->flags);
2412 wake_up(&rdev->blocked_wait);
2413 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2414 md_wakeup_thread(rdev->mddev->thread);
2415
2416 err = 0;
2417 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2418 set_bit(In_sync, &rdev->flags);
2419 err = 0;
2420 }
2421 if (!err)
2422 sysfs_notify_dirent_safe(rdev->sysfs_state);
2423 return err ? err : len;
2424}
2425static struct rdev_sysfs_entry rdev_state =
2426__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2427
2428static ssize_t
2429errors_show(mdk_rdev_t *rdev, char *page)
2430{
2431 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2432}
2433
2434static ssize_t
2435errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2436{
2437 char *e;
2438 unsigned long n = simple_strtoul(buf, &e, 10);
2439 if (*buf && (*e == 0 || *e == '\n')) {
2440 atomic_set(&rdev->corrected_errors, n);
2441 return len;
2442 }
2443 return -EINVAL;
2444}
2445static struct rdev_sysfs_entry rdev_errors =
2446__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2447
2448static ssize_t
2449slot_show(mdk_rdev_t *rdev, char *page)
2450{
2451 if (rdev->raid_disk < 0)
2452 return sprintf(page, "none\n");
2453 else
2454 return sprintf(page, "%d\n", rdev->raid_disk);
2455}
2456
2457static ssize_t
2458slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2459{
2460 char *e;
2461 int err;
2462 char nm[20];
2463 int slot = simple_strtoul(buf, &e, 10);
2464 if (strncmp(buf, "none", 4)==0)
2465 slot = -1;
2466 else if (e==buf || (*e && *e!= '\n'))
2467 return -EINVAL;
2468 if (rdev->mddev->pers && slot == -1) {
		/* Setting 'slot' on an active array requires also
		 * updating the 'rd%d' link, and communicating
		 * with the personality with ->hot_*_disk.
		 * For now we only support removing
		 * failed/spare devices.  This normally happens automatically,
		 * but not when the metadata is externally managed.
		 */
2476 if (rdev->raid_disk == -1)
2477 return -EEXIST;
		/* personality does all needed checks */
2479 if (rdev->mddev->pers->hot_remove_disk == NULL)
2480 return -EINVAL;
2481 err = rdev->mddev->pers->
2482 hot_remove_disk(rdev->mddev, rdev->raid_disk);
2483 if (err)
2484 return err;
2485 sprintf(nm, "rd%d", rdev->raid_disk);
2486 sysfs_remove_link(&rdev->mddev->kobj, nm);
2487 rdev->raid_disk = -1;
2488 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2489 md_wakeup_thread(rdev->mddev->thread);
2490 } else if (rdev->mddev->pers) {
2491 mdk_rdev_t *rdev2;
		/* Activating a spare .. or possibly reactivating
		 * if we ever get bitmaps working here.
		 */
2496 if (rdev->raid_disk != -1)
2497 return -EBUSY;
2498
2499 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2500 return -EBUSY;
2501
2502 if (rdev->mddev->pers->hot_add_disk == NULL)
2503 return -EINVAL;
2504
2505 list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
2506 if (rdev2->raid_disk == slot)
2507 return -EEXIST;
2508
2509 if (slot >= rdev->mddev->raid_disks &&
2510 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2511 return -ENOSPC;
2512
2513 rdev->raid_disk = slot;
2514 if (test_bit(In_sync, &rdev->flags))
2515 rdev->saved_raid_disk = slot;
2516 else
2517 rdev->saved_raid_disk = -1;
2518 err = rdev->mddev->pers->
2519 hot_add_disk(rdev->mddev, rdev);
2520 if (err) {
2521 rdev->raid_disk = -1;
2522 return err;
2523 } else
2524 sysfs_notify_dirent_safe(rdev->sysfs_state);
2525 sprintf(nm, "rd%d", rdev->raid_disk);
2526 if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
			/* failure here is OK */;
		/* don't wakeup anyone, leave that to userspace. */
2529 } else {
2530 if (slot >= rdev->mddev->raid_disks &&
2531 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2532 return -ENOSPC;
2533 rdev->raid_disk = slot;
		/* assume it is working */
2535 clear_bit(Faulty, &rdev->flags);
2536 clear_bit(WriteMostly, &rdev->flags);
2537 set_bit(In_sync, &rdev->flags);
2538 sysfs_notify_dirent_safe(rdev->sysfs_state);
2539 }
2540 return len;
2541}
2542
2543
2544static struct rdev_sysfs_entry rdev_slot =
2545__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2546
2547static ssize_t
2548offset_show(mdk_rdev_t *rdev, char *page)
2549{
2550 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2551}
2552
2553static ssize_t
2554offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2555{
2556 char *e;
2557 unsigned long long offset = simple_strtoull(buf, &e, 10);
2558 if (e==buf || (*e && *e != '\n'))
2559 return -EINVAL;
2560 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2561 return -EBUSY;
2562 if (rdev->sectors && rdev->mddev->external)
		/* Must set offset before size, so overlap checks
		 * can be sane */
2565 return -EBUSY;
2566 rdev->data_offset = offset;
2567 return len;
2568}
2569
2570static struct rdev_sysfs_entry rdev_offset =
2571__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2572
2573static ssize_t
2574rdev_size_show(mdk_rdev_t *rdev, char *page)
2575{
2576 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2577}
2578
2579static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2580{
	/* check if two start/length pairs overlap */
2582 if (s1+l1 <= s2)
2583 return 0;
2584 if (s2+l2 <= s1)
2585 return 0;
2586 return 1;
2587}
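
/* Worked example (illustrative): overlaps(0, 100, 50, 100) returns 1,
 * since [0,100) and [50,150) share sectors 50..99, while
 * overlaps(0, 100, 100, 50) returns 0 because the first range ends
 * exactly where the second begins.
 */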
2588
2589static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2590{
2591 unsigned long long blocks;
2592 sector_t new;
2593
2594 if (strict_strtoull(buf, 10, &blocks) < 0)
2595 return -EINVAL;
2596
	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
		return -EINVAL; /* sector conversion overflow */

	new = blocks * 2;
	if (new != blocks * 2)
		return -EINVAL; /* unsigned long long to sector_t overflow */
2603
2604 *sectors = new;
2605 return 0;
2606}
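
/* Illustrative conversion (not from the original source): sysfs sizes
 * are in 1K blocks and sectors are 512 bytes, hence the "* 2"; writing
 * "1024\n" yields *sectors == 2048.  The "new != blocks * 2" test above
 * catches truncation when sector_t is only 32 bits wide.
 */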
2607
2608static ssize_t
2609rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2610{
2611 mddev_t *my_mddev = rdev->mddev;
2612 sector_t oldsectors = rdev->sectors;
2613 sector_t sectors;
2614
	if (strict_blocks_to_sectors(buf, &sectors) < 0)
2616 return -EINVAL;
2617 if (my_mddev->pers && rdev->raid_disk >= 0) {
2618 if (my_mddev->persistent) {
2619 sectors = super_types[my_mddev->major_version].
2620 rdev_size_change(rdev, sectors);
2621 if (!sectors)
2622 return -EBUSY;
2623 } else if (!sectors)
2624 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2625 rdev->data_offset;
2626 }
2627 if (sectors < my_mddev->dev_sectors)
2628 return -EINVAL;
2629
2630 rdev->sectors = sectors;
2631 if (sectors > oldsectors && my_mddev->external) {
		/* Need to check that all other rdevs with the same ->bdev
		 * do not overlap.  We need to unlock the mddev to avoid
		 * a deadlock.  We have already changed rdev->sectors, and
		 * if we have to change it back, we will have the lock
		 * again.
		 */
2637 mddev_t *mddev;
2638 int overlap = 0;
2639 struct list_head *tmp;
2640
2641 mddev_unlock(my_mddev);
2642 for_each_mddev(mddev, tmp) {
2643 mdk_rdev_t *rdev2;
2644
2645 mddev_lock(mddev);
2646 list_for_each_entry(rdev2, &mddev->disks, same_set)
2647 if (rdev->bdev == rdev2->bdev &&
2648 rdev != rdev2 &&
2649 overlaps(rdev->data_offset, rdev->sectors,
2650 rdev2->data_offset,
2651 rdev2->sectors)) {
2652 overlap = 1;
2653 break;
2654 }
2655 mddev_unlock(mddev);
2656 if (overlap) {
2657 mddev_put(mddev);
2658 break;
2659 }
2660 }
2661 mddev_lock(my_mddev);
2662 if (overlap) {
			/* Someone else could have slipped in a size
			 * change here, but doing so is just silly.
			 * We put oldsectors back because we *know* it is
			 * safe, and trust user-space not to race with
			 * itself
			 */
2669 rdev->sectors = oldsectors;
2670 return -EBUSY;
2671 }
2672 }
2673 return len;
2674}
2675
2676static struct rdev_sysfs_entry rdev_size =
2677__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
2678
2679
2680static ssize_t recovery_start_show(mdk_rdev_t *rdev, char *page)
2681{
2682 unsigned long long recovery_start = rdev->recovery_offset;
2683
2684 if (test_bit(In_sync, &rdev->flags) ||
2685 recovery_start == MaxSector)
2686 return sprintf(page, "none\n");
2687
2688 return sprintf(page, "%llu\n", recovery_start);
2689}
2690
2691static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2692{
2693 unsigned long long recovery_start;
2694
2695 if (cmd_match(buf, "none"))
2696 recovery_start = MaxSector;
2697 else if (strict_strtoull(buf, 10, &recovery_start))
2698 return -EINVAL;
2699
2700 if (rdev->mddev->pers &&
2701 rdev->raid_disk >= 0)
2702 return -EBUSY;
2703
2704 rdev->recovery_offset = recovery_start;
2705 if (recovery_start == MaxSector)
2706 set_bit(In_sync, &rdev->flags);
2707 else
2708 clear_bit(In_sync, &rdev->flags);
2709 return len;
2710}
2711
2712static struct rdev_sysfs_entry rdev_recovery_start =
2713__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
2714
2715static struct attribute *rdev_default_attrs[] = {
2716 &rdev_state.attr,
2717 &rdev_errors.attr,
2718 &rdev_slot.attr,
2719 &rdev_offset.attr,
2720 &rdev_size.attr,
2721 &rdev_recovery_start.attr,
2722 NULL,
2723};
2724static ssize_t
2725rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
2726{
2727 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2728 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2729 mddev_t *mddev = rdev->mddev;
2730 ssize_t rv;
2731
2732 if (!entry->show)
2733 return -EIO;
2734
2735 rv = mddev ? mddev_lock(mddev) : -EBUSY;
2736 if (!rv) {
2737 if (rdev->mddev == NULL)
2738 rv = -EBUSY;
2739 else
2740 rv = entry->show(rdev, page);
2741 mddev_unlock(mddev);
2742 }
2743 return rv;
2744}
2745
2746static ssize_t
2747rdev_attr_store(struct kobject *kobj, struct attribute *attr,
2748 const char *page, size_t length)
2749{
2750 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2751 mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
2752 ssize_t rv;
2753 mddev_t *mddev = rdev->mddev;
2754
2755 if (!entry->store)
2756 return -EIO;
2757 if (!capable(CAP_SYS_ADMIN))
2758 return -EACCES;
2759 rv = mddev ? mddev_lock(mddev): -EBUSY;
2760 if (!rv) {
2761 if (rdev->mddev == NULL)
2762 rv = -EBUSY;
2763 else
2764 rv = entry->store(rdev, page, length);
2765 mddev_unlock(mddev);
2766 }
2767 return rv;
2768}
2769
2770static void rdev_free(struct kobject *ko)
2771{
2772 mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
2773 kfree(rdev);
2774}
2775static const struct sysfs_ops rdev_sysfs_ops = {
2776 .show = rdev_attr_show,
2777 .store = rdev_attr_store,
2778};
2779static struct kobj_type rdev_ktype = {
2780 .release = rdev_free,
2781 .sysfs_ops = &rdev_sysfs_ops,
2782 .default_attrs = rdev_default_attrs,
2783};
2784
2785void md_rdev_init(mdk_rdev_t *rdev)
2786{
2787 rdev->desc_nr = -1;
2788 rdev->saved_raid_disk = -1;
2789 rdev->raid_disk = -1;
2790 rdev->flags = 0;
2791 rdev->data_offset = 0;
2792 rdev->sb_events = 0;
2793 rdev->last_read_error.tv_sec = 0;
2794 rdev->last_read_error.tv_nsec = 0;
2795 atomic_set(&rdev->nr_pending, 0);
2796 atomic_set(&rdev->read_errors, 0);
2797 atomic_set(&rdev->corrected_errors, 0);
2798
2799 INIT_LIST_HEAD(&rdev->same_set);
2800 init_waitqueue_head(&rdev->blocked_wait);
2801}
2802EXPORT_SYMBOL_GPL(md_rdev_init);
2803
/*
 * Import a device. If 'super_format' >= 0, then sanity check the superblock
 *
 * mark the device faulty if:
 *
 *   - the device is nonexistent (zero size)
 *   - the device has no valid superblock
 *
 * a faulty rdev _never_ has rdev->sb set.
 */
2813static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_minor)
2814{
2815 char b[BDEVNAME_SIZE];
2816 int err;
2817 mdk_rdev_t *rdev;
2818 sector_t size;
2819
2820 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
2821 if (!rdev) {
2822 printk(KERN_ERR "md: could not alloc mem for new device!\n");
2823 return ERR_PTR(-ENOMEM);
2824 }
2825
2826 md_rdev_init(rdev);
2827 if ((err = alloc_disk_sb(rdev)))
2828 goto abort_free;
2829
2830 err = lock_rdev(rdev, newdev, super_format == -2);
2831 if (err)
2832 goto abort_free;
2833
2834 kobject_init(&rdev->kobj, &rdev_ktype);
2835
2836 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
2837 if (!size) {
2838 printk(KERN_WARNING
2839 "md: %s has zero or unknown size, marking faulty!\n",
2840 bdevname(rdev->bdev,b));
2841 err = -EINVAL;
2842 goto abort_free;
2843 }
2844
2845 if (super_format >= 0) {
2846 err = super_types[super_format].
2847 load_super(rdev, NULL, super_minor);
2848 if (err == -EINVAL) {
2849 printk(KERN_WARNING
2850 "md: %s does not have a valid v%d.%d "
2851 "superblock, not importing!\n",
2852 bdevname(rdev->bdev,b),
2853 super_format, super_minor);
2854 goto abort_free;
2855 }
2856 if (err < 0) {
2857 printk(KERN_WARNING
2858 "md: could not read %s's sb, not importing!\n",
2859 bdevname(rdev->bdev,b));
2860 goto abort_free;
2861 }
2862 }
2863
2864 return rdev;
2865
2866abort_free:
2867 if (rdev->sb_page) {
2868 if (rdev->bdev)
2869 unlock_rdev(rdev);
2870 free_disk_sb(rdev);
2871 }
2872 kfree(rdev);
2873 return ERR_PTR(err);
2874}
2875
/*
 * Check a full RAID array for plausibility
 */
2881static void analyze_sbs(mddev_t * mddev)
2882{
2883 int i;
2884 mdk_rdev_t *rdev, *freshest, *tmp;
2885 char b[BDEVNAME_SIZE];
2886
2887 freshest = NULL;
2888 rdev_for_each(rdev, tmp, mddev)
2889 switch (super_types[mddev->major_version].
2890 load_super(rdev, freshest, mddev->minor_version)) {
2891 case 1:
2892 freshest = rdev;
2893 break;
2894 case 0:
2895 break;
2896 default:
			printk(KERN_ERR
2898 "md: fatal superblock inconsistency in %s"
2899 " -- removing from array\n",
2900 bdevname(rdev->bdev,b));
2901 kick_rdev_from_array(rdev);
2902 }
2903
2904
2905 super_types[mddev->major_version].
2906 validate_super(mddev, freshest);
2907
2908 i = 0;
2909 rdev_for_each(rdev, tmp, mddev) {
2910 if (mddev->max_disks &&
2911 (rdev->desc_nr >= mddev->max_disks ||
2912 i > mddev->max_disks)) {
2913 printk(KERN_WARNING
2914 "md: %s: %s: only %d devices permitted\n",
2915 mdname(mddev), bdevname(rdev->bdev, b),
2916 mddev->max_disks);
2917 kick_rdev_from_array(rdev);
2918 continue;
2919 }
2920 if (rdev != freshest)
2921 if (super_types[mddev->major_version].
2922 validate_super(mddev, rdev)) {
2923 printk(KERN_WARNING "md: kicking non-fresh %s"
2924 " from array!\n",
2925 bdevname(rdev->bdev,b));
2926 kick_rdev_from_array(rdev);
2927 continue;
2928 }
2929 if (mddev->level == LEVEL_MULTIPATH) {
2930 rdev->desc_nr = i++;
2931 rdev->raid_disk = rdev->desc_nr;
2932 set_bit(In_sync, &rdev->flags);
2933 } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
2934 rdev->raid_disk = -1;
2935 clear_bit(In_sync, &rdev->flags);
2936 }
2937 }
2938}
2939
/* Read a fixed point number. Numbers in sysfs attributes should be
 * in "standard" units where possible, so time should be in seconds.
 * However we internally use a much smaller unit such as
 * milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale'.
 * all without any floating-point arithmetic.
 */
2950int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
2951{
2952 unsigned long result = 0;
2953 long decimals = -1;
2954 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
2955 if (*cp == '.')
2956 decimals = 0;
2957 else if (decimals < scale) {
2958 unsigned int value;
2959 value = *cp - '0';
2960 result = result * 10 + value;
2961 if (decimals >= 0)
2962 decimals++;
2963 }
2964 cp++;
2965 }
2966 if (*cp == '\n')
2967 cp++;
2968 if (*cp)
2969 return -EINVAL;
2970 if (decimals < 0)
2971 decimals = 0;
2972 while (decimals < scale) {
2973 result *= 10;
		decimals++;
2975 }
2976 *res = result;
2977 return 0;
2978}
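
/* Illustrative examples (not from the original source):
 *	strict_strtoul_scaled("1.5", &v, 3)    sets v = 1500
 *	strict_strtoul_scaled("0.2\n", &v, 3)  sets v = 200
 *	strict_strtoul_scaled("1.2345", &v, 3) sets v = 1234
 * Digits beyond the requested scale are silently dropped.
 */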
2979
2980
2981static void md_safemode_timeout(unsigned long data);
2982
2983static ssize_t
2984safe_delay_show(mddev_t *mddev, char *page)
2985{
2986 int msec = (mddev->safemode_delay*1000)/HZ;
2987 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
2988}
2989static ssize_t
2990safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2991{
2992 unsigned long msec;
2993
2994 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
2995 return -EINVAL;
2996 if (msec == 0)
2997 mddev->safemode_delay = 0;
2998 else {
2999 unsigned long old_delay = mddev->safemode_delay;
3000 mddev->safemode_delay = (msec*HZ)/1000;
3001 if (mddev->safemode_delay == 0)
3002 mddev->safemode_delay = 1;
3003 if (mddev->safemode_delay < old_delay)
3004 md_safemode_timeout((unsigned long)mddev);
3005 }
3006 return len;
3007}
3008static struct md_sysfs_entry md_safe_delay =
3009__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
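
/* Illustrative round trip (not from the original source): writing "0.200"
 * stores 200ms as (200*HZ)/1000 jiffies (20 ticks at HZ=100; a non-zero
 * request is never rounded down to 0), and safe_delay_show() prints it
 * back as "0.200".
 */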
3010
3011static ssize_t
3012level_show(mddev_t *mddev, char *page)
3013{
3014 struct mdk_personality *p = mddev->pers;
3015 if (p)
3016 return sprintf(page, "%s\n", p->name);
3017 else if (mddev->clevel[0])
3018 return sprintf(page, "%s\n", mddev->clevel);
3019 else if (mddev->level != LEVEL_NONE)
3020 return sprintf(page, "%d\n", mddev->level);
3021 else
3022 return 0;
3023}
3024
3025static ssize_t
3026level_store(mddev_t *mddev, const char *buf, size_t len)
3027{
3028 char clevel[16];
3029 ssize_t rv = len;
3030 struct mdk_personality *pers;
3031 long level;
3032 void *priv;
3033 mdk_rdev_t *rdev;
3034
3035 if (mddev->pers == NULL) {
3036 if (len == 0)
3037 return 0;
3038 if (len >= sizeof(mddev->clevel))
3039 return -ENOSPC;
3040 strncpy(mddev->clevel, buf, len);
3041 if (mddev->clevel[len-1] == '\n')
3042 len--;
3043 mddev->clevel[len] = 0;
3044 mddev->level = LEVEL_NONE;
3045 return rv;
3046 }
3047
	/* request to change the personality.  Need to ensure:
	 *  - array is not engaged in resync/recovery/reshape
	 *  - old personality can be suspended
	 *  - new personality will access other array.
	 */
3053
3054 if (mddev->sync_thread ||
3055 mddev->reshape_position != MaxSector ||
3056 mddev->sysfs_active)
3057 return -EBUSY;
3058
3059 if (!mddev->pers->quiesce) {
3060 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3061 mdname(mddev), mddev->pers->name);
3062 return -EINVAL;
3063 }
3064
	/* Now find the new personality */
3066 if (len == 0 || len >= sizeof(clevel))
3067 return -EINVAL;
3068 strncpy(clevel, buf, len);
3069 if (clevel[len-1] == '\n')
3070 len--;
3071 clevel[len] = 0;
3072 if (strict_strtol(clevel, 10, &level))
3073 level = LEVEL_NONE;
3074
3075 if (request_module("md-%s", clevel) != 0)
3076 request_module("md-level-%s", clevel);
3077 spin_lock(&pers_lock);
3078 pers = find_pers(level, clevel);
3079 if (!pers || !try_module_get(pers->owner)) {
3080 spin_unlock(&pers_lock);
3081 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3082 return -EINVAL;
3083 }
3084 spin_unlock(&pers_lock);
3085
3086 if (pers == mddev->pers) {
		/* Nothing to do! */
3088 module_put(pers->owner);
3089 return rv;
3090 }
3091 if (!pers->takeover) {
3092 module_put(pers->owner);
3093 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3094 mdname(mddev), clevel);
3095 return -EINVAL;
3096 }
3097
3098 list_for_each_entry(rdev, &mddev->disks, same_set)
3099 rdev->new_raid_disk = rdev->raid_disk;
3100
	/* ->takeover must set new_* and/or delta_disks
	 * if it succeeds, and may set them when it fails.
	 */
3104 priv = pers->takeover(mddev);
3105 if (IS_ERR(priv)) {
3106 mddev->new_level = mddev->level;
3107 mddev->new_layout = mddev->layout;
3108 mddev->new_chunk_sectors = mddev->chunk_sectors;
3109 mddev->raid_disks -= mddev->delta_disks;
3110 mddev->delta_disks = 0;
3111 module_put(pers->owner);
3112 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3113 mdname(mddev), clevel);
3114 return PTR_ERR(priv);
3115 }
3116
	/* Looks like we have a winner */
3118 mddev_suspend(mddev);
3119 mddev->pers->stop(mddev);
3120
3121 if (mddev->pers->sync_request == NULL &&
3122 pers->sync_request != NULL) {
		/* need to add the md_redundancy_group */
3124 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3125 printk(KERN_WARNING
3126 "md: cannot register extra attributes for %s\n",
3127 mdname(mddev));
3128 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
3129 }
3130 if (mddev->pers->sync_request != NULL &&
3131 pers->sync_request == NULL) {
		/* need to remove the md_redundancy_group */
3133 if (mddev->to_remove == NULL)
3134 mddev->to_remove = &md_redundancy_group;
3135 }
3136
3137 if (mddev->pers->sync_request == NULL &&
3138 mddev->external) {
		/* We are converting from a no-redundancy array
		 * to a redundancy array and metadata is managed
		 * externally so we need to be sure that writes
		 * won't block due to a need to transition
		 *      clean->dirty
		 * until external management is started.
		 */
3146 mddev->in_sync = 0;
3147 mddev->safemode_delay = 0;
3148 mddev->safemode = 0;
3149 }
3150
3151 list_for_each_entry(rdev, &mddev->disks, same_set) {
3152 char nm[20];
3153 if (rdev->raid_disk < 0)
3154 continue;
3155 if (rdev->new_raid_disk >= mddev->raid_disks)
3156 rdev->new_raid_disk = -1;
3157 if (rdev->new_raid_disk == rdev->raid_disk)
3158 continue;
3159 sprintf(nm, "rd%d", rdev->raid_disk);
3160 sysfs_remove_link(&mddev->kobj, nm);
3161 }
3162 list_for_each_entry(rdev, &mddev->disks, same_set) {
3163 if (rdev->raid_disk < 0)
3164 continue;
3165 if (rdev->new_raid_disk == rdev->raid_disk)
3166 continue;
3167 rdev->raid_disk = rdev->new_raid_disk;
3168 if (rdev->raid_disk < 0)
3169 clear_bit(In_sync, &rdev->flags);
3170 else {
3171 char nm[20];
3172 sprintf(nm, "rd%d", rdev->raid_disk);
			if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
				printk(KERN_WARNING
				       "md: cannot register %s for %s after level change\n",
				       nm, mdname(mddev));
3176 }
3177 }
3178
3179 module_put(mddev->pers->owner);
3180 mddev->pers = pers;
3181 mddev->private = priv;
3182 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3183 mddev->level = mddev->new_level;
3184 mddev->layout = mddev->new_layout;
3185 mddev->chunk_sectors = mddev->new_chunk_sectors;
3186 mddev->delta_disks = 0;
3187 mddev->degraded = 0;
3188 if (mddev->pers->sync_request == NULL) {
		/* this is now an array without redundancy, so
		 * it must always be in_sync
		 */
3192 mddev->in_sync = 1;
3193 del_timer_sync(&mddev->safemode_timer);
3194 }
3195 pers->run(mddev);
3196 mddev_resume(mddev);
3197 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3198 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3199 md_wakeup_thread(mddev->thread);
3200 sysfs_notify(&mddev->kobj, NULL, "level");
3201 md_new_event(mddev);
3202 return rv;
3203}
3204
3205static struct md_sysfs_entry md_level =
3206__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3207
3208
3209static ssize_t
3210layout_show(mddev_t *mddev, char *page)
3211{
	/* just a number, not meaningful for all levels */
3213 if (mddev->reshape_position != MaxSector &&
3214 mddev->layout != mddev->new_layout)
3215 return sprintf(page, "%d (%d)\n",
3216 mddev->new_layout, mddev->layout);
3217 return sprintf(page, "%d\n", mddev->layout);
3218}
3219
3220static ssize_t
3221layout_store(mddev_t *mddev, const char *buf, size_t len)
3222{
3223 char *e;
3224 unsigned long n = simple_strtoul(buf, &e, 10);
3225
3226 if (!*buf || (*e && *e != '\n'))
3227 return -EINVAL;
3228
3229 if (mddev->pers) {
3230 int err;
3231 if (mddev->pers->check_reshape == NULL)
3232 return -EBUSY;
3233 mddev->new_layout = n;
3234 err = mddev->pers->check_reshape(mddev);
3235 if (err) {
3236 mddev->new_layout = mddev->layout;
3237 return err;
3238 }
3239 } else {
3240 mddev->new_layout = n;
3241 if (mddev->reshape_position == MaxSector)
3242 mddev->layout = n;
3243 }
3244 return len;
3245}
3246static struct md_sysfs_entry md_layout =
3247__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3248
3249
3250static ssize_t
3251raid_disks_show(mddev_t *mddev, char *page)
3252{
3253 if (mddev->raid_disks == 0)
3254 return 0;
3255 if (mddev->reshape_position != MaxSector &&
3256 mddev->delta_disks != 0)
3257 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3258 mddev->raid_disks - mddev->delta_disks);
3259 return sprintf(page, "%d\n", mddev->raid_disks);
3260}
3261
3262static int update_raid_disks(mddev_t *mddev, int raid_disks);
3263
3264static ssize_t
3265raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
3266{
3267 char *e;
3268 int rv = 0;
3269 unsigned long n = simple_strtoul(buf, &e, 10);
3270
3271 if (!*buf || (*e && *e != '\n'))
3272 return -EINVAL;
3273
3274 if (mddev->pers)
3275 rv = update_raid_disks(mddev, n);
3276 else if (mddev->reshape_position != MaxSector) {
3277 int olddisks = mddev->raid_disks - mddev->delta_disks;
3278 mddev->delta_disks = n - olddisks;
3279 mddev->raid_disks = n;
3280 } else
3281 mddev->raid_disks = n;
3282 return rv ? rv : len;
3283}
3284static struct md_sysfs_entry md_raid_disks =
3285__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3286
3287static ssize_t
3288chunk_size_show(mddev_t *mddev, char *page)
3289{
3290 if (mddev->reshape_position != MaxSector &&
3291 mddev->chunk_sectors != mddev->new_chunk_sectors)
3292 return sprintf(page, "%d (%d)\n",
3293 mddev->new_chunk_sectors << 9,
3294 mddev->chunk_sectors << 9);
3295 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3296}
3297
3298static ssize_t
3299chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
3300{
3301 char *e;
3302 unsigned long n = simple_strtoul(buf, &e, 10);
3303
3304 if (!*buf || (*e && *e != '\n'))
3305 return -EINVAL;
3306
3307 if (mddev->pers) {
3308 int err;
3309 if (mddev->pers->check_reshape == NULL)
3310 return -EBUSY;
3311 mddev->new_chunk_sectors = n >> 9;
3312 err = mddev->pers->check_reshape(mddev);
3313 if (err) {
3314 mddev->new_chunk_sectors = mddev->chunk_sectors;
3315 return err;
3316 }
3317 } else {
3318 mddev->new_chunk_sectors = n >> 9;
3319 if (mddev->reshape_position == MaxSector)
3320 mddev->chunk_sectors = n >> 9;
3321 }
3322 return len;
3323}
3324static struct md_sysfs_entry md_chunk_size =
3325__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3326
3327static ssize_t
3328resync_start_show(mddev_t *mddev, char *page)
3329{
3330 if (mddev->recovery_cp == MaxSector)
3331 return sprintf(page, "none\n");
3332 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3333}
3334
3335static ssize_t
3336resync_start_store(mddev_t *mddev, const char *buf, size_t len)
3337{
3338 char *e;
3339 unsigned long long n = simple_strtoull(buf, &e, 10);
3340
3341 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3342 return -EBUSY;
3343 if (cmd_match(buf, "none"))
3344 n = MaxSector;
3345 else if (!*buf || (*e && *e != '\n'))
3346 return -EINVAL;
3347
3348 mddev->recovery_cp = n;
3349 return len;
3350}
3351static struct md_sysfs_entry md_resync_start =
3352__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3353
/*
 * The array state can be:
 *
 *  clear
 *     No devices, no size, no level
 *     Equivalent to STOP_ARRAY ioctl
 *  inactive
 *     May have some settings and devices, but array is not
 *     active.  an ioctl will activate it.
 *  suspended (not supported yet)
 *     All IO requests will block. The array can be reconfigured.
 *     Writing this, if accepted, will block until array is quiescent
 *  readonly
 *     no resync can happen.  no superblocks get written.
 *     write requests fail
 *  read-auto
 *     like readonly, but behaves like 'clean' on a write request.
 *
 *  clean - no pending writes, but otherwise active.
 *     When written to inactive array, starts without resync
 *     If a write request arrives then
 *       if metadata is known, mark 'dirty' and switch to 'active'.
 *       if not known, block and switch to write-pending
 *     If written to an active array that has pending writes, then fails.
 *  active
 *     fully active: IO and resync can be happening.
 *     When written to inactive array, starts with resync
 *
 *  write-pending
 *     clean, but writes are blocked waiting for 'active' to be written.
 *
 *  active-idle
 *     like active, but no writes have been seen for a while (100msec).
 *
 */
3390enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3391 write_pending, active_idle, bad_word};
3392static char *array_states[] = {
3393 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3394 "write-pending", "active-idle", NULL };
3395
3396static int match_word(const char *word, char **list)
3397{
3398 int n;
3399 for (n=0; list[n]; n++)
3400 if (cmd_match(word, list[n]))
3401 break;
3402 return n;
3403}
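
/* Illustrative example: match_word("clean\n", array_states) returns the
 * index of "clean" (the enum value 'clean'); an unrecognised word runs
 * off the end of the list, yielding 'bad_word'.
 */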
3404
3405static ssize_t
3406array_state_show(mddev_t *mddev, char *page)
3407{
3408 enum array_state st = inactive;
3409
3410 if (mddev->pers)
3411 switch(mddev->ro) {
3412 case 1:
3413 st = readonly;
3414 break;
3415 case 2:
3416 st = read_auto;
3417 break;
3418 case 0:
3419 if (mddev->in_sync)
3420 st = clean;
3421 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3422 st = write_pending;
3423 else if (mddev->safemode)
3424 st = active_idle;
3425 else
3426 st = active;
3427 }
3428 else {
3429 if (list_empty(&mddev->disks) &&
3430 mddev->raid_disks == 0 &&
3431 mddev->dev_sectors == 0)
3432 st = clear;
3433 else
3434 st = inactive;
3435 }
3436 return sprintf(page, "%s\n", array_states[st]);
3437}
3438
3439static int do_md_stop(mddev_t * mddev, int ro, int is_open);
3440static int md_set_readonly(mddev_t * mddev, int is_open);
3441static int do_md_run(mddev_t * mddev);
3442static int restart_array(mddev_t *mddev);
3443
3444static ssize_t
3445array_state_store(mddev_t *mddev, const char *buf, size_t len)
3446{
3447 int err = -EINVAL;
3448 enum array_state st = match_word(buf, array_states);
3449 switch(st) {
3450 case bad_word:
3451 break;
3452 case clear:
		/* stopping an active array */
3454 if (atomic_read(&mddev->openers) > 0)
3455 return -EBUSY;
3456 err = do_md_stop(mddev, 0, 0);
3457 break;
3458 case inactive:
		/* stopping an active array */
3460 if (mddev->pers) {
3461 if (atomic_read(&mddev->openers) > 0)
3462 return -EBUSY;
3463 err = do_md_stop(mddev, 2, 0);
3464 } else
			err = 0; /* already inactive */
3466 break;
3467 case suspended:
		break; /* not supported yet */
3469 case readonly:
3470 if (mddev->pers)
3471 err = md_set_readonly(mddev, 0);
3472 else {
3473 mddev->ro = 1;
3474 set_disk_ro(mddev->gendisk, 1);
3475 err = do_md_run(mddev);
3476 }
3477 break;
3478 case read_auto:
3479 if (mddev->pers) {
3480 if (mddev->ro == 0)
3481 err = md_set_readonly(mddev, 0);
3482 else if (mddev->ro == 1)
3483 err = restart_array(mddev);
3484 if (err == 0) {
3485 mddev->ro = 2;
3486 set_disk_ro(mddev->gendisk, 0);
3487 }
3488 } else {
3489 mddev->ro = 2;
3490 err = do_md_run(mddev);
3491 }
3492 break;
3493 case clean:
3494 if (mddev->pers) {
3495 restart_array(mddev);
3496 spin_lock_irq(&mddev->write_lock);
3497 if (atomic_read(&mddev->writes_pending) == 0) {
3498 if (mddev->in_sync == 0) {
3499 mddev->in_sync = 1;
3500 if (mddev->safemode == 1)
3501 mddev->safemode = 0;
3502 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3503 }
3504 err = 0;
3505 } else
3506 err = -EBUSY;
3507 spin_unlock_irq(&mddev->write_lock);
3508 } else
3509 err = -EINVAL;
3510 break;
3511 case active:
3512 if (mddev->pers) {
3513 restart_array(mddev);
3514 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3515 wake_up(&mddev->sb_wait);
3516 err = 0;
3517 } else {
3518 mddev->ro = 0;
3519 set_disk_ro(mddev->gendisk, 0);
3520 err = do_md_run(mddev);
3521 }
3522 break;
3523 case write_pending:
3524 case active_idle:
		/* these cannot be set directly */
3526 break;
3527 }
3528 if (err)
3529 return err;
3530 else {
3531 sysfs_notify_dirent_safe(mddev->sysfs_state);
3532 return len;
3533 }
3534}
3535static struct md_sysfs_entry md_array_state =
3536__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3537
3538static ssize_t
max_corrected_read_errors_show(mddev_t *mddev, char *page)
{
3540 return sprintf(page, "%d\n",
3541 atomic_read(&mddev->max_corr_read_errors));
3542}
3543
3544static ssize_t
3545max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len)
3546{
3547 char *e;
3548 unsigned long n = simple_strtoul(buf, &e, 10);
3549
3550 if (*buf && (*e == 0 || *e == '\n')) {
3551 atomic_set(&mddev->max_corr_read_errors, n);
3552 return len;
3553 }
3554 return -EINVAL;
3555}
3556
3557static struct md_sysfs_entry max_corr_read_errors =
3558__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3559 max_corrected_read_errors_store);
3560
3561static ssize_t
3562null_show(mddev_t *mddev, char *page)
3563{
3564 return -EINVAL;
3565}
3566
3567static ssize_t
3568new_dev_store(mddev_t *mddev, const char *buf, size_t len)
3569{
	/* buf must be %d:%d\n? giving major and minor numbers */
	/* The new device is added to the array.
	 * If the array has a persistent superblock, we read the
	 * superblock to initialise info and check validity.
	 * Otherwise, only checking done is that in bind_rdev_to_array,
	 * which mainly checks size.
	 */
3577 char *e;
3578 int major = simple_strtoul(buf, &e, 10);
3579 int minor;
3580 dev_t dev;
3581 mdk_rdev_t *rdev;
3582 int err;
3583
3584 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
3585 return -EINVAL;
3586 minor = simple_strtoul(e+1, &e, 10);
3587 if (*e && *e != '\n')
3588 return -EINVAL;
3589 dev = MKDEV(major, minor);
3590 if (major != MAJOR(dev) ||
3591 minor != MINOR(dev))
3592 return -EOVERFLOW;
3593
3594
3595 if (mddev->persistent) {
3596 rdev = md_import_device(dev, mddev->major_version,
3597 mddev->minor_version);
3598 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
3599 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
3600 mdk_rdev_t, same_set);
3601 err = super_types[mddev->major_version]
3602 .load_super(rdev, rdev0, mddev->minor_version);
3603 if (err < 0)
3604 goto out;
3605 }
3606 } else if (mddev->external)
3607 rdev = md_import_device(dev, -2, -1);
3608 else
3609 rdev = md_import_device(dev, -1, -1);
3610
3611 if (IS_ERR(rdev))
3612 return PTR_ERR(rdev);
3613 err = bind_rdev_to_array(rdev, mddev);
3614 out:
3615 if (err)
3616 export_rdev(rdev);
3617 return err ? err : len;
3618}
3619
3620static struct md_sysfs_entry md_new_device =
3621__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
3622
3623static ssize_t
3624bitmap_store(mddev_t *mddev, const char *buf, size_t len)
3625{
3626 char *end;
3627 unsigned long chunk, end_chunk;
3628
3629 if (!mddev->bitmap)
3630 goto out;
3631
3632 while (*buf) {
3633 chunk = end_chunk = simple_strtoul(buf, &end, 0);
3634 if (buf == end) break;
3635 if (*end == '-') {
3636 buf = end + 1;
3637 end_chunk = simple_strtoul(buf, &end, 0);
3638 if (buf == end) break;
3639 }
3640 if (*end && !isspace(*end)) break;
3641 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
3642 buf = skip_spaces(end);
3643 }
3644 bitmap_unplug(mddev->bitmap);
3645out:
3646 return len;
3647}
3648
3649static struct md_sysfs_entry md_bitmap =
3650__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
3651
3652static ssize_t
3653size_show(mddev_t *mddev, char *page)
3654{
3655 return sprintf(page, "%llu\n",
3656 (unsigned long long)mddev->dev_sectors / 2);
3657}
3658
3659static int update_size(mddev_t *mddev, sector_t num_sectors);
3660
3661static ssize_t
3662size_store(mddev_t *mddev, const char *buf, size_t len)
3663{
	/* If array is inactive, we can reduce the component size, but
	 * not increase it (except from 0).
	 * If array is active, we can try an on-line resize
	 */
3668 sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
3670
3671 if (err < 0)
3672 return err;
3673 if (mddev->pers) {
3674 err = update_size(mddev, sectors);
3675 md_update_sb(mddev, 1);
3676 } else {
3677 if (mddev->dev_sectors == 0 ||
3678 mddev->dev_sectors > sectors)
3679 mddev->dev_sectors = sectors;
3680 else
3681 err = -ENOSPC;
3682 }
3683 return err ? err : len;
3684}
3685
3686static struct md_sysfs_entry md_size =
3687__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
3688
/* Metadata version:
 * This is one of
 *   'none' for arrays with no metadata (good luck...)
 *   'external' for arrays with externally managed metadata,
 * or N.M for internally known formats
 */
3696static ssize_t
3697metadata_show(mddev_t *mddev, char *page)
3698{
3699 if (mddev->persistent)
3700 return sprintf(page, "%d.%d\n",
3701 mddev->major_version, mddev->minor_version);
3702 else if (mddev->external)
3703 return sprintf(page, "external:%s\n", mddev->metadata_type);
3704 else
3705 return sprintf(page, "none\n");
3706}
3707
3708static ssize_t
3709metadata_store(mddev_t *mddev, const char *buf, size_t len)
3710{
3711 int major, minor;
3712 char *e;
3713
	/* Changing the details of 'external' metadata is
	 * always permitted.  Otherwise there must be
	 * no devices attached to the array.
	 */
3717 if (mddev->external && strncmp(buf, "external:", 9) == 0)
3718 ;
3719 else if (!list_empty(&mddev->disks))
3720 return -EBUSY;
3721
3722 if (cmd_match(buf, "none")) {
3723 mddev->persistent = 0;
3724 mddev->external = 0;
3725 mddev->major_version = 0;
3726 mddev->minor_version = 90;
3727 return len;
3728 }
3729 if (strncmp(buf, "external:", 9) == 0) {
3730 size_t namelen = len-9;
3731 if (namelen >= sizeof(mddev->metadata_type))
3732 namelen = sizeof(mddev->metadata_type)-1;
3733 strncpy(mddev->metadata_type, buf+9, namelen);
3734 mddev->metadata_type[namelen] = 0;
3735 if (namelen && mddev->metadata_type[namelen-1] == '\n')
3736 mddev->metadata_type[--namelen] = 0;
3737 mddev->persistent = 0;
3738 mddev->external = 1;
3739 mddev->major_version = 0;
3740 mddev->minor_version = 90;
3741 return len;
3742 }
3743 major = simple_strtoul(buf, &e, 10);
3744 if (e==buf || *e != '.')
3745 return -EINVAL;
3746 buf = e+1;
3747 minor = simple_strtoul(buf, &e, 10);
3748 if (e==buf || (*e && *e != '\n') )
3749 return -EINVAL;
3750 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
3751 return -ENOENT;
3752 mddev->major_version = major;
3753 mddev->minor_version = minor;
3754 mddev->persistent = 1;
3755 mddev->external = 0;
3756 return len;
3757}
3758
3759static struct md_sysfs_entry md_metadata =
3760__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
3761
3762static ssize_t
3763action_show(mddev_t *mddev, char *page)
3764{
3765 char *type = "idle";
3766 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3767 type = "frozen";
3768 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3769 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
3770 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
3771 type = "reshape";
3772 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
3773 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
3774 type = "resync";
3775 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
3776 type = "check";
3777 else
3778 type = "repair";
3779 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
3780 type = "recover";
3781 }
3782 return sprintf(page, "%s\n", type);
3783}
3784
3785static void reap_sync_thread(mddev_t *mddev);
3786
3787static ssize_t
3788action_store(mddev_t *mddev, const char *page, size_t len)
3789{
3790 if (!mddev->pers || !mddev->pers->sync_request)
3791 return -EINVAL;
3792
3793 if (cmd_match(page, "frozen"))
3794 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3795 else
3796 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3797
3798 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
3799 if (mddev->sync_thread) {
3800 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3801 reap_sync_thread(mddev);
3802 }
3803 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3804 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
3805 return -EBUSY;
3806 else if (cmd_match(page, "resync"))
3807 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3808 else if (cmd_match(page, "recover")) {
3809 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
3810 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3811 } else if (cmd_match(page, "reshape")) {
3812 int err;
3813 if (mddev->pers->start_reshape == NULL)
3814 return -EINVAL;
3815 err = mddev->pers->start_reshape(mddev);
3816 if (err)
3817 return err;
3818 sysfs_notify(&mddev->kobj, NULL, "degraded");
3819 } else {
3820 if (cmd_match(page, "check"))
3821 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
3822 else if (!cmd_match(page, "repair"))
3823 return -EINVAL;
3824 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
3825 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
3826 }
3827 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3828 md_wakeup_thread(mddev->thread);
3829 sysfs_notify_dirent_safe(mddev->sysfs_action);
3830 return len;
3831}
3832
3833static ssize_t
3834mismatch_cnt_show(mddev_t *mddev, char *page)
3835{
3836 return sprintf(page, "%llu\n",
3837 (unsigned long long) mddev->resync_mismatches);
3838}
3839
3840static struct md_sysfs_entry md_scan_mode =
3841__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
3842
3843
3844static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
3845
3846static ssize_t
3847sync_min_show(mddev_t *mddev, char *page)
3848{
3849 return sprintf(page, "%d (%s)\n", speed_min(mddev),
3850 mddev->sync_speed_min ? "local": "system");
3851}
3852
3853static ssize_t
3854sync_min_store(mddev_t *mddev, const char *buf, size_t len)
3855{
3856 int min;
3857 char *e;
3858 if (strncmp(buf, "system", 6)==0) {
3859 mddev->sync_speed_min = 0;
3860 return len;
3861 }
3862 min = simple_strtoul(buf, &e, 10);
3863 if (buf == e || (*e && *e != '\n') || min <= 0)
3864 return -EINVAL;
3865 mddev->sync_speed_min = min;
3866 return len;
3867}
3868
3869static struct md_sysfs_entry md_sync_min =
3870__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
3871
3872static ssize_t
3873sync_max_show(mddev_t *mddev, char *page)
3874{
3875 return sprintf(page, "%d (%s)\n", speed_max(mddev),
3876 mddev->sync_speed_max ? "local": "system");
3877}
3878
3879static ssize_t
3880sync_max_store(mddev_t *mddev, const char *buf, size_t len)
3881{
3882 int max;
3883 char *e;
3884 if (strncmp(buf, "system", 6)==0) {
3885 mddev->sync_speed_max = 0;
3886 return len;
3887 }
3888 max = simple_strtoul(buf, &e, 10);
3889 if (buf == e || (*e && *e != '\n') || max <= 0)
3890 return -EINVAL;
3891 mddev->sync_speed_max = max;
3892 return len;
3893}
3894
3895static struct md_sysfs_entry md_sync_max =
3896__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
3897
3898static ssize_t
3899degraded_show(mddev_t *mddev, char *page)
3900{
3901 return sprintf(page, "%d\n", mddev->degraded);
3902}
3903static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
3904
3905static ssize_t
3906sync_force_parallel_show(mddev_t *mddev, char *page)
3907{
3908 return sprintf(page, "%d\n", mddev->parallel_resync);
3909}
3910
3911static ssize_t
3912sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
3913{
3914 long n;
3915
3916 if (strict_strtol(buf, 10, &n))
3917 return -EINVAL;
3918
3919 if (n != 0 && n != 1)
3920 return -EINVAL;
3921
3922 mddev->parallel_resync = n;
3923
3924 if (mddev->sync_thread)
3925 wake_up(&resync_wait);
3926
3927 return len;
3928}
3929
3930
3931static struct md_sysfs_entry md_sync_force_parallel =
3932__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
3933 sync_force_parallel_show, sync_force_parallel_store);
3934
3935static ssize_t
3936sync_speed_show(mddev_t *mddev, char *page)
3937{
3938 unsigned long resync, dt, db;
3939 if (mddev->curr_resync == 0)
3940 return sprintf(page, "none\n");
3941 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
3942 dt = (jiffies - mddev->resync_mark) / HZ;
3943 if (!dt) dt++;
3944 db = resync - mddev->resync_mark_cnt;
3945 return sprintf(page, "%lu\n", db/dt/2);
3946}
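
/* Units sketch (illustrative): 'db' is sectors synced since the last
 * mark, 'dt' is in seconds, so db/dt/2 converts 512-byte sectors per
 * second into the K/sec figure familiar from /proc/mdstat.
 */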
3947
3948static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
3949
3950static ssize_t
3951sync_completed_show(mddev_t *mddev, char *page)
3952{
3953 unsigned long long max_sectors, resync;
3954
3955 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3956 return sprintf(page, "none\n");
3957
3958 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
3959 max_sectors = mddev->resync_max_sectors;
3960 else
3961 max_sectors = mddev->dev_sectors;
3962
3963 resync = mddev->curr_resync_completed;
3964 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
3965}
3966
3967static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
3968
3969static ssize_t
3970min_sync_show(mddev_t *mddev, char *page)
3971{
3972 return sprintf(page, "%llu\n",
3973 (unsigned long long)mddev->resync_min);
3974}
3975static ssize_t
3976min_sync_store(mddev_t *mddev, const char *buf, size_t len)
3977{
3978 unsigned long long min;
3979 if (strict_strtoull(buf, 10, &min))
3980 return -EINVAL;
3981 if (min > mddev->resync_max)
3982 return -EINVAL;
3983 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3984 return -EBUSY;
3985
	/* Must be a multiple of chunk_size */
3987 if (mddev->chunk_sectors) {
3988 sector_t temp = min;
3989 if (sector_div(temp, mddev->chunk_sectors))
3990 return -EINVAL;
3991 }
3992 mddev->resync_min = min;
3993
3994 return len;
3995}
3996
3997static struct md_sysfs_entry md_min_sync =
3998__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
3999
4000static ssize_t
4001max_sync_show(mddev_t *mddev, char *page)
4002{
4003 if (mddev->resync_max == MaxSector)
4004 return sprintf(page, "max\n");
4005 else
4006 return sprintf(page, "%llu\n",
4007 (unsigned long long)mddev->resync_max);
4008}
4009static ssize_t
4010max_sync_store(mddev_t *mddev, const char *buf, size_t len)
4011{
4012 if (strncmp(buf, "max", 3) == 0)
4013 mddev->resync_max = MaxSector;
4014 else {
4015 unsigned long long max;
4016 if (strict_strtoull(buf, 10, &max))
4017 return -EINVAL;
4018 if (max < mddev->resync_min)
4019 return -EINVAL;
4020 if (max < mddev->resync_max &&
4021 mddev->ro == 0 &&
4022 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4023 return -EBUSY;
4024
		/* Must be a multiple of chunk_size */
4026 if (mddev->chunk_sectors) {
4027 sector_t temp = max;
4028 if (sector_div(temp, mddev->chunk_sectors))
4029 return -EINVAL;
4030 }
4031 mddev->resync_max = max;
4032 }
4033 wake_up(&mddev->recovery_wait);
4034 return len;
4035}
4036
4037static struct md_sysfs_entry md_max_sync =
4038__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4039
4040static ssize_t
4041suspend_lo_show(mddev_t *mddev, char *page)
4042{
4043 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4044}
4045
4046static ssize_t
4047suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
4048{
4049 char *e;
4050 unsigned long long new = simple_strtoull(buf, &e, 10);
4051 unsigned long long old = mddev->suspend_lo;
4052
4053 if (mddev->pers == NULL ||
4054 mddev->pers->quiesce == NULL)
4055 return -EINVAL;
4056 if (buf == e || (*e && *e != '\n'))
4057 return -EINVAL;
4058
4059 mddev->suspend_lo = new;
4060 if (new >= old)
		/* Shrinking suspended region */
4062 mddev->pers->quiesce(mddev, 2);
4063 else {
		/* Expanding suspended region - need to wait */
4065 mddev->pers->quiesce(mddev, 1);
4066 mddev->pers->quiesce(mddev, 0);
4067 }
4068 return len;
4069}
4070static struct md_sysfs_entry md_suspend_lo =
4071__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4072
4073
4074static ssize_t
4075suspend_hi_show(mddev_t *mddev, char *page)
4076{
4077 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4078}
4079
4080static ssize_t
4081suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
4082{
4083 char *e;
4084 unsigned long long new = simple_strtoull(buf, &e, 10);
4085 unsigned long long old = mddev->suspend_hi;
4086
4087 if (mddev->pers == NULL ||
4088 mddev->pers->quiesce == NULL)
4089 return -EINVAL;
4090 if (buf == e || (*e && *e != '\n'))
4091 return -EINVAL;
4092
4093 mddev->suspend_hi = new;
4094 if (new <= old)
		/* Shrinking suspended region */
4096 mddev->pers->quiesce(mddev, 2);
4097 else {
		/* Expanding suspended region - need to wait */
4099 mddev->pers->quiesce(mddev, 1);
4100 mddev->pers->quiesce(mddev, 0);
4101 }
4102 return len;
4103}
4104static struct md_sysfs_entry md_suspend_hi =
4105__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4106
4107static ssize_t
4108reshape_position_show(mddev_t *mddev, char *page)
4109{
4110 if (mddev->reshape_position != MaxSector)
4111 return sprintf(page, "%llu\n",
4112 (unsigned long long)mddev->reshape_position);
4113 strcpy(page, "none\n");
4114 return 5;
4115}
4116
4117static ssize_t
4118reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
4119{
4120 char *e;
4121 unsigned long long new = simple_strtoull(buf, &e, 10);
4122 if (mddev->pers)
4123 return -EBUSY;
4124 if (buf == e || (*e && *e != '\n'))
4125 return -EINVAL;
4126 mddev->reshape_position = new;
4127 mddev->delta_disks = 0;
4128 mddev->new_level = mddev->level;
4129 mddev->new_layout = mddev->layout;
4130 mddev->new_chunk_sectors = mddev->chunk_sectors;
4131 return len;
4132}
4133
4134static struct md_sysfs_entry md_reshape_position =
4135__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4136 reshape_position_store);
4137
4138static ssize_t
4139array_size_show(mddev_t *mddev, char *page)
4140{
4141 if (mddev->external_size)
4142 return sprintf(page, "%llu\n",
4143 (unsigned long long)mddev->array_sectors/2);
4144 else
4145 return sprintf(page, "default\n");
4146}
4147
4148static ssize_t
4149array_size_store(mddev_t *mddev, const char *buf, size_t len)
4150{
4151 sector_t sectors;
4152
4153 if (strncmp(buf, "default", 7) == 0) {
4154 if (mddev->pers)
4155 sectors = mddev->pers->size(mddev, 0, 0);
4156 else
4157 sectors = mddev->array_sectors;
4158
4159 mddev->external_size = 0;
4160 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
4162 return -EINVAL;
4163 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4164 return -E2BIG;
4165
4166 mddev->external_size = 1;
4167 }
4168
4169 mddev->array_sectors = sectors;
4170 if (mddev->pers) {
4171 set_capacity(mddev->gendisk, mddev->array_sectors);
4172 revalidate_disk(mddev->gendisk);
4173 }
4174 return len;
4175}
4176
4177static struct md_sysfs_entry md_array_size =
4178__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4179 array_size_store);
4180
4181static struct attribute *md_default_attrs[] = {
4182 &md_level.attr,
4183 &md_layout.attr,
4184 &md_raid_disks.attr,
4185 &md_chunk_size.attr,
4186 &md_size.attr,
4187 &md_resync_start.attr,
4188 &md_metadata.attr,
4189 &md_new_device.attr,
4190 &md_safe_delay.attr,
4191 &md_array_state.attr,
4192 &md_reshape_position.attr,
4193 &md_array_size.attr,
4194 &max_corr_read_errors.attr,
4195 NULL,
4196};
4197
4198static struct attribute *md_redundancy_attrs[] = {
4199 &md_scan_mode.attr,
4200 &md_mismatches.attr,
4201 &md_sync_min.attr,
4202 &md_sync_max.attr,
4203 &md_sync_speed.attr,
4204 &md_sync_force_parallel.attr,
4205 &md_sync_completed.attr,
4206 &md_min_sync.attr,
4207 &md_max_sync.attr,
4208 &md_suspend_lo.attr,
4209 &md_suspend_hi.attr,
4210 &md_bitmap.attr,
4211 &md_degraded.attr,
4212 NULL,
4213};
4214static struct attribute_group md_redundancy_group = {
4215 .name = NULL,
4216 .attrs = md_redundancy_attrs,
4217};
4218
4219
4220static ssize_t
4221md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4222{
4223 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4224 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
4225 ssize_t rv;
4226
4227 if (!entry->show)
4228 return -EIO;
4229 rv = mddev_lock(mddev);
4230 if (!rv) {
4231 rv = entry->show(mddev, page);
4232 mddev_unlock(mddev);
4233 }
4234 return rv;
4235}
4236
4237static ssize_t
4238md_attr_store(struct kobject *kobj, struct attribute *attr,
4239 const char *page, size_t length)
4240{
4241 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4242 mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
4243 ssize_t rv;
4244
4245 if (!entry->store)
4246 return -EIO;
4247 if (!capable(CAP_SYS_ADMIN))
4248 return -EACCES;
4249 rv = mddev_lock(mddev);
4250 if (mddev->hold_active == UNTIL_IOCTL)
4251 mddev->hold_active = 0;
4252 if (!rv) {
4253 rv = entry->store(mddev, page, length);
4254 mddev_unlock(mddev);
4255 }
4256 return rv;
4257}
4258
4259static void md_free(struct kobject *ko)
4260{
4261 mddev_t *mddev = container_of(ko, mddev_t, kobj);
4262
4263 if (mddev->sysfs_state)
4264 sysfs_put(mddev->sysfs_state);
4265
4266 if (mddev->gendisk) {
4267 del_gendisk(mddev->gendisk);
4268 put_disk(mddev->gendisk);
4269 }
4270 if (mddev->queue)
4271 blk_cleanup_queue(mddev->queue);
4272
4273 kfree(mddev);
4274}
4275
4276static const struct sysfs_ops md_sysfs_ops = {
4277 .show = md_attr_show,
4278 .store = md_attr_store,
4279};
4280static struct kobj_type md_ktype = {
4281 .release = md_free,
4282 .sysfs_ops = &md_sysfs_ops,
4283 .default_attrs = md_default_attrs,
4284};
4285
4286int mdp_major = 0;
4287
4288static void mddev_delayed_delete(struct work_struct *ws)
4289{
4290 mddev_t *mddev = container_of(ws, mddev_t, del_work);
4291
4292 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4293 kobject_del(&mddev->kobj);
4294 kobject_put(&mddev->kobj);
4295}
4296
4297static int md_alloc(dev_t dev, char *name)
4298{
4299 static DEFINE_MUTEX(disks_mutex);
4300 mddev_t *mddev = mddev_find(dev);
4301 struct gendisk *disk;
4302 int partitioned;
4303 int shift;
4304 int unit;
4305 int error;
4306
4307 if (!mddev)
4308 return -ENODEV;
4309
4310 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
4311 shift = partitioned ? MdpMinorShift : 0;
4312 unit = MINOR(mddev->unit) >> shift;
4313
	/* wait for any previous instance of this device to be
	 * completely removed (mddev_delayed_delete).
	 */
	flush_workqueue(md_misc_wq);
4317 flush_workqueue(md_misc_wq);
4318
4319 mutex_lock(&disks_mutex);
4320 error = -EEXIST;
4321 if (mddev->gendisk)
4322 goto abort;
4323
4324 if (name) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
4327 mddev_t *mddev2;
4328 spin_lock(&all_mddevs_lock);
4329
4330 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
4331 if (mddev2->gendisk &&
4332 strcmp(mddev2->gendisk->disk_name, name) == 0) {
4333 spin_unlock(&all_mddevs_lock);
4334 goto abort;
4335 }
4336 spin_unlock(&all_mddevs_lock);
4337 }
4338
4339 error = -ENOMEM;
4340 mddev->queue = blk_alloc_queue(GFP_KERNEL);
4341 if (!mddev->queue)
4342 goto abort;
4343 mddev->queue->queuedata = mddev;
4344
4345 blk_queue_make_request(mddev->queue, md_make_request);
4346
4347 disk = alloc_disk(1 << shift);
4348 if (!disk) {
4349 blk_cleanup_queue(mddev->queue);
4350 mddev->queue = NULL;
4351 goto abort;
4352 }
4353 disk->major = MAJOR(mddev->unit);
4354 disk->first_minor = unit << shift;
4355 if (name)
4356 strcpy(disk->disk_name, name);
4357 else if (partitioned)
4358 sprintf(disk->disk_name, "md_d%d", unit);
4359 else
4360 sprintf(disk->disk_name, "md%d", unit);
4361 disk->fops = &md_fops;
4362 disk->private_data = mddev;
4363 disk->queue = mddev->queue;
4364 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4365
	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really
	 * remove it now.
	 */
4369 disk->flags |= GENHD_FL_EXT_DEVT;
4370 mddev->gendisk = disk;
	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far
	 */
4374 mutex_lock(&mddev->open_mutex);
4375 add_disk(disk);
4376
4377 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
4378 &disk_to_dev(disk)->kobj, "%s", "md");
4379 if (error) {
		/* This isn't possible, but as kobject_init_and_add is marked
		 * __must_check, we must do something with the result
		 */
4383 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
4384 disk->disk_name);
4385 error = 0;
4386 }
4387 if (mddev->kobj.sd &&
4388 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4389 printk(KERN_DEBUG "pointless warning\n");
4390 mutex_unlock(&mddev->open_mutex);
4391 abort:
4392 mutex_unlock(&disks_mutex);
4393 if (!error && mddev->kobj.sd) {
4394 kobject_uevent(&mddev->kobj, KOBJ_ADD);
4395 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4396 }
4397 mddev_put(mddev);
4398 return error;
4399}
4400
4401static struct kobject *md_probe(dev_t dev, int *part, void *data)
4402{
4403 md_alloc(dev, NULL);
4404 return NULL;
4405}
4406
4407static int add_named_array(const char *val, struct kernel_param *kp)
4408{
	/* val must be "md_*" where * is not all digits.
	 * We allocate an array with a large free minor number, and
	 * set the name to val.  val must not already be an active name.
	 */
4413 int len = strlen(val);
4414 char buf[DISK_NAME_LEN];
4415
4416 while (len && val[len-1] == '\n')
4417 len--;
4418 if (len >= DISK_NAME_LEN)
4419 return -E2BIG;
4420 strlcpy(buf, val, len+1);
4421 if (strncmp(buf, "md_", 3) != 0)
4422 return -EINVAL;
4423 return md_alloc(0, buf);
4424}
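
/* Illustrative use, assuming this is wired to a module parameter named
 * 'new_array' (as in mainline md):
 *
 *	echo md_home > /sys/module/md_mod/parameters/new_array
 *
 * creates a new array whose gendisk is called "md_home".
 */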
4425
4426static void md_safemode_timeout(unsigned long data)
4427{
4428 mddev_t *mddev = (mddev_t *) data;
4429
4430 if (!atomic_read(&mddev->writes_pending)) {
4431 mddev->safemode = 1;
4432 if (mddev->external)
4433 sysfs_notify_dirent_safe(mddev->sysfs_state);
4434 }
4435 md_wakeup_thread(mddev->thread);
4436}
4437
4438static int start_dirty_degraded;
4439
4440int md_run(mddev_t *mddev)
4441{
4442 int err;
4443 mdk_rdev_t *rdev;
4444 struct mdk_personality *pers;
4445
4446 if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
4448 return -EINVAL;
4449
4450 if (mddev->pers)
4451 return -EBUSY;
4452
4453 if (mddev->sysfs_active)
4454 return -EBUSY;
4455
	/*
	 * Analyze all RAID superblock(s)
	 */
4459 if (!mddev->raid_disks) {
4460 if (!mddev->persistent)
4461 return -EINVAL;
4462 analyze_sbs(mddev);
4463 }
4464
4465 if (mddev->level != LEVEL_NONE)
4466 request_module("md-level-%d", mddev->level);
4467 else if (mddev->clevel[0])
4468 request_module("md-%s", mddev->clevel);
4469
	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
4475 list_for_each_entry(rdev, &mddev->disks, same_set) {
4476 if (test_bit(Faulty, &rdev->flags))
4477 continue;
4478 sync_blockdev(rdev->bdev);
4479 invalidate_bdev(rdev->bdev);
4480
		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled elsewhere.
		 */
4485 if (rdev->meta_bdev) {
4486 ;
4487 } else if (rdev->data_offset < rdev->sb_start) {
4488 if (mddev->dev_sectors &&
4489 rdev->data_offset + mddev->dev_sectors
4490 > rdev->sb_start) {
4491 printk("md: %s: data overlaps metadata\n",
4492 mdname(mddev));
4493 return -EINVAL;
4494 }
4495 } else {
4496 if (rdev->sb_start + rdev->sb_size/512
4497 > rdev->data_offset) {
4498 printk("md: %s: metadata overlaps data\n",
4499 mdname(mddev));
4500 return -EINVAL;
4501 }
4502 }
4503 sysfs_notify_dirent_safe(rdev->sysfs_state);
4504 }
4505
4506 if (mddev->bio_set == NULL)
4507 mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
4508
4509 spin_lock(&pers_lock);
4510 pers = find_pers(mddev->level, mddev->clevel);
4511 if (!pers || !try_module_get(pers->owner)) {
4512 spin_unlock(&pers_lock);
4513 if (mddev->level != LEVEL_NONE)
4514 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
4515 mddev->level);
4516 else
4517 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
4518 mddev->clevel);
4519 return -EINVAL;
4520 }
4521 mddev->pers = pers;
4522 spin_unlock(&pers_lock);
4523 if (mddev->level != pers->level) {
4524 mddev->level = pers->level;
4525 mddev->new_level = pers->level;
4526 }
4527 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4528
4529 if (mddev->reshape_position != MaxSector &&
4530 pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
4532 mddev->pers = NULL;
4533 module_put(pers->owner);
4534 return -EINVAL;
4535 }
4536
4537 if (pers->sync_request) {
		/* Warn if this is a potentially silly
		 * configuration.
		 */
4541 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
4542 mdk_rdev_t *rdev2;
4543 int warned = 0;
4544
4545 list_for_each_entry(rdev, &mddev->disks, same_set)
4546 list_for_each_entry(rdev2, &mddev->disks, same_set) {
4547 if (rdev < rdev2 &&
4548 rdev->bdev->bd_contains ==
4549 rdev2->bdev->bd_contains) {
4550 printk(KERN_WARNING
4551 "%s: WARNING: %s appears to be"
4552 " on the same physical disk as"
4553 " %s.\n",
4554 mdname(mddev),
4555 bdevname(rdev->bdev,b),
4556 bdevname(rdev2->bdev,b2));
4557 warned = 1;
4558 }
4559 }
4560
4561 if (warned)
4562 printk(KERN_WARNING
4563 "True protection against single-disk"
4564 " failure might be compromised.\n");
4565 }
4566
4567 mddev->recovery = 0;
	/* may be over-ridden by personality */
4569 mddev->resync_max_sectors = mddev->dev_sectors;
4570
4571 mddev->ok_start_degraded = start_dirty_degraded;
4572
4573 if (start_readonly && mddev->ro == 0)
		mddev->ro = 2; /* read-only, but switch on first write */
4575
4576 err = mddev->pers->run(mddev);
4577 if (err)
4578 printk(KERN_ERR "md: pers->run() failed ...\n");
4579 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
4580 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
4581 " but 'external_size' not in effect?\n", __func__);
4582 printk(KERN_ERR
4583 "md: invalid array_size %llu > default size %llu\n",
4584 (unsigned long long)mddev->array_sectors / 2,
4585 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
4586 err = -EINVAL;
4587 mddev->pers->stop(mddev);
4588 }
4589 if (err == 0 && mddev->pers->sync_request) {
4590 err = bitmap_create(mddev);
4591 if (err) {
4592 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
4593 mdname(mddev), err);
4594 mddev->pers->stop(mddev);
4595 }
4596 }
4597 if (err) {
4598 module_put(mddev->pers->owner);
4599 mddev->pers = NULL;
4600 bitmap_destroy(mddev);
4601 return err;
4602 }
4603 if (mddev->pers->sync_request) {
4604 if (mddev->kobj.sd &&
4605 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4606 printk(KERN_WARNING
4607 "md: cannot register extra attributes for %s\n",
4608 mdname(mddev));
4609 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
4610 } else if (mddev->ro == 2)
4611 mddev->ro = 0;
4612
4613 atomic_set(&mddev->writes_pending,0);
4614 atomic_set(&mddev->max_corr_read_errors,
4615 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
4616 mddev->safemode = 0;
4617 mddev->safemode_timer.function = md_safemode_timeout;
4618 mddev->safemode_timer.data = (unsigned long) mddev;
	mddev->safemode_delay = (200 * HZ)/1000 + 1; /* ~200 msec delay */
4620 mddev->in_sync = 1;
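	/* publish: the state set above must be visible before ->ready is set */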
4621 smp_wmb();
4622 mddev->ready = 1;
4623 list_for_each_entry(rdev, &mddev->disks, same_set)
4624 if (rdev->raid_disk >= 0) {
4625 char nm[20];
4626 sprintf(nm, "rd%d", rdev->raid_disk);
			if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
				/* failure here is OK */;
4629 }
4630
4631 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4632
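	/* flush any superblock changes accumulated during assembly */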
4633 if (mddev->flags)
4634 md_update_sb(mddev, 0);
4635
4636 md_new_event(mddev);
4637 sysfs_notify_dirent_safe(mddev->sysfs_state);
4638 sysfs_notify_dirent_safe(mddev->sysfs_action);
4639 sysfs_notify(&mddev->kobj, NULL, "degraded");
4640 return 0;
4641}
4642EXPORT_SYMBOL_GPL(md_run);
4643
4644static int do_md_run(mddev_t *mddev)
4645{
4646 int err;
4647
4648 err = md_run(mddev);
4649 if (err)
4650 goto out;
4651 err = bitmap_load(mddev);
4652 if (err) {
4653 bitmap_destroy(mddev);
4654 goto out;
4655 }
4656
4657 md_wakeup_thread(mddev->thread);
4658 md_wakeup_thread(mddev->sync_thread);
4659
4660 set_capacity(mddev->gendisk, mddev->array_sectors);
4661 revalidate_disk(mddev->gendisk);
4662 mddev->changed = 1;
4663 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4664out:
4665 return err;
4666}
4667
4668static int restart_array(mddev_t *mddev)
4669{
4670 struct gendisk *disk = mddev->gendisk;
4671
	/* Complain if it has no devices */
4673 if (list_empty(&mddev->disks))
4674 return -ENXIO;
4675 if (!mddev->pers)
4676 return -EINVAL;
4677 if (!mddev->ro)
4678 return -EBUSY;
4679 mddev->safemode = 0;
4680 mddev->ro = 0;
4681 set_disk_ro(disk, 0);
4682 printk(KERN_INFO "md: %s switched to read-write mode.\n",
4683 mdname(mddev));
4684
4685 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4686 md_wakeup_thread(mddev->thread);
4687 md_wakeup_thread(mddev->sync_thread);
4688 sysfs_notify_dirent_safe(mddev->sysfs_state);
4689 return 0;
4690}
4691
/*
 * Similar to deny_write_access, but accounts for the reference
 * we hold on the bitmap file ourselves.
 */
4694static int deny_bitmap_write_access(struct file * file)
4695{
4696 struct inode *inode = file->f_mapping->host;
4697
4698 spin_lock(&inode->i_lock);
4699 if (atomic_read(&inode->i_writecount) > 1) {
4700 spin_unlock(&inode->i_lock);
4701 return -ETXTBSY;
4702 }
4703 atomic_set(&inode->i_writecount, -1);
4704 spin_unlock(&inode->i_lock);
4705
4706 return 0;
4707}
4708
4709void restore_bitmap_write_access(struct file *file)
4710{
4711 struct inode *inode = file->f_mapping->host;
4712
4713 spin_lock(&inode->i_lock);
4714 atomic_set(&inode->i_writecount, 1);
4715 spin_unlock(&inode->i_lock);
4716}
4717
4718static void md_clean(mddev_t *mddev)
4719{
4720 mddev->array_sectors = 0;
4721 mddev->external_size = 0;
4722 mddev->dev_sectors = 0;
4723 mddev->raid_disks = 0;
4724 mddev->recovery_cp = 0;
4725 mddev->resync_min = 0;
4726 mddev->resync_max = MaxSector;
4727 mddev->reshape_position = MaxSector;
4728 mddev->external = 0;
4729 mddev->persistent = 0;
4730 mddev->level = LEVEL_NONE;
4731 mddev->clevel[0] = 0;
4732 mddev->flags = 0;
4733 mddev->ro = 0;
4734 mddev->metadata_type[0] = 0;
4735 mddev->chunk_sectors = 0;
4736 mddev->ctime = mddev->utime = 0;
4737 mddev->layout = 0;
4738 mddev->max_disks = 0;
4739 mddev->events = 0;
4740 mddev->can_decrease_events = 0;
4741 mddev->delta_disks = 0;
4742 mddev->new_level = LEVEL_NONE;
4743 mddev->new_layout = 0;
4744 mddev->new_chunk_sectors = 0;
4745 mddev->curr_resync = 0;
4746 mddev->resync_mismatches = 0;
4747 mddev->suspend_lo = mddev->suspend_hi = 0;
4748 mddev->sync_speed_min = mddev->sync_speed_max = 0;
4749 mddev->recovery = 0;
4750 mddev->in_sync = 0;
4751 mddev->changed = 0;
4752 mddev->degraded = 0;
4753 mddev->safemode = 0;
4754 mddev->bitmap_info.offset = 0;
4755 mddev->bitmap_info.default_offset = 0;
4756 mddev->bitmap_info.chunksize = 0;
4757 mddev->bitmap_info.daemon_sleep = 0;
4758 mddev->bitmap_info.max_write_behind = 0;
4759}
4760
4761static void __md_stop_writes(mddev_t *mddev)
4762{
4763 if (mddev->sync_thread) {
4764 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4765 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4766 reap_sync_thread(mddev);
4767 }
4768
4769 del_timer_sync(&mddev->safemode_timer);
4770
4771 bitmap_flush(mddev);
4772 md_super_wait(mddev);
4773
4774 if (!mddev->in_sync || mddev->flags) {
		/* mark array as shutdown cleanly */
4776 mddev->in_sync = 1;
4777 md_update_sb(mddev, 1);
4778 }
4779}
4780
4781void md_stop_writes(mddev_t *mddev)
4782{
4783 mddev_lock(mddev);
4784 __md_stop_writes(mddev);
4785 mddev_unlock(mddev);
4786}
4787EXPORT_SYMBOL_GPL(md_stop_writes);
4788
4789void md_stop(mddev_t *mddev)
4790{
4791 mddev->ready = 0;
4792 mddev->pers->stop(mddev);
4793 if (mddev->pers->sync_request && mddev->to_remove == NULL)
4794 mddev->to_remove = &md_redundancy_group;
4795 module_put(mddev->pers->owner);
4796 mddev->pers = NULL;
4797 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4798}
4799EXPORT_SYMBOL_GPL(md_stop);
4800
4801static int md_set_readonly(mddev_t *mddev, int is_open)
4802{
4803 int err = 0;
4804 mutex_lock(&mddev->open_mutex);
4805 if (atomic_read(&mddev->openers) > is_open) {
4806 printk("md: %s still in use.\n",mdname(mddev));
4807 err = -EBUSY;
4808 goto out;
4809 }
4810 if (mddev->pers) {
4811 __md_stop_writes(mddev);
4812
4813 err = -ENXIO;
4814 if (mddev->ro==1)
4815 goto out;
4816 mddev->ro = 1;
4817 set_disk_ro(mddev->gendisk, 1);
4818 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4819 sysfs_notify_dirent_safe(mddev->sysfs_state);
4820 err = 0;
4821 }
4822out:
4823 mutex_unlock(&mddev->open_mutex);
4824 return err;
4825}
4826
/* mode:
 *   0 - completely stop and dis-assemble array
 *   2 - stop but do not disassemble array
 */
4831static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4832{
4833 struct gendisk *disk = mddev->gendisk;
4834 mdk_rdev_t *rdev;
4835
4836 mutex_lock(&mddev->open_mutex);
4837 if (atomic_read(&mddev->openers) > is_open ||
4838 mddev->sysfs_active) {
4839 printk("md: %s still in use.\n",mdname(mddev));
4840 mutex_unlock(&mddev->open_mutex);
4841 return -EBUSY;
4842 }
4843
4844 if (mddev->pers) {
4845 if (mddev->ro)
4846 set_disk_ro(disk, 0);
4847
4848 __md_stop_writes(mddev);
4849 md_stop(mddev);
4850 mddev->queue->merge_bvec_fn = NULL;
4851 mddev->queue->backing_dev_info.congested_fn = NULL;
4852
		/* tell userspace to handle 'inactive' */
4854 sysfs_notify_dirent_safe(mddev->sysfs_state);
4855
4856 list_for_each_entry(rdev, &mddev->disks, same_set)
4857 if (rdev->raid_disk >= 0) {
4858 char nm[20];
4859 sprintf(nm, "rd%d", rdev->raid_disk);
4860 sysfs_remove_link(&mddev->kobj, nm);
4861 }
4862
4863 set_capacity(disk, 0);
4864 mutex_unlock(&mddev->open_mutex);
4865 mddev->changed = 1;
4866 revalidate_disk(disk);
4867
4868 if (mddev->ro)
4869 mddev->ro = 0;
4870 } else
4871 mutex_unlock(&mddev->open_mutex);
4872
	/*
	 * Free resources if final stop
	 */
4875 if (mode == 0) {
4876 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
4877
4878 bitmap_destroy(mddev);
4879 if (mddev->bitmap_info.file) {
4880 restore_bitmap_write_access(mddev->bitmap_info.file);
4881 fput(mddev->bitmap_info.file);
4882 mddev->bitmap_info.file = NULL;
4883 }
4884 mddev->bitmap_info.offset = 0;
4885
4886 export_array(mddev);
4887
4888 md_clean(mddev);
4889 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4890 if (mddev->hold_active == UNTIL_STOP)
4891 mddev->hold_active = 0;
4892 }
4893 blk_integrity_unregister(disk);
4894 md_new_event(mddev);
4895 sysfs_notify_dirent_safe(mddev->sysfs_state);
4896 return 0;
4897}
4898
4899#ifndef MODULE
4900static void autorun_array(mddev_t *mddev)
4901{
4902 mdk_rdev_t *rdev;
4903 int err;
4904
4905 if (list_empty(&mddev->disks))
4906 return;
4907
4908 printk(KERN_INFO "md: running: ");
4909
4910 list_for_each_entry(rdev, &mddev->disks, same_set) {
4911 char b[BDEVNAME_SIZE];
4912 printk("<%s>", bdevname(rdev->bdev,b));
4913 }
4914 printk("\n");
4915
4916 err = do_md_run(mddev);
4917 if (err) {
4918 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
4919 do_md_stop(mddev, 0, 0);
4920 }
4921}
4922
/*
 * Try to run arrays based on all disks that have arrived
 * until now (those are in pending_raid_disks).
 *
 * The method: pick the first pending disk, collect all disks with
 * the same UUID into a 'candidates' list, allocate (or find) the
 * mddev matching the preferred minor from the superblock, bind the
 * candidates to it, and run the resulting array.
 */
4935static void autorun_devices(int part)
4936{
4937 mdk_rdev_t *rdev0, *rdev, *tmp;
4938 mddev_t *mddev;
4939 char b[BDEVNAME_SIZE];
4940
4941 printk(KERN_INFO "md: autorun ...\n");
4942 while (!list_empty(&pending_raid_disks)) {
4943 int unit;
4944 dev_t dev;
4945 LIST_HEAD(candidates);
4946 rdev0 = list_entry(pending_raid_disks.next,
4947 mdk_rdev_t, same_set);
4948
4949 printk(KERN_INFO "md: considering %s ...\n",
4950 bdevname(rdev0->bdev,b));
4951 INIT_LIST_HEAD(&candidates);
4952 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
4953 if (super_90_load(rdev, rdev0, 0) >= 0) {
4954 printk(KERN_INFO "md: adding %s ...\n",
4955 bdevname(rdev->bdev,b));
4956 list_move(&rdev->same_set, &candidates);
4957 }
4958
		/*
		 * now we have a set of devices, with all of them having
		 * mostly sane superblocks. It's time to allocate the
		 * mddev.
		 */
4963 if (part) {
4964 dev = MKDEV(mdp_major,
4965 rdev0->preferred_minor << MdpMinorShift);
4966 unit = MINOR(dev) >> MdpMinorShift;
4967 } else {
4968 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
4969 unit = MINOR(dev);
4970 }
4971 if (rdev0->preferred_minor != unit) {
4972 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
4973 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
4974 break;
4975 }
4976
4977 md_probe(dev, NULL, NULL);
4978 mddev = mddev_find(dev);
4979 if (!mddev || !mddev->gendisk) {
4980 if (mddev)
4981 mddev_put(mddev);
4982 printk(KERN_ERR
4983 "md: cannot allocate memory for md drive.\n");
4984 break;
4985 }
4986 if (mddev_lock(mddev))
4987 printk(KERN_WARNING "md: %s locked, cannot run\n",
4988 mdname(mddev));
4989 else if (mddev->raid_disks || mddev->major_version
4990 || !list_empty(&mddev->disks)) {
4991 printk(KERN_WARNING
4992 "md: %s already running, cannot run %s\n",
4993 mdname(mddev), bdevname(rdev0->bdev,b));
4994 mddev_unlock(mddev);
4995 } else {
4996 printk(KERN_INFO "md: created %s\n", mdname(mddev));
4997 mddev->persistent = 1;
4998 rdev_for_each_list(rdev, tmp, &candidates) {
4999 list_del_init(&rdev->same_set);
5000 if (bind_rdev_to_array(rdev, mddev))
5001 export_rdev(rdev);
5002 }
5003 autorun_array(mddev);
5004 mddev_unlock(mddev);
5005 }
5006
		/* on success, candidates will be empty, on error
		 * it won't...
		 */
5009 rdev_for_each_list(rdev, tmp, &candidates) {
5010 list_del_init(&rdev->same_set);
5011 export_rdev(rdev);
5012 }
5013 mddev_put(mddev);
5014 }
5015 printk(KERN_INFO "md: ... autorun DONE.\n");
5016}
5017#endif
5018
5019static int get_version(void __user * arg)
5020{
5021 mdu_version_t ver;
5022
5023 ver.major = MD_MAJOR_VERSION;
5024 ver.minor = MD_MINOR_VERSION;
5025 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5026
5027 if (copy_to_user(arg, &ver, sizeof(ver)))
5028 return -EFAULT;
5029
5030 return 0;
5031}
5032
5033static int get_array_info(mddev_t * mddev, void __user * arg)
5034{
5035 mdu_array_info_t info;
5036 int nr,working,insync,failed,spare;
5037 mdk_rdev_t *rdev;
5038
5039 nr=working=insync=failed=spare=0;
5040 list_for_each_entry(rdev, &mddev->disks, same_set) {
5041 nr++;
5042 if (test_bit(Faulty, &rdev->flags))
5043 failed++;
5044 else {
5045 working++;
5046 if (test_bit(In_sync, &rdev->flags))
5047 insync++;
5048 else
5049 spare++;
5050 }
5051 }
5052
5053 info.major_version = mddev->major_version;
5054 info.minor_version = mddev->minor_version;
5055 info.patch_version = MD_PATCHLEVEL_VERSION;
5056 info.ctime = mddev->ctime;
5057 info.level = mddev->level;
5058 info.size = mddev->dev_sectors / 2;
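	/* dev_sectors/2 may not fit in the int 'size' field: flag overflow with -1 */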
5059 if (info.size != mddev->dev_sectors / 2)
5060 info.size = -1;
5061 info.nr_disks = nr;
5062 info.raid_disks = mddev->raid_disks;
5063 info.md_minor = mddev->md_minor;
5064 info.not_persistent= !mddev->persistent;
5065
5066 info.utime = mddev->utime;
5067 info.state = 0;
5068 if (mddev->in_sync)
5069 info.state = (1<<MD_SB_CLEAN);
5070 if (mddev->bitmap && mddev->bitmap_info.offset)
		info.state |= (1<<MD_SB_BITMAP_PRESENT); /* don't clobber MD_SB_CLEAN */
5072 info.active_disks = insync;
5073 info.working_disks = working;
5074 info.failed_disks = failed;
5075 info.spare_disks = spare;
5076
5077 info.layout = mddev->layout;
5078 info.chunk_size = mddev->chunk_sectors << 9;
5079
5080 if (copy_to_user(arg, &info, sizeof(info)))
5081 return -EFAULT;
5082
5083 return 0;
5084}
5085
5086static int get_bitmap_file(mddev_t * mddev, void __user * arg)
5087{
5088 mdu_bitmap_file_t *file = NULL;
5089 char *ptr, *buf = NULL;
5090 int err = -ENOMEM;
5091
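	/* if md_allow_write() could not mark the array active, a GFP_KERNEL
	 * allocation might block on writeback to this same array, so use
	 * GFP_NOIO instead */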
5092 if (md_allow_write(mddev))
5093 file = kmalloc(sizeof(*file), GFP_NOIO);
5094 else
5095 file = kmalloc(sizeof(*file), GFP_KERNEL);
5096
5097 if (!file)
5098 goto out;
5099
	/* bitmap disabled, zero the first byte and copy out */
5101 if (!mddev->bitmap || !mddev->bitmap->file) {
5102 file->pathname[0] = '\0';
5103 goto copy_out;
5104 }
5105
5106 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
5107 if (!buf)
5108 goto out;
5109
5110 ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
5111 if (IS_ERR(ptr))
5112 goto out;
5113
5114 strcpy(file->pathname, ptr);
5115
5116copy_out:
5117 err = 0;
5118 if (copy_to_user(arg, file, sizeof(*file)))
5119 err = -EFAULT;
5120out:
5121 kfree(buf);
5122 kfree(file);
5123 return err;
5124}
5125
5126static int get_disk_info(mddev_t * mddev, void __user * arg)
5127{
5128 mdu_disk_info_t info;
5129 mdk_rdev_t *rdev;
5130
5131 if (copy_from_user(&info, arg, sizeof(info)))
5132 return -EFAULT;
5133
5134 rdev = find_rdev_nr(mddev, info.number);
5135 if (rdev) {
5136 info.major = MAJOR(rdev->bdev->bd_dev);
5137 info.minor = MINOR(rdev->bdev->bd_dev);
5138 info.raid_disk = rdev->raid_disk;
5139 info.state = 0;
5140 if (test_bit(Faulty, &rdev->flags))
5141 info.state |= (1<<MD_DISK_FAULTY);
5142 else if (test_bit(In_sync, &rdev->flags)) {
5143 info.state |= (1<<MD_DISK_ACTIVE);
5144 info.state |= (1<<MD_DISK_SYNC);
5145 }
5146 if (test_bit(WriteMostly, &rdev->flags))
5147 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5148 } else {
5149 info.major = info.minor = 0;
5150 info.raid_disk = -1;
5151 info.state = (1<<MD_DISK_REMOVED);
5152 }
5153
5154 if (copy_to_user(arg, &info, sizeof(info)))
5155 return -EFAULT;
5156
5157 return 0;
5158}
5159
5160static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5161{
5162 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5163 mdk_rdev_t *rdev;
5164 dev_t dev = MKDEV(info->major,info->minor);
5165
5166 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5167 return -EOVERFLOW;
5168
5169 if (!mddev->raid_disks) {
5170 int err;
5171
5172 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5173 if (IS_ERR(rdev)) {
5174 printk(KERN_WARNING
5175 "md: md_import_device returned %ld\n",
5176 PTR_ERR(rdev));
5177 return PTR_ERR(rdev);
5178 }
5179 if (!list_empty(&mddev->disks)) {
5180 mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
5181 mdk_rdev_t, same_set);
5182 err = super_types[mddev->major_version]
5183 .load_super(rdev, rdev0, mddev->minor_version);
5184 if (err < 0) {
5185 printk(KERN_WARNING
5186 "md: %s has different UUID to %s\n",
5187 bdevname(rdev->bdev,b),
5188 bdevname(rdev0->bdev,b2));
5189 export_rdev(rdev);
5190 return -EINVAL;
5191 }
5192 }
5193 err = bind_rdev_to_array(rdev, mddev);
5194 if (err)
5195 export_rdev(rdev);
5196 return err;
5197 }
5198
	/*
	 * add_new_disk can be used once the array is assembled
	 * to add "hot spares".  They must already have a superblock
	 * written.
	 */
5204 if (mddev->pers) {
5205 int err;
5206 if (!mddev->pers->hot_add_disk) {
5207 printk(KERN_WARNING
5208 "%s: personality does not support diskops!\n",
5209 mdname(mddev));
5210 return -EINVAL;
5211 }
5212 if (mddev->persistent)
5213 rdev = md_import_device(dev, mddev->major_version,
5214 mddev->minor_version);
5215 else
5216 rdev = md_import_device(dev, -1, -1);
5217 if (IS_ERR(rdev)) {
5218 printk(KERN_WARNING
5219 "md: md_import_device returned %ld\n",
5220 PTR_ERR(rdev));
5221 return PTR_ERR(rdev);
5222 }
5223
5224 if (!mddev->persistent) {
5225 if (info->state & (1<<MD_DISK_SYNC) &&
5226 info->raid_disk < mddev->raid_disks) {
5227 rdev->raid_disk = info->raid_disk;
5228 set_bit(In_sync, &rdev->flags);
5229 } else
5230 rdev->raid_disk = -1;
5231 } else
5232 super_types[mddev->major_version].
5233 validate_super(mddev, rdev);
5234 if ((info->state & (1<<MD_DISK_SYNC)) &&
5235 (!test_bit(In_sync, &rdev->flags) ||
5236 rdev->raid_disk != info->raid_disk)) {
			/* This was a hot-add request, but events don't
			 * match, so reject it.
			 */
5240 export_rdev(rdev);
5241 return -EINVAL;
5242 }
5243
5244 if (test_bit(In_sync, &rdev->flags))
5245 rdev->saved_raid_disk = rdev->raid_disk;
5246 else
5247 rdev->saved_raid_disk = -1;
5248
5249 clear_bit(In_sync, &rdev->flags);
5250 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5251 set_bit(WriteMostly, &rdev->flags);
5252 else
5253 clear_bit(WriteMostly, &rdev->flags);
5254
5255 rdev->raid_disk = -1;
5256 err = bind_rdev_to_array(rdev, mddev);
5257 if (!err && !mddev->pers->hot_remove_disk) {
			/* If there is hot_add_disk but no hot_remove_disk
			 * then added disks are for geometry changes,
			 * and should be activated immediately.
			 */
5262 super_types[mddev->major_version].
5263 validate_super(mddev, rdev);
5264 err = mddev->pers->hot_add_disk(mddev, rdev);
5265 if (err)
5266 unbind_rdev_from_array(rdev);
5267 }
5268 if (err)
5269 export_rdev(rdev);
5270 else
5271 sysfs_notify_dirent_safe(rdev->sysfs_state);
5272
5273 md_update_sb(mddev, 1);
5274 if (mddev->degraded)
5275 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5276 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5277 if (!err)
5278 md_new_event(mddev);
5279 md_wakeup_thread(mddev->thread);
5280 return err;
5281 }
5282
	/* otherwise, add_new_disk is only allowed
	 * for major_version==0 superblocks
	 */
5286 if (mddev->major_version != 0) {
5287 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
5288 mdname(mddev));
5289 return -EINVAL;
5290 }
5291
5292 if (!(info->state & (1<<MD_DISK_FAULTY))) {
5293 int err;
5294 rdev = md_import_device(dev, -1, 0);
5295 if (IS_ERR(rdev)) {
5296 printk(KERN_WARNING
5297 "md: error, md_import_device() returned %ld\n",
5298 PTR_ERR(rdev));
5299 return PTR_ERR(rdev);
5300 }
5301 rdev->desc_nr = info->number;
5302 if (info->raid_disk < mddev->raid_disks)
5303 rdev->raid_disk = info->raid_disk;
5304 else
5305 rdev->raid_disk = -1;
5306
5307 if (rdev->raid_disk < mddev->raid_disks)
5308 if (info->state & (1<<MD_DISK_SYNC))
5309 set_bit(In_sync, &rdev->flags);
5310
5311 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5312 set_bit(WriteMostly, &rdev->flags);
5313
5314 if (!mddev->persistent) {
5315 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5316 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5317 } else
5318 rdev->sb_start = calc_dev_sboffset(rdev);
5319 rdev->sectors = rdev->sb_start;
5320
5321 err = bind_rdev_to_array(rdev, mddev);
5322 if (err) {
5323 export_rdev(rdev);
5324 return err;
5325 }
5326 }
5327
5328 return 0;
5329}
5330
5331static int hot_remove_disk(mddev_t * mddev, dev_t dev)
5332{
5333 char b[BDEVNAME_SIZE];
5334 mdk_rdev_t *rdev;
5335
5336 rdev = find_rdev(mddev, dev);
5337 if (!rdev)
5338 return -ENXIO;
5339
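	/* still an active member of the array: refuse to remove it */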
5340 if (rdev->raid_disk >= 0)
5341 goto busy;
5342
5343 kick_rdev_from_array(rdev);
5344 md_update_sb(mddev, 1);
5345 md_new_event(mddev);
5346
5347 return 0;
5348busy:
5349 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
5350 bdevname(rdev->bdev,b), mdname(mddev));
5351 return -EBUSY;
5352}
5353
5354static int hot_add_disk(mddev_t * mddev, dev_t dev)
5355{
5356 char b[BDEVNAME_SIZE];
5357 int err;
5358 mdk_rdev_t *rdev;
5359
5360 if (!mddev->pers)
5361 return -ENODEV;
5362
5363 if (mddev->major_version != 0) {
5364 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
5365 " version-0 superblocks.\n",
5366 mdname(mddev));
5367 return -EINVAL;
5368 }
5369 if (!mddev->pers->hot_add_disk) {
5370 printk(KERN_WARNING
5371 "%s: personality does not support diskops!\n",
5372 mdname(mddev));
5373 return -EINVAL;
5374 }
5375
5376 rdev = md_import_device(dev, -1, 0);
5377 if (IS_ERR(rdev)) {
5378 printk(KERN_WARNING
5379 "md: error, md_import_device() returned %ld\n",
5380 PTR_ERR(rdev));
5381 return -EINVAL;
5382 }
5383
5384 if (mddev->persistent)
5385 rdev->sb_start = calc_dev_sboffset(rdev);
5386 else
5387 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5388
5389 rdev->sectors = rdev->sb_start;
5390
5391 if (test_bit(Faulty, &rdev->flags)) {
5392 printk(KERN_WARNING
5393 "md: can not hot-add faulty %s disk to %s!\n",
5394 bdevname(rdev->bdev,b), mdname(mddev));
5395 err = -EINVAL;
5396 goto abort_export;
5397 }
5398 clear_bit(In_sync, &rdev->flags);
5399 rdev->desc_nr = -1;
5400 rdev->saved_raid_disk = -1;
5401 err = bind_rdev_to_array(rdev, mddev);
5402 if (err)
5403 goto abort_export;
5404
	/*
	 * The rest should better be atomic, we can have disk failures
	 * noticed in interrupt contexts ...
	 */
5410 rdev->raid_disk = -1;
5411
5412 md_update_sb(mddev, 1);
5413
	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */
5418 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5419 md_wakeup_thread(mddev->thread);
5420 md_new_event(mddev);
5421 return 0;
5422
5423abort_export:
5424 export_rdev(rdev);
5425 return err;
5426}
5427
5428static int set_bitmap_file(mddev_t *mddev, int fd)
5429{
5430 int err;
5431
5432 if (mddev->pers) {
5433 if (!mddev->pers->quiesce)
5434 return -EBUSY;
5435 if (mddev->recovery || mddev->sync_thread)
5436 return -EBUSY;
		/* we should be able to change the bitmap.. */
5438 }
5439
5440
5441 if (fd >= 0) {
5442 if (mddev->bitmap)
5443 return -EEXIST;
5444 mddev->bitmap_info.file = fget(fd);
5445
5446 if (mddev->bitmap_info.file == NULL) {
5447 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5448 mdname(mddev));
5449 return -EBADF;
5450 }
5451
5452 err = deny_bitmap_write_access(mddev->bitmap_info.file);
5453 if (err) {
5454 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
5455 mdname(mddev));
5456 fput(mddev->bitmap_info.file);
5457 mddev->bitmap_info.file = NULL;
5458 return err;
5459 }
5460 mddev->bitmap_info.offset = 0;
5461 } else if (mddev->bitmap == NULL)
5462 return -ENOENT;
5463 err = 0;
5464 if (mddev->pers) {
5465 mddev->pers->quiesce(mddev, 1);
5466 if (fd >= 0) {
5467 err = bitmap_create(mddev);
5468 if (!err)
5469 err = bitmap_load(mddev);
5470 }
5471 if (fd < 0 || err) {
5472 bitmap_destroy(mddev);
5473 fd = -1;
5474 }
5475 mddev->pers->quiesce(mddev, 0);
5476 }
5477 if (fd < 0) {
5478 if (mddev->bitmap_info.file) {
5479 restore_bitmap_write_access(mddev->bitmap_info.file);
5480 fput(mddev->bitmap_info.file);
5481 }
5482 mddev->bitmap_info.file = NULL;
5483 }
5484
5485 return err;
5486}
5487
/*
 * set_array_info is used two different ways.
 * The original usage is when creating a new array:
 *  raid_disks is > 0, and together with level, size, not_persistent,
 *  layout and chunksize it determines the shape of the array.
 *  This always creates an array with a type-0.90.0 superblock.
 * The newer usage is when assembling an array:
 *  raid_disks will be 0, and the major_version field is used to
 *  determine which style of super-block is to be found on the devices.
 *  The minor and patch _version numbers are also kept in case the
 *  super_block handler wishes to interpret them.
 */
5501static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
5502{
5503
5504 if (info->raid_disks == 0) {
5505
5506 if (info->major_version < 0 ||
5507 info->major_version >= ARRAY_SIZE(super_types) ||
5508 super_types[info->major_version].name == NULL) {
5509
5510 printk(KERN_INFO
5511 "md: superblock version %d not known\n",
5512 info->major_version);
5513 return -EINVAL;
5514 }
5515 mddev->major_version = info->major_version;
5516 mddev->minor_version = info->minor_version;
5517 mddev->patch_version = info->patch_version;
5518 mddev->persistent = !info->not_persistent;
5519
		/* ensure mddev_put doesn't delete this now that there
		 * is some minimal configuration.
		 */
5522 mddev->ctime = get_seconds();
5523 return 0;
5524 }
5525 mddev->major_version = MD_MAJOR_VERSION;
5526 mddev->minor_version = MD_MINOR_VERSION;
5527 mddev->patch_version = MD_PATCHLEVEL_VERSION;
5528 mddev->ctime = get_seconds();
5529
5530 mddev->level = info->level;
5531 mddev->clevel[0] = 0;
5532 mddev->dev_sectors = 2 * (sector_t)info->size;
5533 mddev->raid_disks = info->raid_disks;
5534
	/* don't set md_minor, it is determined by which /dev/md* was
	 * opened
	 */
5537 if (info->state & (1<<MD_SB_CLEAN))
5538 mddev->recovery_cp = MaxSector;
5539 else
5540 mddev->recovery_cp = 0;
5541 mddev->persistent = ! info->not_persistent;
5542 mddev->external = 0;
5543
5544 mddev->layout = info->layout;
5545 mddev->chunk_sectors = info->chunk_size >> 9;
5546
5547 mddev->max_disks = MD_SB_DISKS;
5548
5549 if (mddev->persistent)
5550 mddev->flags = 0;
5551 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5552
5553 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
5554 mddev->bitmap_info.offset = 0;
5555
5556 mddev->reshape_position = MaxSector;
5557
	/*
	 * Generate a 128 bit UUID
	 */
5561 get_random_bytes(mddev->uuid, 16);
5562
5563 mddev->new_level = mddev->level;
5564 mddev->new_chunk_sectors = mddev->chunk_sectors;
5565 mddev->new_layout = mddev->layout;
5566 mddev->delta_disks = 0;
5567
5568 return 0;
5569}
5570
5571void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors)
5572{
5573 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
5574
5575 if (mddev->external_size)
5576 return;
5577
5578 mddev->array_sectors = array_sectors;
5579}
5580EXPORT_SYMBOL(md_set_array_sectors);
5581
5582static int update_size(mddev_t *mddev, sector_t num_sectors)
5583{
5584 mdk_rdev_t *rdev;
5585 int rv;
5586 int fit = (num_sectors == 0);
5587
5588 if (mddev->pers->resize == NULL)
5589 return -EINVAL;
5590
	/* The "num_sectors" is the number of sectors of each device that
	 * is used.  This can only make sense for arrays with redundancy.
	 * linear and raid0 always use whatever space is available. We can only
	 * consider changing this number if no resync or reconstruction is
	 * happening, and if the new size is acceptable. It must fit before the
	 * sb_start or, if that is <data_offset, it must fit before the size
	 * of each device.  If num_sectors is zero, we find the largest size
	 * that fits.
	 */
5599 if (mddev->sync_thread)
5600 return -EBUSY;
	if (mddev->bitmap)
		/* Sorry, cannot grow a bitmap yet, just remove it,
		 * grow, and re-add.
		 */
		return -EBUSY;
5606 list_for_each_entry(rdev, &mddev->disks, same_set) {
5607 sector_t avail = rdev->sectors;
5608
5609 if (fit && (num_sectors == 0 || num_sectors > avail))
5610 num_sectors = avail;
5611 if (avail < num_sectors)
5612 return -ENOSPC;
5613 }
5614 rv = mddev->pers->resize(mddev, num_sectors);
5615 if (!rv)
5616 revalidate_disk(mddev->gendisk);
5617 return rv;
5618}
5619
5620static int update_raid_disks(mddev_t *mddev, int raid_disks)
5621{
5622 int rv;
5623
5624 if (mddev->pers->check_reshape == NULL)
5625 return -EINVAL;
5626 if (raid_disks <= 0 ||
5627 (mddev->max_disks && raid_disks >= mddev->max_disks))
5628 return -EINVAL;
5629 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
5630 return -EBUSY;
5631 mddev->delta_disks = raid_disks - mddev->raid_disks;
5632
5633 rv = mddev->pers->check_reshape(mddev);
5634 if (rv < 0)
5635 mddev->delta_disks = 0;
5636 return rv;
5637}
5638
/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * The version, ctime, level, persistence and chunk_size of the
 * superblock cannot be changed.  Of size, raid_disks, layout and
 * bitmap presence, at most one may be changed per call.
 */
5648static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5649{
5650 int rv = 0;
5651 int cnt = 0;
5652 int state = 0;
5653
	/* compute the expected 'state' so changes can be detected */
5655 if (mddev->bitmap && mddev->bitmap_info.offset)
5656 state |= (1 << MD_SB_BITMAP_PRESENT);
5657
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
/*	    mddev->patch_version != info->patch_version || */
	    mddev->ctime != info->ctime ||
	    mddev->level != info->level ||
/*	    mddev->layout != info->layout || */
	    !mddev->persistent != info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
	    ((state ^ info->state) & 0xfffffe00))
		return -EINVAL;
5670
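	/* Check there is only one change */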
5671 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
5672 cnt++;
5673 if (mddev->raid_disks != info->raid_disks)
5674 cnt++;
5675 if (mddev->layout != info->layout)
5676 cnt++;
5677 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
5678 cnt++;
5679 if (cnt == 0)
5680 return 0;
5681 if (cnt > 1)
5682 return -EINVAL;
5683
5684 if (mddev->layout != info->layout) {
		/* Change layout
		 * we don't need to do anything at the md level, the
		 * personality will take care of it all.
		 */
5689 if (mddev->pers->check_reshape == NULL)
5690 return -EINVAL;
5691 else {
5692 mddev->new_layout = info->layout;
5693 rv = mddev->pers->check_reshape(mddev);
5694 if (rv)
5695 mddev->new_layout = mddev->layout;
5696 return rv;
5697 }
5698 }
5699 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
5700 rv = update_size(mddev, (sector_t)info->size * 2);
5701
5702 if (mddev->raid_disks != info->raid_disks)
5703 rv = update_raid_disks(mddev, info->raid_disks);
5704
5705 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
5706 if (mddev->pers->quiesce == NULL)
5707 return -EINVAL;
5708 if (mddev->recovery || mddev->sync_thread)
5709 return -EBUSY;
5710 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
			/* add the bitmap */
5712 if (mddev->bitmap)
5713 return -EEXIST;
5714 if (mddev->bitmap_info.default_offset == 0)
5715 return -EINVAL;
5716 mddev->bitmap_info.offset =
5717 mddev->bitmap_info.default_offset;
5718 mddev->pers->quiesce(mddev, 1);
5719 rv = bitmap_create(mddev);
5720 if (!rv)
5721 rv = bitmap_load(mddev);
5722 if (rv)
5723 bitmap_destroy(mddev);
5724 mddev->pers->quiesce(mddev, 0);
5725 } else {
			/* remove the bitmap */
5727 if (!mddev->bitmap)
5728 return -ENOENT;
5729 if (mddev->bitmap->file)
5730 return -EINVAL;
5731 mddev->pers->quiesce(mddev, 1);
5732 bitmap_destroy(mddev);
5733 mddev->pers->quiesce(mddev, 0);
5734 mddev->bitmap_info.offset = 0;
5735 }
5736 }
5737 md_update_sb(mddev, 1);
5738 return rv;
5739}
5740
5741static int set_disk_faulty(mddev_t *mddev, dev_t dev)
5742{
5743 mdk_rdev_t *rdev;
5744
5745 if (mddev->pers == NULL)
5746 return -ENODEV;
5747
5748 rdev = find_rdev(mddev, dev);
5749 if (!rdev)
5750 return -ENODEV;
5751
5752 md_error(mddev, rdev);
5753 return 0;
5754}
5755
/*
 * There is no easy way to present a sensible CHS virtual geometry,
 * so pretend we have 2 heads and 4 sectors per track, with a big
 * number of cylinders...
 */
5762static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
5763{
5764 mddev_t *mddev = bdev->bd_disk->private_data;
5765
5766 geo->heads = 2;
5767 geo->sectors = 4;
5768 geo->cylinders = mddev->array_sectors / 8;
5769 return 0;
5770}
5771
5772static int md_ioctl(struct block_device *bdev, fmode_t mode,
5773 unsigned int cmd, unsigned long arg)
5774{
5775 int err = 0;
5776 void __user *argp = (void __user *)arg;
5777 mddev_t *mddev = NULL;
5778 int ro;
5779
5780 if (!capable(CAP_SYS_ADMIN))
5781 return -EACCES;
5782
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
5787 switch (cmd)
5788 {
5789 case RAID_VERSION:
5790 err = get_version(argp);
5791 goto done;
5792
5793 case PRINT_RAID_DEBUG:
5794 err = 0;
5795 md_print_devices();
5796 goto done;
5797
5798#ifndef MODULE
5799 case RAID_AUTORUN:
5800 err = 0;
5801 autostart_arrays(arg);
5802 goto done;
5803#endif
5804 default:;
5805 }
5806
	/*
	 * Commands creating/starting a new array:
	 */
5811 mddev = bdev->bd_disk->private_data;
5812
5813 if (!mddev) {
5814 BUG();
5815 goto abort;
5816 }
5817
5818 err = mddev_lock(mddev);
5819 if (err) {
5820 printk(KERN_INFO
5821 "md: ioctl lock interrupted, reason %d, cmd %d\n",
5822 err, cmd);
5823 goto abort;
5824 }
5825
5826 switch (cmd)
5827 {
5828 case SET_ARRAY_INFO:
5829 {
5830 mdu_array_info_t info;
5831 if (!arg)
5832 memset(&info, 0, sizeof(info));
5833 else if (copy_from_user(&info, argp, sizeof(info))) {
5834 err = -EFAULT;
5835 goto abort_unlock;
5836 }
5837 if (mddev->pers) {
5838 err = update_array_info(mddev, &info);
5839 if (err) {
5840 printk(KERN_WARNING "md: couldn't update"
5841 " array info. %d\n", err);
5842 goto abort_unlock;
5843 }
5844 goto done_unlock;
5845 }
5846 if (!list_empty(&mddev->disks)) {
5847 printk(KERN_WARNING
5848 "md: array %s already has disks!\n",
5849 mdname(mddev));
5850 err = -EBUSY;
5851 goto abort_unlock;
5852 }
5853 if (mddev->raid_disks) {
5854 printk(KERN_WARNING
5855 "md: array %s already initialised!\n",
5856 mdname(mddev));
5857 err = -EBUSY;
5858 goto abort_unlock;
5859 }
5860 err = set_array_info(mddev, &info);
5861 if (err) {
5862 printk(KERN_WARNING "md: couldn't set"
5863 " array info. %d\n", err);
5864 goto abort_unlock;
5865 }
5866 }
5867 goto done_unlock;
5868
5869 default:;
5870 }
5871
	/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed
	 */
5877 if ((!mddev->raid_disks && !mddev->external)
5878 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
5879 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
5880 && cmd != GET_BITMAP_FILE) {
5881 err = -ENODEV;
5882 goto abort_unlock;
5883 }
5884
	/*
	 * Commands even a read-only array can execute:
	 */
5888 switch (cmd)
5889 {
5890 case GET_ARRAY_INFO:
5891 err = get_array_info(mddev, argp);
5892 goto done_unlock;
5893
5894 case GET_BITMAP_FILE:
5895 err = get_bitmap_file(mddev, argp);
5896 goto done_unlock;
5897
5898 case GET_DISK_INFO:
5899 err = get_disk_info(mddev, argp);
5900 goto done_unlock;
5901
5902 case RESTART_ARRAY_RW:
5903 err = restart_array(mddev);
5904 goto done_unlock;
5905
5906 case STOP_ARRAY:
5907 err = do_md_stop(mddev, 0, 1);
5908 goto done_unlock;
5909
5910 case STOP_ARRAY_RO:
5911 err = md_set_readonly(mddev, 1);
5912 goto done_unlock;
5913
5914 case BLKROSET:
5915 if (get_user(ro, (int __user *)(arg))) {
5916 err = -EFAULT;
5917 goto done_unlock;
5918 }
5919 err = -EINVAL;
5920
			/* if the bdev is going readonly the value of mddev->ro
			 * does not matter, no writes are coming
			 */
5924 if (ro)
5925 goto done_unlock;
5926
			/* are we already prepared for writes? */
5928 if (mddev->ro != 1)
5929 goto done_unlock;
5930
			/* transitioning to read-auto need only happen for
			 * arrays that call md_write_start
			 */
5934 if (mddev->pers) {
5935 err = restart_array(mddev);
5936 if (err == 0) {
5937 mddev->ro = 2;
5938 set_disk_ro(mddev->gendisk, 0);
5939 }
5940 }
5941 goto done_unlock;
5942 }
5943
	/*
	 * The remaining ioctls need write access to the array: an
	 * auto-read-only array is switched to read-write here, any
	 * other read-only array fails with -EROFS.
	 */
5951 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
5952 if (mddev->ro == 2) {
5953 mddev->ro = 0;
5954 sysfs_notify_dirent_safe(mddev->sysfs_state);
5955 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5956 md_wakeup_thread(mddev->thread);
5957 } else {
5958 err = -EROFS;
5959 goto abort_unlock;
5960 }
5961 }
5962
5963 switch (cmd)
5964 {
5965 case ADD_NEW_DISK:
5966 {
5967 mdu_disk_info_t info;
5968 if (copy_from_user(&info, argp, sizeof(info)))
5969 err = -EFAULT;
5970 else
5971 err = add_new_disk(mddev, &info);
5972 goto done_unlock;
5973 }
5974
5975 case HOT_REMOVE_DISK:
5976 err = hot_remove_disk(mddev, new_decode_dev(arg));
5977 goto done_unlock;
5978
5979 case HOT_ADD_DISK:
5980 err = hot_add_disk(mddev, new_decode_dev(arg));
5981 goto done_unlock;
5982
5983 case SET_DISK_FAULTY:
5984 err = set_disk_faulty(mddev, new_decode_dev(arg));
5985 goto done_unlock;
5986
5987 case RUN_ARRAY:
5988 err = do_md_run(mddev);
5989 goto done_unlock;
5990
5991 case SET_BITMAP_FILE:
5992 err = set_bitmap_file(mddev, (int)arg);
5993 goto done_unlock;
5994
5995 default:
5996 err = -EINVAL;
5997 goto abort_unlock;
5998 }
5999
6000done_unlock:
6001abort_unlock:
6002 if (mddev->hold_active == UNTIL_IOCTL &&
6003 err != -EINVAL)
6004 mddev->hold_active = 0;
6005 mddev_unlock(mddev);
6006
6007 return err;
6008done:
6009 if (err)
6010 MD_BUG();
6011abort:
6012 return err;
6013}
6014#ifdef CONFIG_COMPAT
6015static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
6016 unsigned int cmd, unsigned long arg)
6017{
6018 switch (cmd) {
6019 case HOT_REMOVE_DISK:
6020 case HOT_ADD_DISK:
6021 case SET_DISK_FAULTY:
6022 case SET_BITMAP_FILE:
		/* These take in integer arg, do not convert */
6024 break;
6025 default:
6026 arg = (unsigned long)compat_ptr(arg);
6027 break;
6028 }
6029
6030 return md_ioctl(bdev, mode, cmd, arg);
6031}
6032#endif
6033
6034static int md_open(struct block_device *bdev, fmode_t mode)
6035{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */
6040 mddev_t *mddev = mddev_find(bdev->bd_dev);
6041 int err;
6042
6043 if (mddev->gendisk != bdev->bd_disk) {
		/* we are racing with mddev_put() which is discarding this
		 * bd_disk.
		 */
6047 mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
6049 flush_workqueue(md_misc_wq);
		/* Then retry the open from the top */
6051 return -ERESTARTSYS;
6052 }
6053 BUG_ON(mddev != bdev->bd_disk->private_data);
6054
6055 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
6056 goto out;
6057
6058 err = 0;
6059 atomic_inc(&mddev->openers);
6060 mutex_unlock(&mddev->open_mutex);
6061
6062 check_disk_change(bdev);
6063 out:
6064 return err;
6065}
6066
6067static int md_release(struct gendisk *disk, fmode_t mode)
6068{
6069 mddev_t *mddev = disk->private_data;
6070
6071 BUG_ON(!mddev);
6072 atomic_dec(&mddev->openers);
6073 mddev_put(mddev);
6074
6075 return 0;
6076}
6077
6078static int md_media_changed(struct gendisk *disk)
6079{
6080 mddev_t *mddev = disk->private_data;
6081
6082 return mddev->changed;
6083}
6084
6085static int md_revalidate(struct gendisk *disk)
6086{
6087 mddev_t *mddev = disk->private_data;
6088
6089 mddev->changed = 0;
6090 return 0;
6091}
6092static const struct block_device_operations md_fops =
6093{
6094 .owner = THIS_MODULE,
6095 .open = md_open,
6096 .release = md_release,
6097 .ioctl = md_ioctl,
6098#ifdef CONFIG_COMPAT
6099 .compat_ioctl = md_compat_ioctl,
6100#endif
6101 .getgeo = md_getgeo,
6102 .media_changed = md_media_changed,
6103 .revalidate_disk= md_revalidate,
6104};
6105
6106static int md_thread(void * arg)
6107{
6108 mdk_thread_t *thread = arg;
6109
	/*
	 * md_thread is a 'system-thread', its priority should be very
	 * high.  We avoid resource deadlocks individually in each
	 * thread.
	 *
	 * The loop below waits for THREAD_WAKEUP (or the optional
	 * timeout) and then calls the per-array handler ->run().
	 * SIGKILL is allowed so that the wait can be interrupted.
	 */
6122 allow_signal(SIGKILL);
6123 while (!kthread_should_stop()) {
		/* We need to wait INTERRUPTIBLE so that
		 * we don't add to the load-average.
		 * That means we need to be sure no signals are
		 * pending
		 */
6130 if (signal_pending(current))
6131 flush_signals(current);
6132
6133 wait_event_interruptible_timeout
6134 (thread->wqueue,
6135 test_bit(THREAD_WAKEUP, &thread->flags)
6136 || kthread_should_stop(),
6137 thread->timeout);
6138
6139 clear_bit(THREAD_WAKEUP, &thread->flags);
6140 if (!kthread_should_stop())
6141 thread->run(thread->mddev);
6142 }
6143
6144 return 0;
6145}
6146
6147void md_wakeup_thread(mdk_thread_t *thread)
6148{
6149 if (thread) {
6150 dprintk("md: waking up MD thread %s.\n", thread->tsk->comm);
6151 set_bit(THREAD_WAKEUP, &thread->flags);
6152 wake_up(&thread->wqueue);
6153 }
6154}
6155
6156mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
6157 const char *name)
6158{
6159 mdk_thread_t *thread;
6160
6161 thread = kzalloc(sizeof(mdk_thread_t), GFP_KERNEL);
6162 if (!thread)
6163 return NULL;
6164
6165 init_waitqueue_head(&thread->wqueue);
6166
6167 thread->run = run;
6168 thread->mddev = mddev;
6169 thread->timeout = MAX_SCHEDULE_TIMEOUT;
6170 thread->tsk = kthread_run(md_thread, thread,
6171 "%s_%s",
6172 mdname(thread->mddev),
6173 name ?: mddev->pers->name);
6174 if (IS_ERR(thread->tsk)) {
6175 kfree(thread);
6176 return NULL;
6177 }
6178 return thread;
6179}
6180
6181void md_unregister_thread(mdk_thread_t *thread)
6182{
6183 if (!thread)
6184 return;
6185 dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
6186
6187 kthread_stop(thread->tsk);
6188 kfree(thread);
6189}
6190
6191void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
6192{
6193 if (!mddev) {
6194 MD_BUG();
6195 return;
6196 }
6197
6198 if (!rdev || test_bit(Faulty, &rdev->flags))
6199 return;
6200
6201 if (mddev->external)
6202 set_bit(Blocked, &rdev->flags);
6203
6210 if (!mddev->pers)
6211 return;
6212 if (!mddev->pers->error_handler)
6213 return;
6214 mddev->pers->error_handler(mddev,rdev);
6215 if (mddev->degraded)
6216 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6217 sysfs_notify_dirent_safe(rdev->sysfs_state);
6218 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6219 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6220 md_wakeup_thread(mddev->thread);
6221 if (mddev->event_work.func)
6222 queue_work(md_misc_wq, &mddev->event_work);
6223 md_new_event_inintr(mddev);
6224}
6225
6226
/* seq_file implementation for /proc/mdstat */
6228static void status_unused(struct seq_file *seq)
6229{
6230 int i = 0;
6231 mdk_rdev_t *rdev;
6232
6233 seq_printf(seq, "unused devices: ");
6234
6235 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
6236 char b[BDEVNAME_SIZE];
6237 i++;
6238 seq_printf(seq, "%s ",
6239 bdevname(rdev->bdev,b));
6240 }
6241 if (!i)
6242 seq_printf(seq, "<none>");
6243
6244 seq_printf(seq, "\n");
6245}
6246
6247
6248static void status_resync(struct seq_file *seq, mddev_t * mddev)
6249{
6250 sector_t max_sectors, resync, res;
6251 unsigned long dt, db;
6252 sector_t rt;
6253 int scale;
6254 unsigned int per_milli;
6255
6256 resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
6257
6258 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
6259 max_sectors = mddev->resync_max_sectors;
6260 else
6261 max_sectors = mddev->dev_sectors;
6262
	/*
	 * Should not happen.
	 */
6266 if (!max_sectors) {
6267 MD_BUG();
6268 return;
6269 }
6270
	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	 * in a sector_t, and (max_sectors>>scale) will fit in a
	 * u32, as those are the requirements for sector_div.
	 * Thus 'scale' must be at least 10
	 */
6275 scale = 10;
6276 if (sizeof(sector_t) > sizeof(unsigned long)) {
6277 while ( max_sectors/2 > (1ULL<<(scale+32)))
6278 scale++;
6279 }
6280 res = (resync>>scale)*1000;
6281 sector_div(res, (u32)((max_sectors>>scale)+1));
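	/* res is now the completed fraction of the resync, in thousandths */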
6282
6283 per_milli = res;
6284 {
6285 int i, x = per_milli/50, y = 20-x;
6286 seq_printf(seq, "[");
6287 for (i = 0; i < x; i++)
6288 seq_printf(seq, "=");
6289 seq_printf(seq, ">");
6290 for (i = 0; i < y; i++)
6291 seq_printf(seq, ".");
6292 seq_printf(seq, "] ");
6293 }
6294 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
6295 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
6296 "reshape" :
6297 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
6298 "check" :
6299 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
6300 "resync" : "recovery"))),
6301 per_milli/10, per_milli % 10,
6302 (unsigned long long) resync/2,
6303 (unsigned long long) max_sectors/2);
6304
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, so could be 32bit or 64bit.
	 * So we divide before multiply in case it is
	 * 32bit and close to the limit.
	 * We scale the divisor (db) by 32 to avoid losing precision
	 * when dividing by a small db; the final '>> 5' undoes that
	 * scaling.
	 */
6319 dt = ((jiffies - mddev->resync_mark) / HZ);
6320 if (!dt) dt++;
6321 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
6322 - mddev->resync_mark_cnt;
6323
6324 rt = max_sectors - resync;
6325 sector_div(rt, db/32+1);
6326 rt *= dt;
6327 rt >>= 5;
6328
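	/* rt is now the estimated remaining time in seconds */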
6329 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
6330 ((unsigned long)rt % 60)/6);
6331
6332 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
6333}
6334
6335static void *md_seq_start(struct seq_file *seq, loff_t *pos)
6336{
6337 struct list_head *tmp;
6338 loff_t l = *pos;
6339 mddev_t *mddev;
6340
6341 if (l >= 0x10000)
6342 return NULL;
	if (!l--)
		/* header */
		return (void*)1;
6346
6347 spin_lock(&all_mddevs_lock);
6348 list_for_each(tmp,&all_mddevs)
6349 if (!l--) {
6350 mddev = list_entry(tmp, mddev_t, all_mddevs);
6351 mddev_get(mddev);
6352 spin_unlock(&all_mddevs_lock);
6353 return mddev;
6354 }
6355 spin_unlock(&all_mddevs_lock);
	if (!l--)
		return (void*)2; /* tail */
6358 return NULL;
6359}
6360
6361static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
6362{
6363 struct list_head *tmp;
6364 mddev_t *next_mddev, *mddev = v;
6365
6366 ++*pos;
6367 if (v == (void*)2)
6368 return NULL;
6369
6370 spin_lock(&all_mddevs_lock);
6371 if (v == (void*)1)
6372 tmp = all_mddevs.next;
6373 else
6374 tmp = mddev->all_mddevs.next;
6375 if (tmp != &all_mddevs)
6376 next_mddev = mddev_get(list_entry(tmp,mddev_t,all_mddevs));
6377 else {
6378 next_mddev = (void*)2;
6379 *pos = 0x10000;
6380 }
6381 spin_unlock(&all_mddevs_lock);
6382
6383 if (v != (void*)1)
6384 mddev_put(mddev);
6385 return next_mddev;
6386
6387}
6388
6389static void md_seq_stop(struct seq_file *seq, void *v)
6390{
6391 mddev_t *mddev = v;
6392
6393 if (mddev && v != (void*)1 && v != (void*)2)
6394 mddev_put(mddev);
6395}
6396
6397struct mdstat_info {
6398 int event;
6399};
6400
6401static int md_seq_show(struct seq_file *seq, void *v)
6402{
6403 mddev_t *mddev = v;
6404 sector_t sectors;
6405 mdk_rdev_t *rdev;
6406 struct mdstat_info *mi = seq->private;
6407 struct bitmap *bitmap;
6408
6409 if (v == (void*)1) {
6410 struct mdk_personality *pers;
6411 seq_printf(seq, "Personalities : ");
6412 spin_lock(&pers_lock);
6413 list_for_each_entry(pers, &pers_list, list)
6414 seq_printf(seq, "[%s] ", pers->name);
6415
6416 spin_unlock(&pers_lock);
6417 seq_printf(seq, "\n");
6418 mi->event = atomic_read(&md_event_count);
6419 return 0;
6420 }
6421 if (v == (void*)2) {
6422 status_unused(seq);
6423 return 0;
6424 }
6425
6426 if (mddev_lock(mddev) < 0)
6427 return -EINTR;
6428
6429 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
6430 seq_printf(seq, "%s : %sactive", mdname(mddev),
6431 mddev->pers ? "" : "in");
6432 if (mddev->pers) {
6433 if (mddev->ro==1)
6434 seq_printf(seq, " (read-only)");
6435 if (mddev->ro==2)
6436 seq_printf(seq, " (auto-read-only)");
6437 seq_printf(seq, " %s", mddev->pers->name);
6438 }
6439
6440 sectors = 0;
6441 list_for_each_entry(rdev, &mddev->disks, same_set) {
6442 char b[BDEVNAME_SIZE];
6443 seq_printf(seq, " %s[%d]",
6444 bdevname(rdev->bdev,b), rdev->desc_nr);
6445 if (test_bit(WriteMostly, &rdev->flags))
6446 seq_printf(seq, "(W)");
6447 if (test_bit(Faulty, &rdev->flags)) {
6448 seq_printf(seq, "(F)");
6449 continue;
6450 } else if (rdev->raid_disk < 0)
6451 seq_printf(seq, "(S)");
6452 sectors += rdev->sectors;
6453 }
6454
6455 if (!list_empty(&mddev->disks)) {
6456 if (mddev->pers)
6457 seq_printf(seq, "\n %llu blocks",
6458 (unsigned long long)
6459 mddev->array_sectors / 2);
6460 else
6461 seq_printf(seq, "\n %llu blocks",
6462 (unsigned long long)sectors / 2);
6463 }
6464 if (mddev->persistent) {
6465 if (mddev->major_version != 0 ||
6466 mddev->minor_version != 90) {
6467 seq_printf(seq," super %d.%d",
6468 mddev->major_version,
6469 mddev->minor_version);
6470 }
6471 } else if (mddev->external)
6472 seq_printf(seq, " super external:%s",
6473 mddev->metadata_type);
6474 else
6475 seq_printf(seq, " super non-persistent");
6476
6477 if (mddev->pers) {
6478 mddev->pers->status(seq, mddev);
6479 seq_printf(seq, "\n ");
6480 if (mddev->pers->sync_request) {
6481 if (mddev->curr_resync > 2) {
6482 status_resync(seq, mddev);
6483 seq_printf(seq, "\n ");
6484 } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
6485 seq_printf(seq, "\tresync=DELAYED\n ");
6486 else if (mddev->recovery_cp < MaxSector)
6487 seq_printf(seq, "\tresync=PENDING\n ");
6488 }
6489 } else
6490 seq_printf(seq, "\n ");
6491
6492 if ((bitmap = mddev->bitmap)) {
6493 unsigned long chunk_kb;
6494 unsigned long flags;
6495 spin_lock_irqsave(&bitmap->lock, flags);
6496 chunk_kb = mddev->bitmap_info.chunksize >> 10;
6497 seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
6498 "%lu%s chunk",
6499 bitmap->pages - bitmap->missing_pages,
6500 bitmap->pages,
6501 (bitmap->pages - bitmap->missing_pages)
6502 << (PAGE_SHIFT - 10),
6503 chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
6504 chunk_kb ? "KB" : "B");
6505 if (bitmap->file) {
6506 seq_printf(seq, ", file: ");
6507 seq_path(seq, &bitmap->file->f_path, " \t\n");
6508 }
6509
6510 seq_printf(seq, "\n");
6511 spin_unlock_irqrestore(&bitmap->lock, flags);
6512 }
6513
6514 seq_printf(seq, "\n");
6515 }
6516 mddev_unlock(mddev);
6517
6518 return 0;
6519}
6520
6521static const struct seq_operations md_seq_ops = {
6522 .start = md_seq_start,
6523 .next = md_seq_next,
6524 .stop = md_seq_stop,
6525 .show = md_seq_show,
6526};
6527
6528static int md_seq_open(struct inode *inode, struct file *file)
6529{
6530 int error;
6531 struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
6532 if (mi == NULL)
6533 return -ENOMEM;
6534
6535 error = seq_open(file, &md_seq_ops);
6536 if (error)
6537 kfree(mi);
6538 else {
6539 struct seq_file *p = file->private_data;
6540 p->private = mi;
6541 mi->event = atomic_read(&md_event_count);
6542 }
6543 return error;
6544}
6545
6546static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
6547{
6548 struct seq_file *m = filp->private_data;
6549 struct mdstat_info *mi = m->private;
6550 int mask;
6551
6552 poll_wait(filp, &md_event_waiters, wait);
6553
	/* always allow read */
6555 mask = POLLIN | POLLRDNORM;
6556
6557 if (mi->event != atomic_read(&md_event_count))
6558 mask |= POLLERR | POLLPRI;
6559 return mask;
6560}
6561
6562static const struct file_operations md_seq_fops = {
6563 .owner = THIS_MODULE,
6564 .open = md_seq_open,
6565 .read = seq_read,
6566 .llseek = seq_lseek,
6567 .release = seq_release_private,
6568 .poll = mdstat_poll,
6569};
6570
6571int register_md_personality(struct mdk_personality *p)
6572{
6573 spin_lock(&pers_lock);
6574 list_add_tail(&p->list, &pers_list);
6575 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
6576 spin_unlock(&pers_lock);
6577 return 0;
6578}
6579
6580int unregister_md_personality(struct mdk_personality *p)
6581{
6582 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
6583 spin_lock(&pers_lock);
6584 list_del_init(&p->list);
6585 spin_unlock(&pers_lock);
6586 return 0;
6587}
6588
6589static int is_mddev_idle(mddev_t *mddev, int init)
6590{
6591 mdk_rdev_t * rdev;
6592 int idle;
6593 int curr_events;
6594
6595 idle = 1;
6596 rcu_read_lock();
6597 rdev_for_each_rcu(rdev, mddev) {
6598 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
6599 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
6600 (int)part_stat_read(&disk->part0, sectors[1]) -
6601 atomic_read(&disk->sync_io);
		/* sync IO will cause sync_io to increase before the disk_stats
		 * as sync_io is counted when a request starts, and
		 * disk_stats is counted when it completes.
		 * So resync activity will cause curr_events to be smaller than
		 * when there was no such activity.
		 * non-sync IO will cause disk_stat to increase without
		 * increasing sync_io so curr_events will (eventually)
		 * be larger than it was before.  Once it becomes
		 * substantially larger, the test below will cause
		 * the array to appear non-idle, and resync will slow
		 * down.
		 * If there is a lot of outstanding resync activity when
		 * we set last_events to curr_events, then all that activity
		 * completing might cause the array to appear non-idle
		 * and resync will be slowed down even though there might
		 * not have been non-resync activity.  This will only
		 * happen once though.  'last_events' will soon reflect
		 * the state where there is little or no outstanding
		 * resync activity, and then again once there is a
		 * significant non-resync activity.
		 */
6624 if (init || curr_events - rdev->last_events > 64) {
6625 rdev->last_events = curr_events;
6626 idle = 0;
6627 }
6628 }
6629 rcu_read_unlock();
6630 return idle;
6631}
6632
6633void md_done_sync(mddev_t *mddev, int blocks, int ok)
6634{
	/* another "blocks" (512 byte) blocks have been synced */
6636 atomic_sub(blocks, &mddev->recovery_active);
6637 wake_up(&mddev->recovery_wait);
6638 if (!ok) {
6639 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6640 md_wakeup_thread(mddev->thread);
		/* stop recovery, signal do_sync */
6642 }
6643}
6644
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, schedule a superblock update
 * and wait for it to complete.
 */
6651void md_write_start(mddev_t *mddev, struct bio *bi)
6652{
6653 int did_change = 0;
6654 if (bio_data_dir(bi) != WRITE)
6655 return;
6656
6657 BUG_ON(mddev->ro == 1);
6658 if (mddev->ro == 2) {
		/* need to switch to read/write */
6660 mddev->ro = 0;
6661 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6662 md_wakeup_thread(mddev->thread);
6663 md_wakeup_thread(mddev->sync_thread);
6664 did_change = 1;
6665 }
6666 atomic_inc(&mddev->writes_pending);
6667 if (mddev->safemode == 1)
6668 mddev->safemode = 0;
6669 if (mddev->in_sync) {
6670 spin_lock_irq(&mddev->write_lock);
6671 if (mddev->in_sync) {
6672 mddev->in_sync = 0;
6673 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6674 set_bit(MD_CHANGE_PENDING, &mddev->flags);
6675 md_wakeup_thread(mddev->thread);
6676 did_change = 1;
6677 }
6678 spin_unlock_irq(&mddev->write_lock);
6679 }
6680 if (did_change)
6681 sysfs_notify_dirent_safe(mddev->sysfs_state);
6682 wait_event(mddev->sb_wait,
6683 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6684}
6685
6686void md_write_end(mddev_t *mddev)
6687{
6688 if (atomic_dec_and_test(&mddev->writes_pending)) {
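		/* last outstanding write: schedule the transition back to 'clean' */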
6689 if (mddev->safemode == 2)
6690 md_wakeup_thread(mddev->thread);
6691 else if (mddev->safemode_delay)
6692 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
6693 }
6694}
6695
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 *
 * In the ->external case MD_CHANGE_CLEAN can not be cleared until mddev->lock
 * is dropped, so return -EAGAIN after notifying userspace.
 */
6705int md_allow_write(mddev_t *mddev)
6706{
6707 if (!mddev->pers)
6708 return 0;
6709 if (mddev->ro)
6710 return 0;
6711 if (!mddev->pers->sync_request)
6712 return 0;
6713
6714 spin_lock_irq(&mddev->write_lock);
6715 if (mddev->in_sync) {
6716 mddev->in_sync = 0;
6717 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6718 set_bit(MD_CHANGE_PENDING, &mddev->flags);
6719 if (mddev->safemode_delay &&
6720 mddev->safemode == 0)
6721 mddev->safemode = 1;
6722 spin_unlock_irq(&mddev->write_lock);
6723 md_update_sb(mddev, 0);
6724 sysfs_notify_dirent_safe(mddev->sysfs_state);
6725 } else
6726 spin_unlock_irq(&mddev->write_lock);
6727
6728 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
6729 return -EAGAIN;
6730 else
6731 return 0;
6732}
6733EXPORT_SYMBOL_GPL(md_allow_write);
6734
6735#define SYNC_MARKS 10
6736#define SYNC_MARK_STEP (3*HZ)
6737void md_do_sync(mddev_t *mddev)
6738{
6739 mddev_t *mddev2;
6740 unsigned int currspeed = 0,
6741 window;
6742 sector_t max_sectors,j, io_sectors;
6743 unsigned long mark[SYNC_MARKS];
6744 sector_t mark_cnt[SYNC_MARKS];
6745 int last_mark,m;
6746 struct list_head *tmp;
6747 sector_t last_check;
6748 int skipped = 0;
6749 mdk_rdev_t *rdev;
6750 char *desc;
6751
	/* just in case the thread restarts... */
6753 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
6754 return;
6755 if (mddev->ro)
6756 return;
6757
6758 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
6759 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
6760 desc = "data-check";
6761 else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
6762 desc = "requested-resync";
6763 else
6764 desc = "resync";
6765 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6766 desc = "reshape";
6767 else
6768 desc = "recovery";
6769
	/* we overload curr_resync somewhat here.
	 * 0 == not engaged in resync at all
	 * 2 == checking that there is no conflict with another sync
	 * 1 == like 2, but have yielded to allow conflicting resync to
	 *		commence
	 * other == active in resync - this many blocks
	 *
	 * Before starting a resync we must have set curr_resync to
	 * 2, and then checked that every "conflicting" array has curr_resync
	 * less than ours.  When we find one that is the same or higher
	 * we wait on resync_wait.  To avoid deadlock, we reduce curr_resync
	 * to 1 if we choose to yield (based arbitrarily on address of mddev
	 * structure).  This will mean we have to start checking from the
	 * beginning again.
	 */
6786 do {
6787 mddev->curr_resync = 2;
6788
6789 try_again:
6790 if (kthread_should_stop())
6791 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6792
6793 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6794 goto skip;
6795 for_each_mddev(mddev2, tmp) {
6796 if (mddev2 == mddev)
6797 continue;
6798 if (!mddev->parallel_resync
6799 && mddev2->curr_resync
6800 && match_mddev_units(mddev, mddev2)) {
6801 DEFINE_WAIT(wq);
6802 if (mddev < mddev2 && mddev->curr_resync == 2) {
					/* arbitrarily yield */
6804 mddev->curr_resync = 1;
6805 wake_up(&resync_wait);
6806 }
6807 if (mddev > mddev2 && mddev->curr_resync == 1)
					/* no need to wait here, we can wait the next
					 * time 'round when curr_resync == 2
					 */
6811 continue;
6812
				/* We need to wait 'interruptible' so as not to
				 * contribute to the load average, and not to
				 * be caught by 'softlockup'
				 */
6816 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
6817 if (!kthread_should_stop() &&
6818 mddev2->curr_resync >= mddev->curr_resync) {
6819 printk(KERN_INFO "md: delaying %s of %s"
6820 " until %s has finished (they"
6821 " share one or more physical units)\n",
6822 desc, mdname(mddev), mdname(mddev2));
6823 mddev_put(mddev2);
6824 if (signal_pending(current))
6825 flush_signals(current);
6826 schedule();
6827 finish_wait(&resync_wait, &wq);
6828 goto try_again;
6829 }
6830 finish_wait(&resync_wait, &wq);
6831 }
6832 }
6833 } while (mddev->curr_resync < 2);
6834
6835 j = 0;
6836 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to physical size, but can be virtual size
		 */
6840 max_sectors = mddev->resync_max_sectors;
6841 mddev->resync_mismatches = 0;
6842
6843 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
6844 j = mddev->resync_min;
6845 else if (!mddev->bitmap)
6846 j = mddev->recovery_cp;
6847
6848 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6849 max_sectors = mddev->dev_sectors;
6850 else {
		/* recovery follows the physical size of devices */
6852 max_sectors = mddev->dev_sectors;
6853 j = MaxSector;
6854 rcu_read_lock();
6855 list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
6856 if (rdev->raid_disk >= 0 &&
6857 !test_bit(Faulty, &rdev->flags) &&
6858 !test_bit(In_sync, &rdev->flags) &&
6859 rdev->recovery_offset < j)
6860 j = rdev->recovery_offset;
6861 rcu_read_unlock();
6862 }
6863
6864 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
6865 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
6866 " %d KB/sec/disk.\n", speed_min(mddev));
6867 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
6868 "(but not more than %d KB/sec) for %s.\n",
6869 speed_max(mddev), desc);
6870
6871 is_mddev_idle(mddev, 1);
6872
6873 io_sectors = 0;
6874 for (m = 0; m < SYNC_MARKS; m++) {
6875 mark[m] = jiffies;
6876 mark_cnt[m] = io_sectors;
6877 }
6878 last_mark = 0;
6879 mddev->resync_mark = mark[last_mark];
6880 mddev->resync_mark_cnt = mark_cnt[last_mark];
6881
	/*
	 * Tune reconstruction:
	 */
6885 window = 32*(PAGE_SIZE/512);
6886 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
6887 window/2, (unsigned long long)max_sectors/2);
6888
6889 atomic_set(&mddev->recovery_active, 0);
6890 last_check = 0;
6891
6892 if (j>2) {
6893 printk(KERN_INFO
6894 "md: resuming %s of %s from checkpoint.\n",
6895 desc, mdname(mddev));
6896 mddev->curr_resync = j;
6897 }
6898 mddev->curr_resync_completed = j;
6899
6900 while (j < max_sectors) {
6901 sector_t sectors;
6902
6903 skipped = 0;
6904
6905 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
6906 ((mddev->curr_resync > mddev->curr_resync_completed &&
6907 (mddev->curr_resync - mddev->curr_resync_completed)
6908 > (max_sectors >> 4)) ||
6909 (j - mddev->curr_resync_completed)*2
6910 >= mddev->resync_max - mddev->curr_resync_completed
6911 )) {
			/* time to update curr_resync_completed */
6913 wait_event(mddev->recovery_wait,
6914 atomic_read(&mddev->recovery_active) == 0);
6915 mddev->curr_resync_completed = j;
6916 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6917 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6918 }
6919
6920 while (j >= mddev->resync_max && !kthread_should_stop()) {
			/* As this condition is controlled by user-space,
			 * we can block indefinitely, so use '_interruptible'
			 * to avoid triggering warnings.
			 */
			flush_signals(current); /* just in case */
6926 wait_event_interruptible(mddev->recovery_wait,
6927 mddev->resync_max > j
6928 || kthread_should_stop());
6929 }
6930
6931 if (kthread_should_stop())
6932 goto interrupted;
6933
6934 sectors = mddev->pers->sync_request(mddev, j, &skipped,
6935 currspeed < speed_min(mddev));
6936 if (sectors == 0) {
6937 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6938 goto out;
6939 }
6940
6941 if (!skipped) {
6942 io_sectors += sectors;
6943 atomic_add(sectors, &mddev->recovery_active);
6944 }
6945
6946 j += sectors;
6947 if (j>1) mddev->curr_resync = j;
6948 mddev->curr_mark_cnt = io_sectors;
6949 if (last_check == 0)
				/* this is the earliest that rebuild will be
				 * visible in /proc/mdstat
				 */
6953 md_new_event(mddev);
6954
6955 if (last_check + window > io_sectors || j == max_sectors)
6956 continue;
6957
6958 last_check = io_sectors;
6959
6960 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6961 break;
6962
6963 repeat:
6964 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
			/* step marks */
6966 int next = (last_mark+1) % SYNC_MARKS;
6967
6968 mddev->resync_mark = mark[next];
6969 mddev->resync_mark_cnt = mark_cnt[next];
6970 mark[next] = jiffies;
6971 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
6972 last_mark = next;
6973 }
6974
6975
6976 if (kthread_should_stop())
6977 goto interrupted;
6978
6979
6980
6981
6982
6983
6984
6985
6986
6987
6988 cond_resched();
6989
6990 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
6991 /((jiffies-mddev->resync_mark)/HZ +1) +1;
6992
6993 if (currspeed > speed_min(mddev)) {
6994 if ((currspeed > speed_max(mddev)) ||
6995 !is_mddev_idle(mddev, 0)) {
6996 msleep(500);
6997 goto repeat;
6998 }
6999 }
	}
	printk(KERN_INFO "md: %s: %s done.\n", mdname(mddev), desc);
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
 out:
	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

	/* tell personality that we are finished */
	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);

	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
	    mddev->curr_resync > 2) {
		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
				if (mddev->curr_resync >= mddev->recovery_cp) {
					printk(KERN_INFO
					       "md: checkpointing %s of %s.\n",
					       desc, mdname(mddev));
					mddev->recovery_cp = mddev->curr_resync;
				}
			} else
				mddev->recovery_cp = MaxSector;
		} else {
			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
				mddev->curr_resync = MaxSector;
			rcu_read_lock();
			list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
				if (rdev->raid_disk >= 0 &&
				    mddev->delta_disks >= 0 &&
				    !test_bit(Faulty, &rdev->flags) &&
				    !test_bit(In_sync, &rdev->flags) &&
				    rdev->recovery_offset < mddev->curr_resync)
					rdev->recovery_offset = mddev->curr_resync;
			rcu_read_unlock();
		}
	}
	set_bit(MD_CHANGE_DEVS, &mddev->flags);

 skip:
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* We completed so min/max setting can be forgotten if used. */
		if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			mddev->resync_min = 0;
		mddev->resync_max = MaxSector;
	} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
		mddev->resync_min = mddev->curr_resync_completed;
	mddev->curr_resync = 0;
	wake_up(&resync_wait);
	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	return;

 interrupted:
	/*
	 * got a signal, exit.
	 */
	printk(KERN_INFO
	       "md: md_do_sync() got signal ... exiting\n");
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	goto out;

}
EXPORT_SYMBOL_GPL(md_do_sync);

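/* Remove any failed or no-longer-in_sync devices that are idle
 * (nr_pending == 0), then try to hot-add any available spares.
 * Returns the number of devices that a recovery could now rebuild.
 */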
static int remove_and_add_spares(mddev_t *mddev)
{
	mdk_rdev_t *rdev;
	int spares = 0;

	mddev->curr_resync_completed = 0;

	list_for_each_entry(rdev, &mddev->disks, same_set)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Blocked, &rdev->flags) &&
		    (test_bit(Faulty, &rdev->flags) ||
		     !test_bit(In_sync, &rdev->flags)) &&
		    atomic_read(&rdev->nr_pending) == 0) {
			if (mddev->pers->hot_remove_disk(
				    mddev, rdev->raid_disk) == 0) {
				char nm[20];
				sprintf(nm, "rd%d", rdev->raid_disk);
				sysfs_remove_link(&mddev->kobj, nm);
				rdev->raid_disk = -1;
			}
		}

	if (mddev->degraded && !mddev->recovery_disabled) {
		list_for_each_entry(rdev, &mddev->disks, same_set) {
			if (rdev->raid_disk >= 0 &&
			    !test_bit(In_sync, &rdev->flags) &&
			    !test_bit(Faulty, &rdev->flags) &&
			    !test_bit(Blocked, &rdev->flags))
				spares++;
			if (rdev->raid_disk < 0
			    && !test_bit(Faulty, &rdev->flags)) {
				rdev->recovery_offset = 0;
				if (mddev->pers->
				    hot_add_disk(mddev, rdev) == 0) {
					char nm[20];
					sprintf(nm, "rd%d", rdev->raid_disk);
					if (sysfs_create_link(&mddev->kobj,
							      &rdev->kobj, nm))
						/* failure here is OK */;
					spares++;
					md_new_event(mddev);
					set_bit(MD_CHANGE_DEVS, &mddev->flags);
				} else
					break;
			}
		}
	}
	return spares;
}

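/* The sync thread has exited: collect its result, activate any spares
 * that came into sync, finish a reshape if one was in progress, and
 * record the outcome in the superblocks.
 */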
static void reap_sync_thread(mddev_t *mddev)
{
	mdk_rdev_t *rdev;

	/* resync has finished, collect result */
	md_unregister_thread(mddev->sync_thread);
	mddev->sync_thread = NULL;
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* success...*/
		/* activate any spares */
		if (mddev->pers->spare_active(mddev))
			sysfs_notify(&mddev->kobj, NULL,
				     "degraded");
	}
	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    mddev->pers->finish_reshape)
		mddev->pers->finish_reshape(mddev);
	md_update_sb(mddev, 1);

	/* if array is no longer degraded, then any saved_raid_disk
	 * information must be scrapped
	 */
	if (!mddev->degraded)
		list_for_each_entry(rdev, &mddev->disks, same_set)
			rdev->saved_raid_disk = -1;

	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	/* flag recovery needed just to double check */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	md_new_event(mddev);
}

/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakes up this thread which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't disturb it.
 *     If it needs a new thread, don't disturb it.
 *  3/ If recovery has finished, clean up, possibly disturbing
 *     spares that have become in_sync or faulty.
 */
void md_check_recovery(mddev_t *mddev)
{
	if (mddev->suspended)
		return;

	if (mddev->bitmap)
		bitmap_daemon_work(mddev);

	if (mddev->ro)
		return;

	if (signal_pending(current)) {
		if (mddev->pers->sync_request && !mddev->external) {
			printk(KERN_INFO "md: %s in immediate safe mode\n",
			       mdname(mddev));
			mddev->safemode = 2;
		}
		flush_signals(current);
	}

	if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
		return;
	if (!(
		(mddev->flags & ~(1<<MD_CHANGE_PENDING)) ||
		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
		(mddev->external == 0 && mddev->safemode == 1) ||
		(mddev->safemode == 2 && !atomic_read(&mddev->writes_pending)
		 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
		))
		return;

	if (mddev_trylock(mddev)) {
		int spares = 0;

		if (mddev->ro) {
			/* The only thing we can do with a read-only
			 * array is remove failed devices.
			 */
			mdk_rdev_t *rdev;
			list_for_each_entry(rdev, &mddev->disks, same_set)
				if (rdev->raid_disk >= 0 &&
				    !test_bit(Blocked, &rdev->flags) &&
				    test_bit(Faulty, &rdev->flags) &&
				    atomic_read(&rdev->nr_pending) == 0) {
					if (mddev->pers->hot_remove_disk(
						    mddev, rdev->raid_disk) == 0) {
						char nm[20];
						sprintf(nm, "rd%d", rdev->raid_disk);
						sysfs_remove_link(&mddev->kobj, nm);
						rdev->raid_disk = -1;
					}
				}
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			goto unlock;
		}

		if (!mddev->external) {
			int did_change = 0;
			spin_lock_irq(&mddev->write_lock);
			if (mddev->safemode &&
			    !atomic_read(&mddev->writes_pending) &&
			    !mddev->in_sync &&
			    mddev->recovery_cp == MaxSector) {
				mddev->in_sync = 1;
				did_change = 1;
				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
			}
			if (mddev->safemode == 1)
				mddev->safemode = 0;
			spin_unlock_irq(&mddev->write_lock);
			if (did_change)
				sysfs_notify_dirent_safe(mddev->sysfs_state);
		}

		if (mddev->flags)
			md_update_sb(mddev, 0);

		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
		    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			goto unlock;
		}
		if (mddev->sync_thread) {
			reap_sync_thread(mddev);
			goto unlock;
		}
		/* Set RUNNING before clearing NEEDED to avoid
		 * any transients in the value of "sync_action".
		 */
		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		/* Clear some bits that might cause
		 * a transition in the personality:
		 */
		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);

		if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
			goto unlock;

		/* no recovery is running.
		 * remove any failed drives, then
		 * add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */

		if (mddev->reshape_position != MaxSector) {
			if (mddev->pers->check_reshape == NULL ||
			    mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
				goto unlock;
			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if ((spares = remove_and_add_spares(mddev))) {
			clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
			clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (mddev->recovery_cp < MaxSector) {
			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done ... */
			goto unlock;

		if (mddev->pers->sync_request) {
			if (spares && mddev->bitmap && !mddev->bitmap->file) {
				/* We are adding a device or devices to an
				 * array which has a bitmap stored on all
				 * devices. So make sure all bitmap pages get
				 * written.
				 */
				bitmap_write_all(mddev->bitmap);
			}
			mddev->sync_thread = md_register_thread(md_do_sync,
								mddev,
								"resync");
			if (!mddev->sync_thread) {
				printk(KERN_ERR "%s: could not start resync"
				       " thread...\n",
				       mdname(mddev));
				/* leave the spares where they are, it shouldn't hurt */
				clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
				clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
				clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
				clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
				clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
			} else
				md_wakeup_thread(mddev->sync_thread);
			sysfs_notify_dirent_safe(mddev->sysfs_action);
			md_new_event(mddev);
		}
	unlock:
		if (!mddev->sync_thread) {
			clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
			if (test_and_clear_bit(MD_RECOVERY_RECOVER,
					       &mddev->recovery))
				if (mddev->sysfs_action)
					sysfs_notify_dirent_safe(mddev->sysfs_action);
		}
		mddev_unlock(mddev);
	}
}

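/* Wait (for up to five seconds) for a Blocked device to be unblocked,
 * then drop the pending reference that the caller holds on it.
 */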
void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
{
	sysfs_notify_dirent_safe(rdev->sysfs_state);
	wait_event_timeout(rdev->blocked_wait,
			   !test_bit(Blocked, &rdev->flags),
			   msecs_to_jiffies(5000));
	rdev_dec_pending(rdev, mddev);
}
EXPORT_SYMBOL(md_wait_for_blocked_rdev);

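/* On shutdown, halt or power-off, switch every array to read-only so
 * the superblocks are marked clean before the machine goes down.
 */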
static int md_notify_reboot(struct notifier_block *this,
			    unsigned long code, void *x)
{
	struct list_head *tmp;
	mddev_t *mddev;

	if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {

		printk(KERN_INFO "md: stopping all md devices.\n");

		for_each_mddev(mddev, tmp)
			if (mddev_trylock(mddev)) {
				/* Force a switch to readonly even if the
				 * array appears to still be in use: hence
				 * the '100'.
				 */
				md_set_readonly(mddev, 100);
				mddev_unlock(mddev);
			}
		/*
		 * certain more exotic SCSI devices are known to be
		 * volatile wrt too early system reboots. While the
		 * right place to handle this issue is the given
		 * driver, we do want to have a safe RAID driver ...
		 */
		mdelay(1000*1);
	}
	return NOTIFY_DONE;
}

static struct notifier_block md_notifier = {
	.notifier_call	= md_notify_reboot,
	.next		= NULL,
	.priority	= INT_MAX, /* before any real devices */
};

static void md_geninit(void)
{
	dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));

	proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}

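/* Module initialisation: create the workqueues, claim the 'md' and
 * 'mdp' block majors, and hook up the reboot notifier, sysctls and
 * /proc/mdstat.
 */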
static int __init md_init(void)
{
	int ret = -ENOMEM;

	md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
	if (!md_wq)
		goto err_wq;

	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
	if (!md_misc_wq)
		goto err_misc_wq;

	if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
		goto err_md;

	if ((ret = register_blkdev(0, "mdp")) < 0)
		goto err_mdp;
	mdp_major = ret;

	blk_register_region(MKDEV(MD_MAJOR, 0), 1UL << MINORBITS, THIS_MODULE,
			    md_probe, NULL, NULL);
	blk_register_region(MKDEV(mdp_major, 0), 1UL << MINORBITS, THIS_MODULE,
			    md_probe, NULL, NULL);

	register_reboot_notifier(&md_notifier);
	raid_table_header = register_sysctl_table(raid_root_table);

	md_geninit();
	return 0;

err_mdp:
	unregister_blkdev(MD_MAJOR, "md");
err_md:
	destroy_workqueue(md_misc_wq);
err_misc_wq:
	destroy_workqueue(md_wq);
err_wq:
	return ret;
}

#ifndef MODULE

/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */

static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
	struct list_head list;
	dev_t dev;
};

void md_autodetect_dev(dev_t dev)
{
	struct detected_devices_node *node_detected_dev;

	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
	if (node_detected_dev) {
		node_detected_dev->dev = dev;
		list_add_tail(&node_detected_dev->list, &all_detected_devices);
	} else {
		printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
		       ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
	}
}

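/* Import every device queued by md_autodetect_dev() and hand the
 * result to autorun_devices() for assembly.  Only devices carrying a
 * valid 0.90-format superblock (hence the '0, 90' arguments below)
 * are picked up here.
 */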
static void autostart_arrays(int part)
{
	mdk_rdev_t *rdev;
	struct detected_devices_node *node_detected_dev;
	dev_t dev;
	int i_scanned, i_passed;

	i_scanned = 0;
	i_passed = 0;

	printk(KERN_INFO "md: Autodetecting RAID arrays.\n");

	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
		i_scanned++;
		node_detected_dev = list_entry(all_detected_devices.next,
					       struct detected_devices_node, list);
		list_del(&node_detected_dev->list);
		dev = node_detected_dev->dev;
		kfree(node_detected_dev);
		rdev = md_import_device(dev, 0, 90);
		if (IS_ERR(rdev))
			continue;

		if (test_bit(Faulty, &rdev->flags)) {
			MD_BUG();
			continue;
		}
		set_bit(AutoDetected, &rdev->flags);
		list_add(&rdev->same_set, &pending_raid_disks);
		i_passed++;
	}

	printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
	       i_scanned, i_passed);

	autorun_devices(part);
}

#endif /* !MODULE */

static __exit void md_exit(void)
{
	mddev_t *mddev;
	struct list_head *tmp;

	blk_unregister_region(MKDEV(MD_MAJOR, 0), 1U << MINORBITS);
	blk_unregister_region(MKDEV(mdp_major, 0), 1U << MINORBITS);

	unregister_blkdev(MD_MAJOR, "md");
	unregister_blkdev(mdp_major, "mdp");
	unregister_reboot_notifier(&md_notifier);
	unregister_sysctl_table(raid_table_header);
	remove_proc_entry("mdstat", NULL);
	for_each_mddev(mddev, tmp) {
		export_array(mddev);
		mddev->hold_active = 0;
	}
	destroy_workqueue(md_misc_wq);
	destroy_workqueue(md_wq);
}

subsys_initcall(md_init);
module_exit(md_exit)

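/* Accessors for the 'start_ro' module parameter, which is backed by
 * the start_readonly flag declared near the top of this file.
 */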
static int get_ro(char *buffer, struct kernel_param *kp)
{
	return sprintf(buffer, "%d", start_readonly);
}
static int set_ro(const char *val, struct kernel_param *kp)
{
	char *e;
	int num = simple_strtoul(val, &e, 10);
	if (*val && (*e == '\0' || *e == '\n')) {
		start_readonly = num;
		return 0;
	}
	return -EINVAL;
}

module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);

module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);

EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality);
EXPORT_SYMBOL(md_error);
EXPORT_SYMBOL(md_done_sync);
EXPORT_SYMBOL(md_write_start);
EXPORT_SYMBOL(md_write_end);
EXPORT_SYMBOL(md_register_thread);
EXPORT_SYMBOL(md_unregister_thread);
EXPORT_SYMBOL(md_wakeup_thread);
EXPORT_SYMBOL(md_check_recovery);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);