1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/fs.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/string.h>
43#include <linux/hdreg.h>
44#include <linux/proc_fs.h>
45#include <linux/random.h>
46#include <linux/module.h>
47#include <linux/reboot.h>
48#include <linux/file.h>
49#include <linux/compat.h>
50#include <linux/delay.h>
51#include <linux/raid/md_p.h>
52#include <linux/raid/md_u.h>
53#include <linux/slab.h>
54#include "md.h"
55#include "bitmap.h"
56
57#ifndef MODULE
58static void autostart_arrays(int part);
59#endif
60
61
62
63
64
65
66static LIST_HEAD(pers_list);
67static DEFINE_SPINLOCK(pers_lock);
68
69static void md_print_devices(void);
70
71static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
72static struct workqueue_struct *md_wq;
73static struct workqueue_struct *md_misc_wq;
74
75static int remove_and_add_spares(struct mddev *mddev,
76 struct md_rdev *this);
77
/*
 * Report an internal inconsistency: print the source file and line of the
 * call site, then call md_print_devices().  Any arguments are ignored.
 *
 * The body is wrapped in do { } while (0) so that "MD_BUG();" expands to
 * exactly one statement and is safe as the body of an unbraced if/else.
 */
#define MD_BUG(x...)							\
	do {								\
		printk("md: bug in file %s, line %d\n",			\
		       __FILE__, __LINE__);				\
		md_print_devices();					\
	} while (0)
79
80
81
82
83
84
85#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
86
87
88
89
90
91
92
93
94
95
96
97
98
99static int sysctl_speed_limit_min = 1000;
100static int sysctl_speed_limit_max = 200000;
101static inline int speed_min(struct mddev *mddev)
102{
103 return mddev->sync_speed_min ?
104 mddev->sync_speed_min : sysctl_speed_limit_min;
105}
106
107static inline int speed_max(struct mddev *mddev)
108{
109 return mddev->sync_speed_max ?
110 mddev->sync_speed_max : sysctl_speed_limit_max;
111}
112
113static struct ctl_table_header *raid_table_header;
114
115static ctl_table raid_table[] = {
116 {
117 .procname = "speed_limit_min",
118 .data = &sysctl_speed_limit_min,
119 .maxlen = sizeof(int),
120 .mode = S_IRUGO|S_IWUSR,
121 .proc_handler = proc_dointvec,
122 },
123 {
124 .procname = "speed_limit_max",
125 .data = &sysctl_speed_limit_max,
126 .maxlen = sizeof(int),
127 .mode = S_IRUGO|S_IWUSR,
128 .proc_handler = proc_dointvec,
129 },
130 { }
131};
132
133static ctl_table raid_dir_table[] = {
134 {
135 .procname = "raid",
136 .maxlen = 0,
137 .mode = S_IRUGO|S_IXUGO,
138 .child = raid_table,
139 },
140 { }
141};
142
143static ctl_table raid_root_table[] = {
144 {
145 .procname = "dev",
146 .maxlen = 0,
147 .mode = 0555,
148 .child = raid_dir_table,
149 },
150 { }
151};
152
153static const struct block_device_operations md_fops;
154
155static int start_readonly;
156
157
158
159
160
161struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
162 struct mddev *mddev)
163{
164 struct bio *b;
165
166 if (!mddev || !mddev->bio_set)
167 return bio_alloc(gfp_mask, nr_iovecs);
168
169 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
170 if (!b)
171 return NULL;
172 return b;
173}
174EXPORT_SYMBOL_GPL(bio_alloc_mddev);
175
176struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
177 struct mddev *mddev)
178{
179 if (!mddev || !mddev->bio_set)
180 return bio_clone(bio, gfp_mask);
181
182 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
183}
184EXPORT_SYMBOL_GPL(bio_clone_mddev);
185
186void md_trim_bio(struct bio *bio, int offset, int size)
187{
188
189
190
191
192 int i;
193 struct bio_vec *bvec;
194 int sofar = 0;
195
196 size <<= 9;
197 if (offset == 0 && size == bio->bi_size)
198 return;
199
200 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
201
202 bio_advance(bio, offset << 9);
203
204 bio->bi_size = size;
205
206
207 if (bio->bi_idx) {
208 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
209 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
210 bio->bi_vcnt -= bio->bi_idx;
211 bio->bi_idx = 0;
212 }
213
214 bio_for_each_segment(bvec, bio, i) {
215 if (sofar + bvec->bv_len > size)
216 bvec->bv_len = size - sofar;
217 if (bvec->bv_len == 0) {
218 bio->bi_vcnt = i;
219 break;
220 }
221 sofar += bvec->bv_len;
222 }
223}
224EXPORT_SYMBOL_GPL(md_trim_bio);
225
226
227
228
229
230
231
232
233
234
235
236static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
237static atomic_t md_event_count;
238void md_new_event(struct mddev *mddev)
239{
240 atomic_inc(&md_event_count);
241 wake_up(&md_event_waiters);
242}
243EXPORT_SYMBOL_GPL(md_new_event);
244
245
246
247
248static void md_new_event_inintr(struct mddev *mddev)
249{
250 atomic_inc(&md_event_count);
251 wake_up(&md_event_waiters);
252}
253
254
255
256
257
258static LIST_HEAD(all_mddevs);
259static DEFINE_SPINLOCK(all_mddevs_lock);
260
261
262
263
264
265
266
267
268
/*
 * Iterate over every mddev on the all_mddevs list.
 *
 * @_mddev: struct mddev * cursor, valid inside the loop body.
 * @_tmp:   struct list_head * scratch cursor.
 *
 * all_mddevs_lock is taken only while stepping the list; it is NOT held
 * while the loop body runs.  Instead a reference (mddev_get) is held on
 * the current entry and dropped (mddev_put) when the loop advances or
 * terminates normally.  If the body leaves the loop early with 'break',
 * that reference is still held and the caller must mddev_put() it.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
283
284
285
286
287
288
289
290
291
292static void md_make_request(struct request_queue *q, struct bio *bio)
293{
294 const int rw = bio_data_dir(bio);
295 struct mddev *mddev = q->queuedata;
296 int cpu;
297 unsigned int sectors;
298
299 if (mddev == NULL || mddev->pers == NULL
300 || !mddev->ready) {
301 bio_io_error(bio);
302 return;
303 }
304 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
305 bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
306 return;
307 }
308 smp_rmb();
309 rcu_read_lock();
310 if (mddev->suspended) {
311 DEFINE_WAIT(__wait);
312 for (;;) {
313 prepare_to_wait(&mddev->sb_wait, &__wait,
314 TASK_UNINTERRUPTIBLE);
315 if (!mddev->suspended)
316 break;
317 rcu_read_unlock();
318 schedule();
319 rcu_read_lock();
320 }
321 finish_wait(&mddev->sb_wait, &__wait);
322 }
323 atomic_inc(&mddev->active_io);
324 rcu_read_unlock();
325
326
327
328
329
330 sectors = bio_sectors(bio);
331 mddev->pers->make_request(mddev, bio);
332
333 cpu = part_stat_lock();
334 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
335 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
336 part_stat_unlock();
337
338 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
339 wake_up(&mddev->sb_wait);
340}
341
342
343
344
345
346
347
348void mddev_suspend(struct mddev *mddev)
349{
350 BUG_ON(mddev->suspended);
351 mddev->suspended = 1;
352 synchronize_rcu();
353 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
354 mddev->pers->quiesce(mddev, 1);
355
356 del_timer_sync(&mddev->safemode_timer);
357}
358EXPORT_SYMBOL_GPL(mddev_suspend);
359
360void mddev_resume(struct mddev *mddev)
361{
362 mddev->suspended = 0;
363 wake_up(&mddev->sb_wait);
364 mddev->pers->quiesce(mddev, 0);
365
366 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
367 md_wakeup_thread(mddev->thread);
368 md_wakeup_thread(mddev->sync_thread);
369}
370EXPORT_SYMBOL_GPL(mddev_resume);
371
372int mddev_congested(struct mddev *mddev, int bits)
373{
374 return mddev->suspended;
375}
376EXPORT_SYMBOL(mddev_congested);
377
378
379
380
381
382static void md_end_flush(struct bio *bio, int err)
383{
384 struct md_rdev *rdev = bio->bi_private;
385 struct mddev *mddev = rdev->mddev;
386
387 rdev_dec_pending(rdev, mddev);
388
389 if (atomic_dec_and_test(&mddev->flush_pending)) {
390
391 queue_work(md_wq, &mddev->flush_work);
392 }
393 bio_put(bio);
394}
395
396static void md_submit_flush_data(struct work_struct *ws);
397
398static void submit_flushes(struct work_struct *ws)
399{
400 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
401 struct md_rdev *rdev;
402
403 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
404 atomic_set(&mddev->flush_pending, 1);
405 rcu_read_lock();
406 rdev_for_each_rcu(rdev, mddev)
407 if (rdev->raid_disk >= 0 &&
408 !test_bit(Faulty, &rdev->flags)) {
409
410
411
412
413 struct bio *bi;
414 atomic_inc(&rdev->nr_pending);
415 atomic_inc(&rdev->nr_pending);
416 rcu_read_unlock();
417 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
418 bi->bi_end_io = md_end_flush;
419 bi->bi_private = rdev;
420 bi->bi_bdev = rdev->bdev;
421 atomic_inc(&mddev->flush_pending);
422 submit_bio(WRITE_FLUSH, bi);
423 rcu_read_lock();
424 rdev_dec_pending(rdev, mddev);
425 }
426 rcu_read_unlock();
427 if (atomic_dec_and_test(&mddev->flush_pending))
428 queue_work(md_wq, &mddev->flush_work);
429}
430
431static void md_submit_flush_data(struct work_struct *ws)
432{
433 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
434 struct bio *bio = mddev->flush_bio;
435
436 if (bio->bi_size == 0)
437
438 bio_endio(bio, 0);
439 else {
440 bio->bi_rw &= ~REQ_FLUSH;
441 mddev->pers->make_request(mddev, bio);
442 }
443
444 mddev->flush_bio = NULL;
445 wake_up(&mddev->sb_wait);
446}
447
448void md_flush_request(struct mddev *mddev, struct bio *bio)
449{
450 spin_lock_irq(&mddev->write_lock);
451 wait_event_lock_irq(mddev->sb_wait,
452 !mddev->flush_bio,
453 mddev->write_lock);
454 mddev->flush_bio = bio;
455 spin_unlock_irq(&mddev->write_lock);
456
457 INIT_WORK(&mddev->flush_work, submit_flushes);
458 queue_work(md_wq, &mddev->flush_work);
459}
460EXPORT_SYMBOL(md_flush_request);
461
462void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
463{
464 struct mddev *mddev = cb->data;
465 md_wakeup_thread(mddev->thread);
466 kfree(cb);
467}
468EXPORT_SYMBOL(md_unplug);
469
470static inline struct mddev *mddev_get(struct mddev *mddev)
471{
472 atomic_inc(&mddev->active);
473 return mddev;
474}
475
476static void mddev_delayed_delete(struct work_struct *ws);
477
478static void mddev_put(struct mddev *mddev)
479{
480 struct bio_set *bs = NULL;
481
482 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
483 return;
484 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
485 mddev->ctime == 0 && !mddev->hold_active) {
486
487
488 list_del_init(&mddev->all_mddevs);
489 bs = mddev->bio_set;
490 mddev->bio_set = NULL;
491 if (mddev->gendisk) {
492
493
494
495
496
497 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
498 queue_work(md_misc_wq, &mddev->del_work);
499 } else
500 kfree(mddev);
501 }
502 spin_unlock(&all_mddevs_lock);
503 if (bs)
504 bioset_free(bs);
505}
506
507void mddev_init(struct mddev *mddev)
508{
509 mutex_init(&mddev->open_mutex);
510 mutex_init(&mddev->reconfig_mutex);
511 mutex_init(&mddev->bitmap_info.mutex);
512 INIT_LIST_HEAD(&mddev->disks);
513 INIT_LIST_HEAD(&mddev->all_mddevs);
514 init_timer(&mddev->safemode_timer);
515 atomic_set(&mddev->active, 1);
516 atomic_set(&mddev->openers, 0);
517 atomic_set(&mddev->active_io, 0);
518 spin_lock_init(&mddev->write_lock);
519 atomic_set(&mddev->flush_pending, 0);
520 init_waitqueue_head(&mddev->sb_wait);
521 init_waitqueue_head(&mddev->recovery_wait);
522 mddev->reshape_position = MaxSector;
523 mddev->reshape_backwards = 0;
524 mddev->last_sync_action = "none";
525 mddev->resync_min = 0;
526 mddev->resync_max = MaxSector;
527 mddev->level = LEVEL_NONE;
528}
529EXPORT_SYMBOL_GPL(mddev_init);
530
531static struct mddev * mddev_find(dev_t unit)
532{
533 struct mddev *mddev, *new = NULL;
534
535 if (unit && MAJOR(unit) != MD_MAJOR)
536 unit &= ~((1<<MdpMinorShift)-1);
537
538 retry:
539 spin_lock(&all_mddevs_lock);
540
541 if (unit) {
542 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
543 if (mddev->unit == unit) {
544 mddev_get(mddev);
545 spin_unlock(&all_mddevs_lock);
546 kfree(new);
547 return mddev;
548 }
549
550 if (new) {
551 list_add(&new->all_mddevs, &all_mddevs);
552 spin_unlock(&all_mddevs_lock);
553 new->hold_active = UNTIL_IOCTL;
554 return new;
555 }
556 } else if (new) {
557
558 static int next_minor = 512;
559 int start = next_minor;
560 int is_free = 0;
561 int dev = 0;
562 while (!is_free) {
563 dev = MKDEV(MD_MAJOR, next_minor);
564 next_minor++;
565 if (next_minor > MINORMASK)
566 next_minor = 0;
567 if (next_minor == start) {
568
569 spin_unlock(&all_mddevs_lock);
570 kfree(new);
571 return NULL;
572 }
573
574 is_free = 1;
575 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
576 if (mddev->unit == dev) {
577 is_free = 0;
578 break;
579 }
580 }
581 new->unit = dev;
582 new->md_minor = MINOR(dev);
583 new->hold_active = UNTIL_STOP;
584 list_add(&new->all_mddevs, &all_mddevs);
585 spin_unlock(&all_mddevs_lock);
586 return new;
587 }
588 spin_unlock(&all_mddevs_lock);
589
590 new = kzalloc(sizeof(*new), GFP_KERNEL);
591 if (!new)
592 return NULL;
593
594 new->unit = unit;
595 if (MAJOR(unit) == MD_MAJOR)
596 new->md_minor = MINOR(unit);
597 else
598 new->md_minor = MINOR(unit) >> MdpMinorShift;
599
600 mddev_init(new);
601
602 goto retry;
603}
604
605static inline int mddev_lock(struct mddev * mddev)
606{
607 return mutex_lock_interruptible(&mddev->reconfig_mutex);
608}
609
610static inline int mddev_is_locked(struct mddev *mddev)
611{
612 return mutex_is_locked(&mddev->reconfig_mutex);
613}
614
615static inline int mddev_trylock(struct mddev * mddev)
616{
617 return mutex_trylock(&mddev->reconfig_mutex);
618}
619
620static struct attribute_group md_redundancy_group;
621
622static void mddev_unlock(struct mddev * mddev)
623{
624 if (mddev->to_remove) {
625
626
627
628
629
630
631
632
633
634
635
636
637 struct attribute_group *to_remove = mddev->to_remove;
638 mddev->to_remove = NULL;
639 mddev->sysfs_active = 1;
640 mutex_unlock(&mddev->reconfig_mutex);
641
642 if (mddev->kobj.sd) {
643 if (to_remove != &md_redundancy_group)
644 sysfs_remove_group(&mddev->kobj, to_remove);
645 if (mddev->pers == NULL ||
646 mddev->pers->sync_request == NULL) {
647 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
648 if (mddev->sysfs_action)
649 sysfs_put(mddev->sysfs_action);
650 mddev->sysfs_action = NULL;
651 }
652 }
653 mddev->sysfs_active = 0;
654 } else
655 mutex_unlock(&mddev->reconfig_mutex);
656
657
658
659
660 spin_lock(&pers_lock);
661 md_wakeup_thread(mddev->thread);
662 spin_unlock(&pers_lock);
663}
664
665static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
666{
667 struct md_rdev *rdev;
668
669 rdev_for_each(rdev, mddev)
670 if (rdev->desc_nr == nr)
671 return rdev;
672
673 return NULL;
674}
675
676static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
677{
678 struct md_rdev *rdev;
679
680 rdev_for_each_rcu(rdev, mddev)
681 if (rdev->desc_nr == nr)
682 return rdev;
683
684 return NULL;
685}
686
687static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
688{
689 struct md_rdev *rdev;
690
691 rdev_for_each(rdev, mddev)
692 if (rdev->bdev->bd_dev == dev)
693 return rdev;
694
695 return NULL;
696}
697
698static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
699{
700 struct md_rdev *rdev;
701
702 rdev_for_each_rcu(rdev, mddev)
703 if (rdev->bdev->bd_dev == dev)
704 return rdev;
705
706 return NULL;
707}
708
709static struct md_personality *find_pers(int level, char *clevel)
710{
711 struct md_personality *pers;
712 list_for_each_entry(pers, &pers_list, list) {
713 if (level != LEVEL_NONE && pers->level == level)
714 return pers;
715 if (strcmp(pers->name, clevel)==0)
716 return pers;
717 }
718 return NULL;
719}
720
721
722static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
723{
724 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
725 return MD_NEW_SIZE_SECTORS(num_sectors);
726}
727
728static int alloc_disk_sb(struct md_rdev * rdev)
729{
730 if (rdev->sb_page)
731 MD_BUG();
732
733 rdev->sb_page = alloc_page(GFP_KERNEL);
734 if (!rdev->sb_page) {
735 printk(KERN_ALERT "md: out of memory.\n");
736 return -ENOMEM;
737 }
738
739 return 0;
740}
741
742void md_rdev_clear(struct md_rdev *rdev)
743{
744 if (rdev->sb_page) {
745 put_page(rdev->sb_page);
746 rdev->sb_loaded = 0;
747 rdev->sb_page = NULL;
748 rdev->sb_start = 0;
749 rdev->sectors = 0;
750 }
751 if (rdev->bb_page) {
752 put_page(rdev->bb_page);
753 rdev->bb_page = NULL;
754 }
755 kfree(rdev->badblocks.page);
756 rdev->badblocks.page = NULL;
757}
758EXPORT_SYMBOL_GPL(md_rdev_clear);
759
760static void super_written(struct bio *bio, int error)
761{
762 struct md_rdev *rdev = bio->bi_private;
763 struct mddev *mddev = rdev->mddev;
764
765 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
766 printk("md: super_written gets error=%d, uptodate=%d\n",
767 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
768 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
769 md_error(mddev, rdev);
770 }
771
772 if (atomic_dec_and_test(&mddev->pending_writes))
773 wake_up(&mddev->sb_wait);
774 bio_put(bio);
775}
776
777void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
778 sector_t sector, int size, struct page *page)
779{
780
781
782
783
784
785
786 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
787
788 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
789 bio->bi_sector = sector;
790 bio_add_page(bio, page, size, 0);
791 bio->bi_private = rdev;
792 bio->bi_end_io = super_written;
793
794 atomic_inc(&mddev->pending_writes);
795 submit_bio(WRITE_FLUSH_FUA, bio);
796}
797
798void md_super_wait(struct mddev *mddev)
799{
800
801 DEFINE_WAIT(wq);
802 for(;;) {
803 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
804 if (atomic_read(&mddev->pending_writes)==0)
805 break;
806 schedule();
807 }
808 finish_wait(&mddev->sb_wait, &wq);
809}
810
811static void bi_complete(struct bio *bio, int error)
812{
813 complete((struct completion*)bio->bi_private);
814}
815
816int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
817 struct page *page, int rw, bool metadata_op)
818{
819 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
820 struct completion event;
821 int ret;
822
823 rw |= REQ_SYNC;
824
825 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
826 rdev->meta_bdev : rdev->bdev;
827 if (metadata_op)
828 bio->bi_sector = sector + rdev->sb_start;
829 else if (rdev->mddev->reshape_position != MaxSector &&
830 (rdev->mddev->reshape_backwards ==
831 (sector >= rdev->mddev->reshape_position)))
832 bio->bi_sector = sector + rdev->new_data_offset;
833 else
834 bio->bi_sector = sector + rdev->data_offset;
835 bio_add_page(bio, page, size, 0);
836 init_completion(&event);
837 bio->bi_private = &event;
838 bio->bi_end_io = bi_complete;
839 submit_bio(rw, bio);
840 wait_for_completion(&event);
841
842 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
843 bio_put(bio);
844 return ret;
845}
846EXPORT_SYMBOL_GPL(sync_page_io);
847
848static int read_disk_sb(struct md_rdev * rdev, int size)
849{
850 char b[BDEVNAME_SIZE];
851 if (!rdev->sb_page) {
852 MD_BUG();
853 return -EINVAL;
854 }
855 if (rdev->sb_loaded)
856 return 0;
857
858
859 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
860 goto fail;
861 rdev->sb_loaded = 1;
862 return 0;
863
864fail:
865 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
866 bdevname(rdev->bdev,b));
867 return -EINVAL;
868}
869
870static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
871{
872 return sb1->set_uuid0 == sb2->set_uuid0 &&
873 sb1->set_uuid1 == sb2->set_uuid1 &&
874 sb1->set_uuid2 == sb2->set_uuid2 &&
875 sb1->set_uuid3 == sb2->set_uuid3;
876}
877
878static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
879{
880 int ret;
881 mdp_super_t *tmp1, *tmp2;
882
883 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
884 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
885
886 if (!tmp1 || !tmp2) {
887 ret = 0;
888 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
889 goto abort;
890 }
891
892 *tmp1 = *sb1;
893 *tmp2 = *sb2;
894
895
896
897
898 tmp1->nr_disks = 0;
899 tmp2->nr_disks = 0;
900
901 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
902abort:
903 kfree(tmp1);
904 kfree(tmp2);
905 return ret;
906}
907
908
909static u32 md_csum_fold(u32 csum)
910{
911 csum = (csum & 0xffff) + (csum >> 16);
912 return (csum & 0xffff) + (csum >> 16);
913}
914
915static unsigned int calc_sb_csum(mdp_super_t * sb)
916{
917 u64 newcsum = 0;
918 u32 *sb32 = (u32*)sb;
919 int i;
920 unsigned int disk_csum, csum;
921
922 disk_csum = sb->sb_csum;
923 sb->sb_csum = 0;
924
925 for (i = 0; i < MD_SB_BYTES/4 ; i++)
926 newcsum += sb32[i];
927 csum = (newcsum & 0xffffffff) + (newcsum>>32);
928
929
930#ifdef CONFIG_ALPHA
931
932
933
934
935
936
937
938
939 sb->sb_csum = md_csum_fold(disk_csum);
940#else
941 sb->sb_csum = disk_csum;
942#endif
943 return csum;
944}
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977struct super_type {
978 char *name;
979 struct module *owner;
980 int (*load_super)(struct md_rdev *rdev,
981 struct md_rdev *refdev,
982 int minor_version);
983 int (*validate_super)(struct mddev *mddev,
984 struct md_rdev *rdev);
985 void (*sync_super)(struct mddev *mddev,
986 struct md_rdev *rdev);
987 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
988 sector_t num_sectors);
989 int (*allow_new_offset)(struct md_rdev *rdev,
990 unsigned long long new_offset);
991};
992
993
994
995
996
997
998
999
1000
1001int md_check_no_bitmap(struct mddev *mddev)
1002{
1003 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1004 return 0;
1005 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
1006 mdname(mddev), mddev->pers->name);
1007 return 1;
1008}
1009EXPORT_SYMBOL(md_check_no_bitmap);
1010
1011
1012
1013
1014static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1015{
1016 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1017 mdp_super_t *sb;
1018 int ret;
1019
1020
1021
1022
1023
1024
1025
1026 rdev->sb_start = calc_dev_sboffset(rdev);
1027
1028 ret = read_disk_sb(rdev, MD_SB_BYTES);
1029 if (ret) return ret;
1030
1031 ret = -EINVAL;
1032
1033 bdevname(rdev->bdev, b);
1034 sb = page_address(rdev->sb_page);
1035
1036 if (sb->md_magic != MD_SB_MAGIC) {
1037 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
1038 b);
1039 goto abort;
1040 }
1041
1042 if (sb->major_version != 0 ||
1043 sb->minor_version < 90 ||
1044 sb->minor_version > 91) {
1045 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
1046 sb->major_version, sb->minor_version,
1047 b);
1048 goto abort;
1049 }
1050
1051 if (sb->raid_disks <= 0)
1052 goto abort;
1053
1054 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1055 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
1056 b);
1057 goto abort;
1058 }
1059
1060 rdev->preferred_minor = sb->md_minor;
1061 rdev->data_offset = 0;
1062 rdev->new_data_offset = 0;
1063 rdev->sb_size = MD_SB_BYTES;
1064 rdev->badblocks.shift = -1;
1065
1066 if (sb->level == LEVEL_MULTIPATH)
1067 rdev->desc_nr = -1;
1068 else
1069 rdev->desc_nr = sb->this_disk.number;
1070
1071 if (!refdev) {
1072 ret = 1;
1073 } else {
1074 __u64 ev1, ev2;
1075 mdp_super_t *refsb = page_address(refdev->sb_page);
1076 if (!uuid_equal(refsb, sb)) {
1077 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1078 b, bdevname(refdev->bdev,b2));
1079 goto abort;
1080 }
1081 if (!sb_equal(refsb, sb)) {
1082 printk(KERN_WARNING "md: %s has same UUID"
1083 " but different superblock to %s\n",
1084 b, bdevname(refdev->bdev, b2));
1085 goto abort;
1086 }
1087 ev1 = md_event(sb);
1088 ev2 = md_event(refsb);
1089 if (ev1 > ev2)
1090 ret = 1;
1091 else
1092 ret = 0;
1093 }
1094 rdev->sectors = rdev->sb_start;
1095
1096
1097
1098
1099 if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1100 rdev->sectors = (2ULL << 32) - 2;
1101
1102 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1103
1104 ret = -EINVAL;
1105
1106 abort:
1107 return ret;
1108}
1109
1110
1111
1112
1113static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1114{
1115 mdp_disk_t *desc;
1116 mdp_super_t *sb = page_address(rdev->sb_page);
1117 __u64 ev1 = md_event(sb);
1118
1119 rdev->raid_disk = -1;
1120 clear_bit(Faulty, &rdev->flags);
1121 clear_bit(In_sync, &rdev->flags);
1122 clear_bit(WriteMostly, &rdev->flags);
1123
1124 if (mddev->raid_disks == 0) {
1125 mddev->major_version = 0;
1126 mddev->minor_version = sb->minor_version;
1127 mddev->patch_version = sb->patch_version;
1128 mddev->external = 0;
1129 mddev->chunk_sectors = sb->chunk_size >> 9;
1130 mddev->ctime = sb->ctime;
1131 mddev->utime = sb->utime;
1132 mddev->level = sb->level;
1133 mddev->clevel[0] = 0;
1134 mddev->layout = sb->layout;
1135 mddev->raid_disks = sb->raid_disks;
1136 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1137 mddev->events = ev1;
1138 mddev->bitmap_info.offset = 0;
1139 mddev->bitmap_info.space = 0;
1140
1141 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1142 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1143 mddev->reshape_backwards = 0;
1144
1145 if (mddev->minor_version >= 91) {
1146 mddev->reshape_position = sb->reshape_position;
1147 mddev->delta_disks = sb->delta_disks;
1148 mddev->new_level = sb->new_level;
1149 mddev->new_layout = sb->new_layout;
1150 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1151 if (mddev->delta_disks < 0)
1152 mddev->reshape_backwards = 1;
1153 } else {
1154 mddev->reshape_position = MaxSector;
1155 mddev->delta_disks = 0;
1156 mddev->new_level = mddev->level;
1157 mddev->new_layout = mddev->layout;
1158 mddev->new_chunk_sectors = mddev->chunk_sectors;
1159 }
1160
1161 if (sb->state & (1<<MD_SB_CLEAN))
1162 mddev->recovery_cp = MaxSector;
1163 else {
1164 if (sb->events_hi == sb->cp_events_hi &&
1165 sb->events_lo == sb->cp_events_lo) {
1166 mddev->recovery_cp = sb->recovery_cp;
1167 } else
1168 mddev->recovery_cp = 0;
1169 }
1170
1171 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1172 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1173 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1174 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1175
1176 mddev->max_disks = MD_SB_DISKS;
1177
1178 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1179 mddev->bitmap_info.file == NULL) {
1180 mddev->bitmap_info.offset =
1181 mddev->bitmap_info.default_offset;
1182 mddev->bitmap_info.space =
1183 mddev->bitmap_info.space;
1184 }
1185
1186 } else if (mddev->pers == NULL) {
1187
1188
1189 ++ev1;
1190 if (sb->disks[rdev->desc_nr].state & (
1191 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1192 if (ev1 < mddev->events)
1193 return -EINVAL;
1194 } else if (mddev->bitmap) {
1195
1196
1197
1198 if (ev1 < mddev->bitmap->events_cleared)
1199 return 0;
1200 } else {
1201 if (ev1 < mddev->events)
1202
1203 return 0;
1204 }
1205
1206 if (mddev->level != LEVEL_MULTIPATH) {
1207 desc = sb->disks + rdev->desc_nr;
1208
1209 if (desc->state & (1<<MD_DISK_FAULTY))
1210 set_bit(Faulty, &rdev->flags);
1211 else if (desc->state & (1<<MD_DISK_SYNC)
1212) {
1213 set_bit(In_sync, &rdev->flags);
1214 rdev->raid_disk = desc->raid_disk;
1215 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1216
1217
1218
1219 if (mddev->minor_version >= 91) {
1220 rdev->recovery_offset = 0;
1221 rdev->raid_disk = desc->raid_disk;
1222 }
1223 }
1224 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1225 set_bit(WriteMostly, &rdev->flags);
1226 } else
1227 set_bit(In_sync, &rdev->flags);
1228 return 0;
1229}
1230
1231
1232
1233
1234static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1235{
1236 mdp_super_t *sb;
1237 struct md_rdev *rdev2;
1238 int next_spare = mddev->raid_disks;
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251 int i;
1252 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1253
1254 rdev->sb_size = MD_SB_BYTES;
1255
1256 sb = page_address(rdev->sb_page);
1257
1258 memset(sb, 0, sizeof(*sb));
1259
1260 sb->md_magic = MD_SB_MAGIC;
1261 sb->major_version = mddev->major_version;
1262 sb->patch_version = mddev->patch_version;
1263 sb->gvalid_words = 0;
1264 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1265 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1266 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1267 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1268
1269 sb->ctime = mddev->ctime;
1270 sb->level = mddev->level;
1271 sb->size = mddev->dev_sectors / 2;
1272 sb->raid_disks = mddev->raid_disks;
1273 sb->md_minor = mddev->md_minor;
1274 sb->not_persistent = 0;
1275 sb->utime = mddev->utime;
1276 sb->state = 0;
1277 sb->events_hi = (mddev->events>>32);
1278 sb->events_lo = (u32)mddev->events;
1279
1280 if (mddev->reshape_position == MaxSector)
1281 sb->minor_version = 90;
1282 else {
1283 sb->minor_version = 91;
1284 sb->reshape_position = mddev->reshape_position;
1285 sb->new_level = mddev->new_level;
1286 sb->delta_disks = mddev->delta_disks;
1287 sb->new_layout = mddev->new_layout;
1288 sb->new_chunk = mddev->new_chunk_sectors << 9;
1289 }
1290 mddev->minor_version = sb->minor_version;
1291 if (mddev->in_sync)
1292 {
1293 sb->recovery_cp = mddev->recovery_cp;
1294 sb->cp_events_hi = (mddev->events>>32);
1295 sb->cp_events_lo = (u32)mddev->events;
1296 if (mddev->recovery_cp == MaxSector)
1297 sb->state = (1<< MD_SB_CLEAN);
1298 } else
1299 sb->recovery_cp = 0;
1300
1301 sb->layout = mddev->layout;
1302 sb->chunk_size = mddev->chunk_sectors << 9;
1303
1304 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1305 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1306
1307 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1308 rdev_for_each(rdev2, mddev) {
1309 mdp_disk_t *d;
1310 int desc_nr;
1311 int is_active = test_bit(In_sync, &rdev2->flags);
1312
1313 if (rdev2->raid_disk >= 0 &&
1314 sb->minor_version >= 91)
1315
1316
1317
1318
1319 is_active = 1;
1320 if (rdev2->raid_disk < 0 ||
1321 test_bit(Faulty, &rdev2->flags))
1322 is_active = 0;
1323 if (is_active)
1324 desc_nr = rdev2->raid_disk;
1325 else
1326 desc_nr = next_spare++;
1327 rdev2->desc_nr = desc_nr;
1328 d = &sb->disks[rdev2->desc_nr];
1329 nr_disks++;
1330 d->number = rdev2->desc_nr;
1331 d->major = MAJOR(rdev2->bdev->bd_dev);
1332 d->minor = MINOR(rdev2->bdev->bd_dev);
1333 if (is_active)
1334 d->raid_disk = rdev2->raid_disk;
1335 else
1336 d->raid_disk = rdev2->desc_nr;
1337 if (test_bit(Faulty, &rdev2->flags))
1338 d->state = (1<<MD_DISK_FAULTY);
1339 else if (is_active) {
1340 d->state = (1<<MD_DISK_ACTIVE);
1341 if (test_bit(In_sync, &rdev2->flags))
1342 d->state |= (1<<MD_DISK_SYNC);
1343 active++;
1344 working++;
1345 } else {
1346 d->state = 0;
1347 spare++;
1348 working++;
1349 }
1350 if (test_bit(WriteMostly, &rdev2->flags))
1351 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1352 }
1353
1354 for (i=0 ; i < mddev->raid_disks ; i++) {
1355 mdp_disk_t *d = &sb->disks[i];
1356 if (d->state == 0 && d->number == 0) {
1357 d->number = i;
1358 d->raid_disk = i;
1359 d->state = (1<<MD_DISK_REMOVED);
1360 d->state |= (1<<MD_DISK_FAULTY);
1361 failed++;
1362 }
1363 }
1364 sb->nr_disks = nr_disks;
1365 sb->active_disks = active;
1366 sb->working_disks = working;
1367 sb->failed_disks = failed;
1368 sb->spare_disks = spare;
1369
1370 sb->this_disk = sb->disks[rdev->desc_nr];
1371 sb->sb_csum = calc_sb_csum(sb);
1372}
1373
1374
1375
1376
1377static unsigned long long
1378super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1379{
1380 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1381 return 0;
1382 if (rdev->mddev->bitmap_info.offset)
1383 return 0;
1384 rdev->sb_start = calc_dev_sboffset(rdev);
1385 if (!num_sectors || num_sectors > rdev->sb_start)
1386 num_sectors = rdev->sb_start;
1387
1388
1389
1390 if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1391 num_sectors = (2ULL << 32) - 2;
1392 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1393 rdev->sb_page);
1394 md_super_wait(rdev->mddev);
1395 return num_sectors;
1396}
1397
/*
 * allow_new_offset for version 0.90 superblocks: the only data offset
 * accepted is 0 (super_90_load() always sets data_offset to 0).
 */
static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	return new_offset == 0;
}
1404
1405
1406
1407
1408
1409static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1410{
1411 __le32 disk_csum;
1412 u32 csum;
1413 unsigned long long newcsum;
1414 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1415 __le32 *isuper = (__le32*)sb;
1416
1417 disk_csum = sb->sb_csum;
1418 sb->sb_csum = 0;
1419 newcsum = 0;
1420 for (; size >= 4; size -= 4)
1421 newcsum += le32_to_cpu(*isuper++);
1422
1423 if (size == 2)
1424 newcsum += le16_to_cpu(*(__le16*) isuper);
1425
1426 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1427 sb->sb_csum = disk_csum;
1428 return cpu_to_le32(csum);
1429}
1430
1431static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
1432 int acknowledged);
1433static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1434{
1435 struct mdp_superblock_1 *sb;
1436 int ret;
1437 sector_t sb_start;
1438 sector_t sectors;
1439 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1440 int bmask;
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450 switch(minor_version) {
1451 case 0:
1452 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1453 sb_start -= 8*2;
1454 sb_start &= ~(sector_t)(4*2-1);
1455 break;
1456 case 1:
1457 sb_start = 0;
1458 break;
1459 case 2:
1460 sb_start = 8;
1461 break;
1462 default:
1463 return -EINVAL;
1464 }
1465 rdev->sb_start = sb_start;
1466
1467
1468
1469
1470 ret = read_disk_sb(rdev, 4096);
1471 if (ret) return ret;
1472
1473
1474 sb = page_address(rdev->sb_page);
1475
1476 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1477 sb->major_version != cpu_to_le32(1) ||
1478 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1479 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1480 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1481 return -EINVAL;
1482
1483 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1484 printk("md: invalid superblock checksum on %s\n",
1485 bdevname(rdev->bdev,b));
1486 return -EINVAL;
1487 }
1488 if (le64_to_cpu(sb->data_size) < 10) {
1489 printk("md: data_size too small on %s\n",
1490 bdevname(rdev->bdev,b));
1491 return -EINVAL;
1492 }
1493 if (sb->pad0 ||
1494 sb->pad3[0] ||
1495 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1496
1497 return -EINVAL;
1498
1499 rdev->preferred_minor = 0xffff;
1500 rdev->data_offset = le64_to_cpu(sb->data_offset);
1501 rdev->new_data_offset = rdev->data_offset;
1502 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1503 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1504 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1505 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1506
1507 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1508 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1509 if (rdev->sb_size & bmask)
1510 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1511
1512 if (minor_version
1513 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1514 return -EINVAL;
1515 if (minor_version
1516 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1517 return -EINVAL;
1518
1519 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1520 rdev->desc_nr = -1;
1521 else
1522 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1523
1524 if (!rdev->bb_page) {
1525 rdev->bb_page = alloc_page(GFP_KERNEL);
1526 if (!rdev->bb_page)
1527 return -ENOMEM;
1528 }
1529 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1530 rdev->badblocks.count == 0) {
1531
1532
1533
1534 s32 offset;
1535 sector_t bb_sector;
1536 u64 *bbp;
1537 int i;
1538 int sectors = le16_to_cpu(sb->bblog_size);
1539 if (sectors > (PAGE_SIZE / 512))
1540 return -EINVAL;
1541 offset = le32_to_cpu(sb->bblog_offset);
1542 if (offset == 0)
1543 return -EINVAL;
1544 bb_sector = (long long)offset;
1545 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1546 rdev->bb_page, READ, true))
1547 return -EIO;
1548 bbp = (u64 *)page_address(rdev->bb_page);
1549 rdev->badblocks.shift = sb->bblog_shift;
1550 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1551 u64 bb = le64_to_cpu(*bbp);
1552 int count = bb & (0x3ff);
1553 u64 sector = bb >> 10;
1554 sector <<= sb->bblog_shift;
1555 count <<= sb->bblog_shift;
1556 if (bb + 1 == 0)
1557 break;
1558 if (md_set_badblocks(&rdev->badblocks,
1559 sector, count, 1) == 0)
1560 return -EINVAL;
1561 }
1562 } else if (sb->bblog_offset != 0)
1563 rdev->badblocks.shift = 0;
1564
1565 if (!refdev) {
1566 ret = 1;
1567 } else {
1568 __u64 ev1, ev2;
1569 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1570
1571 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1572 sb->level != refsb->level ||
1573 sb->layout != refsb->layout ||
1574 sb->chunksize != refsb->chunksize) {
1575 printk(KERN_WARNING "md: %s has strangely different"
1576 " superblock to %s\n",
1577 bdevname(rdev->bdev,b),
1578 bdevname(refdev->bdev,b2));
1579 return -EINVAL;
1580 }
1581 ev1 = le64_to_cpu(sb->events);
1582 ev2 = le64_to_cpu(refsb->events);
1583
1584 if (ev1 > ev2)
1585 ret = 1;
1586 else
1587 ret = 0;
1588 }
1589 if (minor_version) {
1590 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1591 sectors -= rdev->data_offset;
1592 } else
1593 sectors = rdev->sb_start;
1594 if (sectors < le64_to_cpu(sb->data_size))
1595 return -EINVAL;
1596 rdev->sectors = le64_to_cpu(sb->data_size);
1597 return ret;
1598}
1599
1600static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1601{
1602 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1603 __u64 ev1 = le64_to_cpu(sb->events);
1604
1605 rdev->raid_disk = -1;
1606 clear_bit(Faulty, &rdev->flags);
1607 clear_bit(In_sync, &rdev->flags);
1608 clear_bit(WriteMostly, &rdev->flags);
1609
1610 if (mddev->raid_disks == 0) {
1611 mddev->major_version = 1;
1612 mddev->patch_version = 0;
1613 mddev->external = 0;
1614 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1615 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1616 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1617 mddev->level = le32_to_cpu(sb->level);
1618 mddev->clevel[0] = 0;
1619 mddev->layout = le32_to_cpu(sb->layout);
1620 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1621 mddev->dev_sectors = le64_to_cpu(sb->size);
1622 mddev->events = ev1;
1623 mddev->bitmap_info.offset = 0;
1624 mddev->bitmap_info.space = 0;
1625
1626
1627
1628 mddev->bitmap_info.default_offset = 1024 >> 9;
1629 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1630 mddev->reshape_backwards = 0;
1631
1632 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1633 memcpy(mddev->uuid, sb->set_uuid, 16);
1634
1635 mddev->max_disks = (4096-256)/2;
1636
1637 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1638 mddev->bitmap_info.file == NULL) {
1639 mddev->bitmap_info.offset =
1640 (__s32)le32_to_cpu(sb->bitmap_offset);
1641
1642
1643
1644
1645
1646 if (mddev->minor_version > 0)
1647 mddev->bitmap_info.space = 0;
1648 else if (mddev->bitmap_info.offset > 0)
1649 mddev->bitmap_info.space =
1650 8 - mddev->bitmap_info.offset;
1651 else
1652 mddev->bitmap_info.space =
1653 -mddev->bitmap_info.offset;
1654 }
1655
1656 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1657 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1658 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1659 mddev->new_level = le32_to_cpu(sb->new_level);
1660 mddev->new_layout = le32_to_cpu(sb->new_layout);
1661 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1662 if (mddev->delta_disks < 0 ||
1663 (mddev->delta_disks == 0 &&
1664 (le32_to_cpu(sb->feature_map)
1665 & MD_FEATURE_RESHAPE_BACKWARDS)))
1666 mddev->reshape_backwards = 1;
1667 } else {
1668 mddev->reshape_position = MaxSector;
1669 mddev->delta_disks = 0;
1670 mddev->new_level = mddev->level;
1671 mddev->new_layout = mddev->layout;
1672 mddev->new_chunk_sectors = mddev->chunk_sectors;
1673 }
1674
1675 } else if (mddev->pers == NULL) {
1676
1677
1678 ++ev1;
1679 if (rdev->desc_nr >= 0 &&
1680 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1681 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
1682 if (ev1 < mddev->events)
1683 return -EINVAL;
1684 } else if (mddev->bitmap) {
1685
1686
1687
1688 if (ev1 < mddev->bitmap->events_cleared)
1689 return 0;
1690 } else {
1691 if (ev1 < mddev->events)
1692
1693 return 0;
1694 }
1695 if (mddev->level != LEVEL_MULTIPATH) {
1696 int role;
1697 if (rdev->desc_nr < 0 ||
1698 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1699 role = 0xffff;
1700 rdev->desc_nr = -1;
1701 } else
1702 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1703 switch(role) {
1704 case 0xffff:
1705 break;
1706 case 0xfffe:
1707 set_bit(Faulty, &rdev->flags);
1708 break;
1709 default:
1710 if ((le32_to_cpu(sb->feature_map) &
1711 MD_FEATURE_RECOVERY_OFFSET))
1712 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1713 else
1714 set_bit(In_sync, &rdev->flags);
1715 rdev->raid_disk = role;
1716 break;
1717 }
1718 if (sb->devflags & WriteMostly1)
1719 set_bit(WriteMostly, &rdev->flags);
1720 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1721 set_bit(Replacement, &rdev->flags);
1722 } else
1723 set_bit(In_sync, &rdev->flags);
1724
1725 return 0;
1726}
1727
1728static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1729{
1730 struct mdp_superblock_1 *sb;
1731 struct md_rdev *rdev2;
1732 int max_dev, i;
1733
1734
1735 sb = page_address(rdev->sb_page);
1736
1737 sb->feature_map = 0;
1738 sb->pad0 = 0;
1739 sb->recovery_offset = cpu_to_le64(0);
1740 memset(sb->pad3, 0, sizeof(sb->pad3));
1741
1742 sb->utime = cpu_to_le64((__u64)mddev->utime);
1743 sb->events = cpu_to_le64(mddev->events);
1744 if (mddev->in_sync)
1745 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1746 else
1747 sb->resync_offset = cpu_to_le64(0);
1748
1749 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1750
1751 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1752 sb->size = cpu_to_le64(mddev->dev_sectors);
1753 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1754 sb->level = cpu_to_le32(mddev->level);
1755 sb->layout = cpu_to_le32(mddev->layout);
1756
1757 if (test_bit(WriteMostly, &rdev->flags))
1758 sb->devflags |= WriteMostly1;
1759 else
1760 sb->devflags &= ~WriteMostly1;
1761 sb->data_offset = cpu_to_le64(rdev->data_offset);
1762 sb->data_size = cpu_to_le64(rdev->sectors);
1763
1764 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1765 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1766 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1767 }
1768
1769 if (rdev->raid_disk >= 0 &&
1770 !test_bit(In_sync, &rdev->flags)) {
1771 sb->feature_map |=
1772 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1773 sb->recovery_offset =
1774 cpu_to_le64(rdev->recovery_offset);
1775 }
1776 if (test_bit(Replacement, &rdev->flags))
1777 sb->feature_map |=
1778 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1779
1780 if (mddev->reshape_position != MaxSector) {
1781 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1782 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1783 sb->new_layout = cpu_to_le32(mddev->new_layout);
1784 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1785 sb->new_level = cpu_to_le32(mddev->new_level);
1786 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1787 if (mddev->delta_disks == 0 &&
1788 mddev->reshape_backwards)
1789 sb->feature_map
1790 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1791 if (rdev->new_data_offset != rdev->data_offset) {
1792 sb->feature_map
1793 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1794 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1795 - rdev->data_offset));
1796 }
1797 }
1798
1799 if (rdev->badblocks.count == 0)
1800 ;
1801 else if (sb->bblog_offset == 0)
1802
1803 md_error(mddev, rdev);
1804 else {
1805 struct badblocks *bb = &rdev->badblocks;
1806 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1807 u64 *p = bb->page;
1808 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1809 if (bb->changed) {
1810 unsigned seq;
1811
1812retry:
1813 seq = read_seqbegin(&bb->lock);
1814
1815 memset(bbp, 0xff, PAGE_SIZE);
1816
1817 for (i = 0 ; i < bb->count ; i++) {
1818 u64 internal_bb = p[i];
1819 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1820 | BB_LEN(internal_bb));
1821 bbp[i] = cpu_to_le64(store_bb);
1822 }
1823 bb->changed = 0;
1824 if (read_seqretry(&bb->lock, seq))
1825 goto retry;
1826
1827 bb->sector = (rdev->sb_start +
1828 (int)le32_to_cpu(sb->bblog_offset));
1829 bb->size = le16_to_cpu(sb->bblog_size);
1830 }
1831 }
1832
1833 max_dev = 0;
1834 rdev_for_each(rdev2, mddev)
1835 if (rdev2->desc_nr+1 > max_dev)
1836 max_dev = rdev2->desc_nr+1;
1837
1838 if (max_dev > le32_to_cpu(sb->max_dev)) {
1839 int bmask;
1840 sb->max_dev = cpu_to_le32(max_dev);
1841 rdev->sb_size = max_dev * 2 + 256;
1842 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1843 if (rdev->sb_size & bmask)
1844 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1845 } else
1846 max_dev = le32_to_cpu(sb->max_dev);
1847
1848 for (i=0; i<max_dev;i++)
1849 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1850
1851 rdev_for_each(rdev2, mddev) {
1852 i = rdev2->desc_nr;
1853 if (test_bit(Faulty, &rdev2->flags))
1854 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1855 else if (test_bit(In_sync, &rdev2->flags))
1856 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1857 else if (rdev2->raid_disk >= 0)
1858 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1859 else
1860 sb->dev_roles[i] = cpu_to_le16(0xffff);
1861 }
1862
1863 sb->sb_csum = calc_sb_1_csum(sb);
1864}
1865
1866static unsigned long long
1867super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1868{
1869 struct mdp_superblock_1 *sb;
1870 sector_t max_sectors;
1871 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1872 return 0;
1873 if (rdev->data_offset != rdev->new_data_offset)
1874 return 0;
1875 if (rdev->sb_start < rdev->data_offset) {
1876
1877 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1878 max_sectors -= rdev->data_offset;
1879 if (!num_sectors || num_sectors > max_sectors)
1880 num_sectors = max_sectors;
1881 } else if (rdev->mddev->bitmap_info.offset) {
1882
1883 return 0;
1884 } else {
1885
1886 sector_t sb_start;
1887 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1888 sb_start &= ~(sector_t)(4*2 - 1);
1889 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1890 if (!num_sectors || num_sectors > max_sectors)
1891 num_sectors = max_sectors;
1892 rdev->sb_start = sb_start;
1893 }
1894 sb = page_address(rdev->sb_page);
1895 sb->data_size = cpu_to_le64(num_sectors);
1896 sb->super_offset = rdev->sb_start;
1897 sb->sb_csum = calc_sb_1_csum(sb);
1898 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1899 rdev->sb_page);
1900 md_super_wait(rdev->mddev);
1901 return num_sectors;
1902
1903}
1904
1905static int
1906super_1_allow_new_offset(struct md_rdev *rdev,
1907 unsigned long long new_offset)
1908{
1909
1910 struct bitmap *bitmap;
1911 if (new_offset >= rdev->data_offset)
1912 return 1;
1913
1914
1915
1916 if (rdev->mddev->minor_version == 0)
1917 return 1;
1918
1919
1920
1921
1922
1923
1924
1925 if (rdev->sb_start + (32+4)*2 > new_offset)
1926 return 0;
1927 bitmap = rdev->mddev->bitmap;
1928 if (bitmap && !rdev->mddev->bitmap_info.file &&
1929 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1930 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1931 return 0;
1932 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1933 return 0;
1934
1935 return 1;
1936}
1937
1938static struct super_type super_types[] = {
1939 [0] = {
1940 .name = "0.90.0",
1941 .owner = THIS_MODULE,
1942 .load_super = super_90_load,
1943 .validate_super = super_90_validate,
1944 .sync_super = super_90_sync,
1945 .rdev_size_change = super_90_rdev_size_change,
1946 .allow_new_offset = super_90_allow_new_offset,
1947 },
1948 [1] = {
1949 .name = "md-1",
1950 .owner = THIS_MODULE,
1951 .load_super = super_1_load,
1952 .validate_super = super_1_validate,
1953 .sync_super = super_1_sync,
1954 .rdev_size_change = super_1_rdev_size_change,
1955 .allow_new_offset = super_1_allow_new_offset,
1956 },
1957};
1958
1959static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1960{
1961 if (mddev->sync_super) {
1962 mddev->sync_super(mddev, rdev);
1963 return;
1964 }
1965
1966 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1967
1968 super_types[mddev->major_version].sync_super(mddev, rdev);
1969}
1970
1971static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1972{
1973 struct md_rdev *rdev, *rdev2;
1974
1975 rcu_read_lock();
1976 rdev_for_each_rcu(rdev, mddev1)
1977 rdev_for_each_rcu(rdev2, mddev2)
1978 if (rdev->bdev->bd_contains ==
1979 rdev2->bdev->bd_contains) {
1980 rcu_read_unlock();
1981 return 1;
1982 }
1983 rcu_read_unlock();
1984 return 0;
1985}
1986
1987static LIST_HEAD(pending_raid_disks);
1988
1989
1990
1991
1992
1993
1994
1995
1996int md_integrity_register(struct mddev *mddev)
1997{
1998 struct md_rdev *rdev, *reference = NULL;
1999
2000 if (list_empty(&mddev->disks))
2001 return 0;
2002 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2003 return 0;
2004 rdev_for_each(rdev, mddev) {
2005
2006 if (test_bit(Faulty, &rdev->flags))
2007 continue;
2008 if (rdev->raid_disk < 0)
2009 continue;
2010 if (!reference) {
2011
2012 reference = rdev;
2013 continue;
2014 }
2015
2016 if (blk_integrity_compare(reference->bdev->bd_disk,
2017 rdev->bdev->bd_disk) < 0)
2018 return -EINVAL;
2019 }
2020 if (!reference || !bdev_get_integrity(reference->bdev))
2021 return 0;
2022
2023
2024
2025
2026 if (blk_integrity_register(mddev->gendisk,
2027 bdev_get_integrity(reference->bdev)) != 0) {
2028 printk(KERN_ERR "md: failed to register integrity for %s\n",
2029 mdname(mddev));
2030 return -EINVAL;
2031 }
2032 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2033 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2034 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2035 mdname(mddev));
2036 return -EINVAL;
2037 }
2038 return 0;
2039}
2040EXPORT_SYMBOL(md_integrity_register);
2041
2042
2043void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2044{
2045 struct blk_integrity *bi_rdev;
2046 struct blk_integrity *bi_mddev;
2047
2048 if (!mddev->gendisk)
2049 return;
2050
2051 bi_rdev = bdev_get_integrity(rdev->bdev);
2052 bi_mddev = blk_get_integrity(mddev->gendisk);
2053
2054 if (!bi_mddev)
2055 return;
2056 if (rdev->raid_disk < 0)
2057 return;
2058 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
2059 rdev->bdev->bd_disk) >= 0)
2060 return;
2061 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
2062 blk_integrity_unregister(mddev->gendisk);
2063}
2064EXPORT_SYMBOL(md_integrity_add_rdev);
2065
2066static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
2067{
2068 char b[BDEVNAME_SIZE];
2069 struct kobject *ko;
2070 char *s;
2071 int err;
2072
2073 if (rdev->mddev) {
2074 MD_BUG();
2075 return -EINVAL;
2076 }
2077
2078
2079 if (find_rdev(mddev, rdev->bdev->bd_dev))
2080 return -EEXIST;
2081
2082
2083 if (rdev->sectors && (mddev->dev_sectors == 0 ||
2084 rdev->sectors < mddev->dev_sectors)) {
2085 if (mddev->pers) {
2086
2087
2088
2089
2090 if (mddev->level > 0)
2091 return -ENOSPC;
2092 } else
2093 mddev->dev_sectors = rdev->sectors;
2094 }
2095
2096
2097
2098
2099
2100 if (rdev->desc_nr < 0) {
2101 int choice = 0;
2102 if (mddev->pers) choice = mddev->raid_disks;
2103 while (find_rdev_nr(mddev, choice))
2104 choice++;
2105 rdev->desc_nr = choice;
2106 } else {
2107 if (find_rdev_nr(mddev, rdev->desc_nr))
2108 return -EBUSY;
2109 }
2110 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2111 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2112 mdname(mddev), mddev->max_disks);
2113 return -EBUSY;
2114 }
2115 bdevname(rdev->bdev,b);
2116 while ( (s=strchr(b, '/')) != NULL)
2117 *s = '!';
2118
2119 rdev->mddev = mddev;
2120 printk(KERN_INFO "md: bind<%s>\n", b);
2121
2122 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2123 goto fail;
2124
2125 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2126 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2127 ;
2128 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2129
2130 list_add_rcu(&rdev->same_set, &mddev->disks);
2131 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2132
2133
2134 mddev->recovery_disabled++;
2135
2136 return 0;
2137
2138 fail:
2139 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2140 b, mdname(mddev));
2141 return err;
2142}
2143
2144static void md_delayed_delete(struct work_struct *ws)
2145{
2146 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2147 kobject_del(&rdev->kobj);
2148 kobject_put(&rdev->kobj);
2149}
2150
2151static void unbind_rdev_from_array(struct md_rdev * rdev)
2152{
2153 char b[BDEVNAME_SIZE];
2154 if (!rdev->mddev) {
2155 MD_BUG();
2156 return;
2157 }
2158 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2159 list_del_rcu(&rdev->same_set);
2160 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2161 rdev->mddev = NULL;
2162 sysfs_remove_link(&rdev->kobj, "block");
2163 sysfs_put(rdev->sysfs_state);
2164 rdev->sysfs_state = NULL;
2165 rdev->badblocks.count = 0;
2166
2167
2168
2169
2170 synchronize_rcu();
2171 INIT_WORK(&rdev->del_work, md_delayed_delete);
2172 kobject_get(&rdev->kobj);
2173 queue_work(md_misc_wq, &rdev->del_work);
2174}
2175
2176
2177
2178
2179
2180
2181static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2182{
2183 int err = 0;
2184 struct block_device *bdev;
2185 char b[BDEVNAME_SIZE];
2186
2187 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2188 shared ? (struct md_rdev *)lock_rdev : rdev);
2189 if (IS_ERR(bdev)) {
2190 printk(KERN_ERR "md: could not open %s.\n",
2191 __bdevname(dev, b));
2192 return PTR_ERR(bdev);
2193 }
2194 rdev->bdev = bdev;
2195 return err;
2196}
2197
2198static void unlock_rdev(struct md_rdev *rdev)
2199{
2200 struct block_device *bdev = rdev->bdev;
2201 rdev->bdev = NULL;
2202 if (!bdev)
2203 MD_BUG();
2204 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2205}
2206
2207void md_autodetect_dev(dev_t dev);
2208
2209static void export_rdev(struct md_rdev * rdev)
2210{
2211 char b[BDEVNAME_SIZE];
2212 printk(KERN_INFO "md: export_rdev(%s)\n",
2213 bdevname(rdev->bdev,b));
2214 if (rdev->mddev)
2215 MD_BUG();
2216 md_rdev_clear(rdev);
2217#ifndef MODULE
2218 if (test_bit(AutoDetected, &rdev->flags))
2219 md_autodetect_dev(rdev->bdev->bd_dev);
2220#endif
2221 unlock_rdev(rdev);
2222 kobject_put(&rdev->kobj);
2223}
2224
/* Detach @rdev from its array, then export (release) it. */
static void kick_rdev_from_array(struct md_rdev * rdev)
{
	unbind_rdev_from_array(rdev);

	export_rdev(rdev);
}
2230
2231static void export_array(struct mddev *mddev)
2232{
2233 struct md_rdev *rdev, *tmp;
2234
2235 rdev_for_each_safe(rdev, tmp, mddev) {
2236 if (!rdev->mddev) {
2237 MD_BUG();
2238 continue;
2239 }
2240 kick_rdev_from_array(rdev);
2241 }
2242 if (!list_empty(&mddev->disks))
2243 MD_BUG();
2244 mddev->raid_disks = 0;
2245 mddev->major_version = 0;
2246}
2247
2248static void print_desc(mdp_disk_t *desc)
2249{
2250 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
2251 desc->major,desc->minor,desc->raid_disk,desc->state);
2252}
2253
2254static void print_sb_90(mdp_super_t *sb)
2255{
2256 int i;
2257
2258 printk(KERN_INFO
2259 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
2260 sb->major_version, sb->minor_version, sb->patch_version,
2261 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
2262 sb->ctime);
2263 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
2264 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
2265 sb->md_minor, sb->layout, sb->chunk_size);
2266 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
2267 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
2268 sb->utime, sb->state, sb->active_disks, sb->working_disks,
2269 sb->failed_disks, sb->spare_disks,
2270 sb->sb_csum, (unsigned long)sb->events_lo);
2271
2272 printk(KERN_INFO);
2273 for (i = 0; i < MD_SB_DISKS; i++) {
2274 mdp_disk_t *desc;
2275
2276 desc = sb->disks + i;
2277 if (desc->number || desc->major || desc->minor ||
2278 desc->raid_disk || (desc->state && (desc->state != 4))) {
2279 printk(" D %2d: ", i);
2280 print_desc(desc);
2281 }
2282 }
2283 printk(KERN_INFO "md: THIS: ");
2284 print_desc(&sb->this_disk);
2285}
2286
2287static void print_sb_1(struct mdp_superblock_1 *sb)
2288{
2289 __u8 *uuid;
2290
2291 uuid = sb->set_uuid;
2292 printk(KERN_INFO
2293 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
2294 "md: Name: \"%s\" CT:%llu\n",
2295 le32_to_cpu(sb->major_version),
2296 le32_to_cpu(sb->feature_map),
2297 uuid,
2298 sb->set_name,
2299 (unsigned long long)le64_to_cpu(sb->ctime)
2300 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
2301
2302 uuid = sb->device_uuid;
2303 printk(KERN_INFO
2304 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
2305 " RO:%llu\n"
2306 "md: Dev:%08x UUID: %pU\n"
2307 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
2308 "md: (MaxDev:%u) \n",
2309 le32_to_cpu(sb->level),
2310 (unsigned long long)le64_to_cpu(sb->size),
2311 le32_to_cpu(sb->raid_disks),
2312 le32_to_cpu(sb->layout),
2313 le32_to_cpu(sb->chunksize),
2314 (unsigned long long)le64_to_cpu(sb->data_offset),
2315 (unsigned long long)le64_to_cpu(sb->data_size),
2316 (unsigned long long)le64_to_cpu(sb->super_offset),
2317 (unsigned long long)le64_to_cpu(sb->recovery_offset),
2318 le32_to_cpu(sb->dev_number),
2319 uuid,
2320 sb->devflags,
2321 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
2322 (unsigned long long)le64_to_cpu(sb->events),
2323 (unsigned long long)le64_to_cpu(sb->resync_offset),
2324 le32_to_cpu(sb->sb_csum),
2325 le32_to_cpu(sb->max_dev)
2326 );
2327}
2328
2329static void print_rdev(struct md_rdev *rdev, int major_version)
2330{
2331 char b[BDEVNAME_SIZE];
2332 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
2333 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
2334 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
2335 rdev->desc_nr);
2336 if (rdev->sb_loaded) {
2337 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
2338 switch (major_version) {
2339 case 0:
2340 print_sb_90(page_address(rdev->sb_page));
2341 break;
2342 case 1:
2343 print_sb_1(page_address(rdev->sb_page));
2344 break;
2345 }
2346 } else
2347 printk(KERN_INFO "md: no rdev superblock!\n");
2348}
2349
2350static void md_print_devices(void)
2351{
2352 struct list_head *tmp;
2353 struct md_rdev *rdev;
2354 struct mddev *mddev;
2355 char b[BDEVNAME_SIZE];
2356
2357 printk("\n");
2358 printk("md: **********************************\n");
2359 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
2360 printk("md: **********************************\n");
2361 for_each_mddev(mddev, tmp) {
2362
2363 if (mddev->bitmap)
2364 bitmap_print_sb(mddev->bitmap);
2365 else
2366 printk("%s: ", mdname(mddev));
2367 rdev_for_each(rdev, mddev)
2368 printk("<%s>", bdevname(rdev->bdev,b));
2369 printk("\n");
2370
2371 rdev_for_each(rdev, mddev)
2372 print_rdev(rdev, mddev->major_version);
2373 }
2374 printk("md: **********************************\n");
2375 printk("\n");
2376}
2377
2378
2379static void sync_sbs(struct mddev * mddev, int nospares)
2380{
2381
2382
2383
2384
2385
2386
2387 struct md_rdev *rdev;
2388 rdev_for_each(rdev, mddev) {
2389 if (rdev->sb_events == mddev->events ||
2390 (nospares &&
2391 rdev->raid_disk < 0 &&
2392 rdev->sb_events+1 == mddev->events)) {
2393
2394 rdev->sb_loaded = 2;
2395 } else {
2396 sync_super(mddev, rdev);
2397 rdev->sb_loaded = 1;
2398 }
2399 }
2400}
2401
2402static void md_update_sb(struct mddev * mddev, int force_change)
2403{
2404 struct md_rdev *rdev;
2405 int sync_req;
2406 int nospares = 0;
2407 int any_badblocks_changed = 0;
2408
2409 if (mddev->ro) {
2410 if (force_change)
2411 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2412 return;
2413 }
2414repeat:
2415
2416 rdev_for_each(rdev, mddev) {
2417 if (rdev->raid_disk >= 0 &&
2418 mddev->delta_disks >= 0 &&
2419 !test_bit(In_sync, &rdev->flags) &&
2420 mddev->curr_resync_completed > rdev->recovery_offset)
2421 rdev->recovery_offset = mddev->curr_resync_completed;
2422
2423 }
2424 if (!mddev->persistent) {
2425 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2426 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2427 if (!mddev->external) {
2428 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2429 rdev_for_each(rdev, mddev) {
2430 if (rdev->badblocks.changed) {
2431 rdev->badblocks.changed = 0;
2432 md_ack_all_badblocks(&rdev->badblocks);
2433 md_error(mddev, rdev);
2434 }
2435 clear_bit(Blocked, &rdev->flags);
2436 clear_bit(BlockedBadBlocks, &rdev->flags);
2437 wake_up(&rdev->blocked_wait);
2438 }
2439 }
2440 wake_up(&mddev->sb_wait);
2441 return;
2442 }
2443
2444 spin_lock_irq(&mddev->write_lock);
2445
2446 mddev->utime = get_seconds();
2447
2448 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2449 force_change = 1;
2450 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
2451
2452
2453
2454
2455 nospares = 1;
2456 if (force_change)
2457 nospares = 0;
2458 if (mddev->degraded)
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468 nospares = 0;
2469
2470 sync_req = mddev->in_sync;
2471
2472
2473
2474 if (nospares
2475 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2476 && mddev->can_decrease_events
2477 && mddev->events != 1) {
2478 mddev->events--;
2479 mddev->can_decrease_events = 0;
2480 } else {
2481
2482 mddev->events ++;
2483 mddev->can_decrease_events = nospares;
2484 }
2485
2486 if (!mddev->events) {
2487
2488
2489
2490
2491
2492 MD_BUG();
2493 mddev->events --;
2494 }
2495
2496 rdev_for_each(rdev, mddev) {
2497 if (rdev->badblocks.changed)
2498 any_badblocks_changed++;
2499 if (test_bit(Faulty, &rdev->flags))
2500 set_bit(FaultRecorded, &rdev->flags);
2501 }
2502
2503 sync_sbs(mddev, nospares);
2504 spin_unlock_irq(&mddev->write_lock);
2505
2506 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2507 mdname(mddev), mddev->in_sync);
2508
2509 bitmap_update_sb(mddev->bitmap);
2510 rdev_for_each(rdev, mddev) {
2511 char b[BDEVNAME_SIZE];
2512
2513 if (rdev->sb_loaded != 1)
2514 continue;
2515
2516 if (!test_bit(Faulty, &rdev->flags) &&
2517 rdev->saved_raid_disk == -1) {
2518 md_super_write(mddev,rdev,
2519 rdev->sb_start, rdev->sb_size,
2520 rdev->sb_page);
2521 pr_debug("md: (write) %s's sb offset: %llu\n",
2522 bdevname(rdev->bdev, b),
2523 (unsigned long long)rdev->sb_start);
2524 rdev->sb_events = mddev->events;
2525 if (rdev->badblocks.size) {
2526 md_super_write(mddev, rdev,
2527 rdev->badblocks.sector,
2528 rdev->badblocks.size << 9,
2529 rdev->bb_page);
2530 rdev->badblocks.size = 0;
2531 }
2532
2533 } else if (test_bit(Faulty, &rdev->flags))
2534 pr_debug("md: %s (skipping faulty)\n",
2535 bdevname(rdev->bdev, b));
2536 else
2537 pr_debug("(skipping incremental s/r ");
2538
2539 if (mddev->level == LEVEL_MULTIPATH)
2540
2541 break;
2542 }
2543 md_super_wait(mddev);
2544
2545
2546 spin_lock_irq(&mddev->write_lock);
2547 if (mddev->in_sync != sync_req ||
2548 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2549
2550 spin_unlock_irq(&mddev->write_lock);
2551 goto repeat;
2552 }
2553 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2554 spin_unlock_irq(&mddev->write_lock);
2555 wake_up(&mddev->sb_wait);
2556 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2557 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2558
2559 rdev_for_each(rdev, mddev) {
2560 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2561 clear_bit(Blocked, &rdev->flags);
2562
2563 if (any_badblocks_changed)
2564 md_ack_all_badblocks(&rdev->badblocks);
2565 clear_bit(BlockedBadBlocks, &rdev->flags);
2566 wake_up(&rdev->blocked_wait);
2567 }
2568}
2569
2570
2571
2572
/*
 * Compare a user-supplied command with an expected keyword.
 *
 * Returns 1 when @cmd equals @str exactly, or equals @str followed by a
 * single trailing newline; returns 0 otherwise.
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* Walk both strings for as long as they agree. */
	for (; *cmd != '\0' && *str != '\0'; cmd++, str++) {
		if (*cmd != *str)
			break;
	}

	/* Tolerate one newline after the matched part of @cmd. */
	if (*cmd == '\n')
		cmd++;

	/* Both strings must now be fully consumed. */
	return (*str == '\0' && *cmd == '\0') ? 1 : 0;
}
2589
2590struct rdev_sysfs_entry {
2591 struct attribute attr;
2592 ssize_t (*show)(struct md_rdev *, char *);
2593 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2594};
2595
2596static ssize_t
2597state_show(struct md_rdev *rdev, char *page)
2598{
2599 char *sep = "";
2600 size_t len = 0;
2601
2602 if (test_bit(Faulty, &rdev->flags) ||
2603 rdev->badblocks.unacked_exist) {
2604 len+= sprintf(page+len, "%sfaulty",sep);
2605 sep = ",";
2606 }
2607 if (test_bit(In_sync, &rdev->flags)) {
2608 len += sprintf(page+len, "%sin_sync",sep);
2609 sep = ",";
2610 }
2611 if (test_bit(WriteMostly, &rdev->flags)) {
2612 len += sprintf(page+len, "%swrite_mostly",sep);
2613 sep = ",";
2614 }
2615 if (test_bit(Blocked, &rdev->flags) ||
2616 (rdev->badblocks.unacked_exist
2617 && !test_bit(Faulty, &rdev->flags))) {
2618 len += sprintf(page+len, "%sblocked", sep);
2619 sep = ",";
2620 }
2621 if (!test_bit(Faulty, &rdev->flags) &&
2622 !test_bit(In_sync, &rdev->flags)) {
2623 len += sprintf(page+len, "%sspare", sep);
2624 sep = ",";
2625 }
2626 if (test_bit(WriteErrorSeen, &rdev->flags)) {
2627 len += sprintf(page+len, "%swrite_error", sep);
2628 sep = ",";
2629 }
2630 if (test_bit(WantReplacement, &rdev->flags)) {
2631 len += sprintf(page+len, "%swant_replacement", sep);
2632 sep = ",";
2633 }
2634 if (test_bit(Replacement, &rdev->flags)) {
2635 len += sprintf(page+len, "%sreplacement", sep);
2636 sep = ",";
2637 }
2638
2639 return len+sprintf(page+len, "\n");
2640}
2641
/*
 * Handle a write to the rdev "state" attribute.  One command word is
 * accepted per write (a trailing newline is tolerated by cmd_match()).
 * Returns 'len' on success, -EINVAL for an unrecognised word (or one
 * whose precondition in the if-chain is not met) and -EBUSY where a
 * branch below refuses the request.  On success the sysfs state dirent
 * is notified.
 */
static ssize_t
state_store(struct md_rdev *rdev, const char *buf, size_t len)
{
 /* recognised words:
  *  faulty            - call md_error() on the device (only while a personality is active)
  *  remove            - kick the device from the array (only when it holds no slot)
  *  writemostly       - set WriteMostly
  *  -writemostly      - clear WriteMostly
  *  blocked           - set Blocked
  *  -blocked          - clear Blocked and BlockedBadBlocks, wake waiters
  *  insync            - set In_sync (only when raid_disk == -1)
  *  write_error       - set WriteErrorSeen
  *  -write_error      - clear WriteErrorSeen
  *  want_replacement / -want_replacement, replacement / -replacement: see below
  */
 int err = -EINVAL;
 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
 md_error(rdev->mddev, rdev);
 /* success only if md_error() actually left the device marked Faulty */
 if (test_bit(Faulty, &rdev->flags))
 err = 0;
 else
 err = -EBUSY;
 } else if (cmd_match(buf, "remove")) {
 /* a device that still occupies a slot cannot be removed */
 if (rdev->raid_disk >= 0)
 err = -EBUSY;
 else {
 /* save mddev first: rdev is not used again after being kicked */
 struct mddev *mddev = rdev->mddev;
 kick_rdev_from_array(rdev);
 if (mddev->pers)
 md_update_sb(mddev, 1);
 md_new_event(mddev);
 err = 0;
 }
 } else if (cmd_match(buf, "writemostly")) {
 set_bit(WriteMostly, &rdev->flags);
 err = 0;
 } else if (cmd_match(buf, "-writemostly")) {
 clear_bit(WriteMostly, &rdev->flags);
 err = 0;
 } else if (cmd_match(buf, "blocked")) {
 set_bit(Blocked, &rdev->flags);
 err = 0;
 } else if (cmd_match(buf, "-blocked")) {
 if (!test_bit(Faulty, &rdev->flags) &&
 rdev->badblocks.unacked_exist) {
 /* unblocking while bad blocks are still unacknowledged:
  * the device is failed via md_error() instead.
  * NOTE(review): presumably because the metadata handler
  * did not acknowledge them - confirm. */
 md_error(rdev->mddev, rdev);
 }
 clear_bit(Blocked, &rdev->flags);
 clear_bit(BlockedBadBlocks, &rdev->flags);
 wake_up(&rdev->blocked_wait);
 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
 md_wakeup_thread(rdev->mddev->thread);

 err = 0;
 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
 set_bit(In_sync, &rdev->flags);
 err = 0;
 } else if (cmd_match(buf, "write_error")) {
 set_bit(WriteErrorSeen, &rdev->flags);
 err = 0;
 } else if (cmd_match(buf, "-write_error")) {
 clear_bit(WriteErrorSeen, &rdev->flags);
 err = 0;
 } else if (cmd_match(buf, "want_replacement")) {
 /* WantReplacement is only set on a device that holds a slot
  * and is not itself a replacement; the recovery thread is
  * woken and success is reported either way.
  */
 if (rdev->raid_disk >= 0 &&
 !test_bit(Replacement, &rdev->flags))
 set_bit(WantReplacement, &rdev->flags);
 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
 md_wakeup_thread(rdev->mddev->thread);
 err = 0;
 } else if (cmd_match(buf, "-want_replacement")) {
 /* clearing the flag is always permitted; nothing else is
  * done here
  */
 err = 0;
 clear_bit(WantReplacement, &rdev->flags);
 } else if (cmd_match(buf, "replacement")) {
 /* the Replacement flag may only be changed while no
  * personality is active on the array; otherwise -EBUSY
  */
 if (rdev->mddev->pers)
 err = -EBUSY;
 else {
 set_bit(Replacement, &rdev->flags);
 err = 0;
 }
 } else if (cmd_match(buf, "-replacement")) {
 /* same restriction as "replacement" */
 if (rdev->mddev->pers)
 err = -EBUSY;
 else {
 clear_bit(Replacement, &rdev->flags);
 err = 0;
 }
 }
 if (!err)
 sysfs_notify_dirent_safe(rdev->sysfs_state);
 return err ? err : len;
}
/* "state" attribute: world-readable, owner-writable */
static struct rdev_sysfs_entry rdev_state =
__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2750
2751static ssize_t
2752errors_show(struct md_rdev *rdev, char *page)
2753{
2754 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2755}
2756
2757static ssize_t
2758errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2759{
2760 char *e;
2761 unsigned long n = simple_strtoul(buf, &e, 10);
2762 if (*buf && (*e == 0 || *e == '\n')) {
2763 atomic_set(&rdev->corrected_errors, n);
2764 return len;
2765 }
2766 return -EINVAL;
2767}
2768static struct rdev_sysfs_entry rdev_errors =
2769__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2770
2771static ssize_t
2772slot_show(struct md_rdev *rdev, char *page)
2773{
2774 if (rdev->raid_disk < 0)
2775 return sprintf(page, "none\n");
2776 else
2777 return sprintf(page, "%d\n", rdev->raid_disk);
2778}
2779
/*
 * Handle a write to the rdev "slot" attribute.  The input is either a
 * decimal slot number or a string starting with "none" (slot -1).
 * Three cases are distinguished below: removing a device from a running
 * array, adding a device to a running array, and assigning a slot while
 * no personality is active.  Returns 'len' on success or a negative
 * errno.
 */
static ssize_t
slot_store(struct md_rdev *rdev, const char *buf, size_t len)
{
 char *e;
 int err;
 int slot = simple_strtoul(buf, &e, 10);
 /* only the first four characters are compared, so any input
  * beginning with "none" selects slot -1 */
 if (strncmp(buf, "none", 4)==0)
 slot = -1;
 else if (e==buf || (*e && *e!= '\n'))
 return -EINVAL;
 if (rdev->mddev->pers && slot == -1) {
 /* Running array, slot "none": take the device out of its
  * slot.  The personality must provide ->hot_remove_disk;
  * the actual removal is delegated to
  * remove_and_add_spares().  If the device still holds a
  * slot afterwards the request is reported as -EBUSY.
  */
 /* already slot-less: nothing to remove */
 if (rdev->raid_disk == -1)
 return -EEXIST;
 /* personality does all needed checks */
 if (rdev->mddev->pers->hot_remove_disk == NULL)
 return -EINVAL;
 clear_bit(Blocked, &rdev->flags);
 remove_and_add_spares(rdev->mddev, rdev);
 if (rdev->raid_disk >= 0)
 return -EBUSY;
 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
 md_wakeup_thread(rdev->mddev->thread);
 } else if (rdev->mddev->pers) {
 /* Running array, numeric slot: hot-add the device.  Only a
  * device that currently holds no slot may be added, and
  * not while a recovery is running.
  */
 if (rdev->raid_disk != -1)
 return -EBUSY;

 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
 return -EBUSY;

 if (rdev->mddev->pers->hot_add_disk == NULL)
 return -EINVAL;

 /* slot must fit in both the current and the pre-reshape disk count */
 if (slot >= rdev->mddev->raid_disks &&
 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
 return -ENOSPC;

 rdev->raid_disk = slot;
 /* remember the slot only if the device claimed to be in sync */
 if (test_bit(In_sync, &rdev->flags))
 rdev->saved_raid_disk = slot;
 else
 rdev->saved_raid_disk = -1;
 clear_bit(In_sync, &rdev->flags);
 err = rdev->mddev->pers->
 hot_add_disk(rdev->mddev, rdev);
 if (err) {
 /* undo the slot assignment on failure */
 rdev->raid_disk = -1;
 return err;
 } else
 sysfs_notify_dirent_safe(rdev->sysfs_state);
 /* a failure to create the sysfs link is deliberately ignored */
 if (sysfs_link_rdev(rdev->mddev, rdev))
 ;
 /* don't wakeup anyone, leave that to userspace. */
 } else {
 /* No personality active: just record the slot and mark the
  * device as a clean, in-sync member. */
 if (slot >= rdev->mddev->raid_disks &&
 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
 return -ENOSPC;
 rdev->raid_disk = slot;
 /* assume it is working */
 clear_bit(Faulty, &rdev->flags);
 clear_bit(WriteMostly, &rdev->flags);
 set_bit(In_sync, &rdev->flags);
 sysfs_notify_dirent_safe(rdev->sysfs_state);
 }
 return len;
}


/* "slot" attribute: world-readable, owner-writable */
static struct rdev_sysfs_entry rdev_slot =
__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2860
2861static ssize_t
2862offset_show(struct md_rdev *rdev, char *page)
2863{
2864 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2865}
2866
2867static ssize_t
2868offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2869{
2870 unsigned long long offset;
2871 if (kstrtoull(buf, 10, &offset) < 0)
2872 return -EINVAL;
2873 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2874 return -EBUSY;
2875 if (rdev->sectors && rdev->mddev->external)
2876
2877
2878 return -EBUSY;
2879 rdev->data_offset = offset;
2880 rdev->new_data_offset = offset;
2881 return len;
2882}
2883
2884static struct rdev_sysfs_entry rdev_offset =
2885__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2886
2887static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2888{
2889 return sprintf(page, "%llu\n",
2890 (unsigned long long)rdev->new_data_offset);
2891}
2892
2893static ssize_t new_offset_store(struct md_rdev *rdev,
2894 const char *buf, size_t len)
2895{
2896 unsigned long long new_offset;
2897 struct mddev *mddev = rdev->mddev;
2898
2899 if (kstrtoull(buf, 10, &new_offset) < 0)
2900 return -EINVAL;
2901
2902 if (mddev->sync_thread)
2903 return -EBUSY;
2904 if (new_offset == rdev->data_offset)
2905
2906 ;
2907 else if (new_offset > rdev->data_offset) {
2908
2909 if (new_offset - rdev->data_offset
2910 + mddev->dev_sectors > rdev->sectors)
2911 return -E2BIG;
2912 }
2913
2914
2915
2916
2917
2918 if (new_offset < rdev->data_offset &&
2919 mddev->reshape_backwards)
2920 return -EINVAL;
2921
2922
2923
2924
2925 if (new_offset > rdev->data_offset &&
2926 !mddev->reshape_backwards)
2927 return -EINVAL;
2928
2929 if (mddev->pers && mddev->persistent &&
2930 !super_types[mddev->major_version]
2931 .allow_new_offset(rdev, new_offset))
2932 return -E2BIG;
2933 rdev->new_data_offset = new_offset;
2934 if (new_offset > rdev->data_offset)
2935 mddev->reshape_backwards = 1;
2936 else if (new_offset < rdev->data_offset)
2937 mddev->reshape_backwards = 0;
2938
2939 return len;
2940}
2941static struct rdev_sysfs_entry rdev_new_offset =
2942__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2943
2944static ssize_t
2945rdev_size_show(struct md_rdev *rdev, char *page)
2946{
2947 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2948}
2949
2950static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2951{
2952
2953 if (s1+l1 <= s2)
2954 return 0;
2955 if (s2+l2 <= s1)
2956 return 0;
2957 return 1;
2958}
2959
2960static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2961{
2962 unsigned long long blocks;
2963 sector_t new;
2964
2965 if (kstrtoull(buf, 10, &blocks) < 0)
2966 return -EINVAL;
2967
2968 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2969 return -EINVAL;
2970
2971 new = blocks * 2;
2972 if (new != blocks * 2)
2973 return -EINVAL;
2974
2975 *sectors = new;
2976 return 0;
2977}
2978
2979static ssize_t
2980rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2981{
2982 struct mddev *my_mddev = rdev->mddev;
2983 sector_t oldsectors = rdev->sectors;
2984 sector_t sectors;
2985
2986 if (strict_blocks_to_sectors(buf, §ors) < 0)
2987 return -EINVAL;
2988 if (rdev->data_offset != rdev->new_data_offset)
2989 return -EINVAL;
2990 if (my_mddev->pers && rdev->raid_disk >= 0) {
2991 if (my_mddev->persistent) {
2992 sectors = super_types[my_mddev->major_version].
2993 rdev_size_change(rdev, sectors);
2994 if (!sectors)
2995 return -EBUSY;
2996 } else if (!sectors)
2997 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2998 rdev->data_offset;
2999 if (!my_mddev->pers->resize)
3000
3001 return -EINVAL;
3002 }
3003 if (sectors < my_mddev->dev_sectors)
3004 return -EINVAL;
3005
3006 rdev->sectors = sectors;
3007 if (sectors > oldsectors && my_mddev->external) {
3008
3009
3010
3011
3012
3013 struct mddev *mddev;
3014 int overlap = 0;
3015 struct list_head *tmp;
3016
3017 mddev_unlock(my_mddev);
3018 for_each_mddev(mddev, tmp) {
3019 struct md_rdev *rdev2;
3020
3021 mddev_lock(mddev);
3022 rdev_for_each(rdev2, mddev)
3023 if (rdev->bdev == rdev2->bdev &&
3024 rdev != rdev2 &&
3025 overlaps(rdev->data_offset, rdev->sectors,
3026 rdev2->data_offset,
3027 rdev2->sectors)) {
3028 overlap = 1;
3029 break;
3030 }
3031 mddev_unlock(mddev);
3032 if (overlap) {
3033 mddev_put(mddev);
3034 break;
3035 }
3036 }
3037 mddev_lock(my_mddev);
3038 if (overlap) {
3039
3040
3041
3042
3043
3044
3045 rdev->sectors = oldsectors;
3046 return -EBUSY;
3047 }
3048 }
3049 return len;
3050}
3051
3052static struct rdev_sysfs_entry rdev_size =
3053__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3054
3055
3056static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3057{
3058 unsigned long long recovery_start = rdev->recovery_offset;
3059
3060 if (test_bit(In_sync, &rdev->flags) ||
3061 recovery_start == MaxSector)
3062 return sprintf(page, "none\n");
3063
3064 return sprintf(page, "%llu\n", recovery_start);
3065}
3066
3067static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3068{
3069 unsigned long long recovery_start;
3070
3071 if (cmd_match(buf, "none"))
3072 recovery_start = MaxSector;
3073 else if (kstrtoull(buf, 10, &recovery_start))
3074 return -EINVAL;
3075
3076 if (rdev->mddev->pers &&
3077 rdev->raid_disk >= 0)
3078 return -EBUSY;
3079
3080 rdev->recovery_offset = recovery_start;
3081 if (recovery_start == MaxSector)
3082 set_bit(In_sync, &rdev->flags);
3083 else
3084 clear_bit(In_sync, &rdev->flags);
3085 return len;
3086}
3087
3088static struct rdev_sysfs_entry rdev_recovery_start =
3089__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3090
3091
3092static ssize_t
3093badblocks_show(struct badblocks *bb, char *page, int unack);
3094static ssize_t
3095badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
3096
3097static ssize_t bb_show(struct md_rdev *rdev, char *page)
3098{
3099 return badblocks_show(&rdev->badblocks, page, 0);
3100}
3101static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3102{
3103 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3104
3105 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3106 wake_up(&rdev->blocked_wait);
3107 return rv;
3108}
3109static struct rdev_sysfs_entry rdev_bad_blocks =
3110__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3111
3112
3113static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3114{
3115 return badblocks_show(&rdev->badblocks, page, 1);
3116}
3117static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3118{
3119 return badblocks_store(&rdev->badblocks, page, len, 1);
3120}
3121static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3122__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3123
/*
 * NULL-terminated list of the attributes every rdev kobject gets by
 * default; installed through rdev_ktype.default_attrs.
 */
static struct attribute *rdev_default_attrs[] = {
 &rdev_state.attr,
 &rdev_errors.attr,
 &rdev_slot.attr,
 &rdev_offset.attr,
 &rdev_new_offset.attr,
 &rdev_size.attr,
 &rdev_recovery_start.attr,
 &rdev_bad_blocks.attr,
 &rdev_unack_bad_blocks.attr,
 NULL,
};
3136static ssize_t
3137rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3138{
3139 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3140 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3141 struct mddev *mddev = rdev->mddev;
3142 ssize_t rv;
3143
3144 if (!entry->show)
3145 return -EIO;
3146
3147 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3148 if (!rv) {
3149 if (rdev->mddev == NULL)
3150 rv = -EBUSY;
3151 else
3152 rv = entry->show(rdev, page);
3153 mddev_unlock(mddev);
3154 }
3155 return rv;
3156}
3157
3158static ssize_t
3159rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3160 const char *page, size_t length)
3161{
3162 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3163 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3164 ssize_t rv;
3165 struct mddev *mddev = rdev->mddev;
3166
3167 if (!entry->store)
3168 return -EIO;
3169 if (!capable(CAP_SYS_ADMIN))
3170 return -EACCES;
3171 rv = mddev ? mddev_lock(mddev): -EBUSY;
3172 if (!rv) {
3173 if (rdev->mddev == NULL)
3174 rv = -EBUSY;
3175 else
3176 rv = entry->store(rdev, page, length);
3177 mddev_unlock(mddev);
3178 }
3179 return rv;
3180}
3181
3182static void rdev_free(struct kobject *ko)
3183{
3184 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3185 kfree(rdev);
3186}
/* sysfs read/write entry points shared by all rdev attributes */
static const struct sysfs_ops rdev_sysfs_ops = {
 .show = rdev_attr_show,
 .store = rdev_attr_store,
};
/*
 * kobject type of a member device: ties together the release callback,
 * the attribute dispatchers and the default attribute list.
 */
static struct kobj_type rdev_ktype = {
 .release = rdev_free,
 .sysfs_ops = &rdev_sysfs_ops,
 .default_attrs = rdev_default_attrs,
};
3196
3197int md_rdev_init(struct md_rdev *rdev)
3198{
3199 rdev->desc_nr = -1;
3200 rdev->saved_raid_disk = -1;
3201 rdev->raid_disk = -1;
3202 rdev->flags = 0;
3203 rdev->data_offset = 0;
3204 rdev->new_data_offset = 0;
3205 rdev->sb_events = 0;
3206 rdev->last_read_error.tv_sec = 0;
3207 rdev->last_read_error.tv_nsec = 0;
3208 rdev->sb_loaded = 0;
3209 rdev->bb_page = NULL;
3210 atomic_set(&rdev->nr_pending, 0);
3211 atomic_set(&rdev->read_errors, 0);
3212 atomic_set(&rdev->corrected_errors, 0);
3213
3214 INIT_LIST_HEAD(&rdev->same_set);
3215 init_waitqueue_head(&rdev->blocked_wait);
3216
3217
3218
3219
3220
3221 rdev->badblocks.count = 0;
3222 rdev->badblocks.shift = -1;
3223 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
3224 seqlock_init(&rdev->badblocks.lock);
3225 if (rdev->badblocks.page == NULL)
3226 return -ENOMEM;
3227
3228 return 0;
3229}
3230EXPORT_SYMBOL_GPL(md_rdev_init);
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
/*
 * Allocate and set up an md_rdev for the block device 'newdev'.
 *
 * When super_format >= 0 the superblock is loaded through
 * super_types[super_format].load_super() with 'super_minor'; a negative
 * super_format skips superblock loading.  super_format == -2 is also
 * passed as a flag to lock_rdev() (NOTE(review): meaning of that flag is
 * not visible here - confirm against lock_rdev()).
 *
 * Returns the new rdev, or an ERR_PTR() carrying a negative errno.  On
 * failure everything acquired so far is released.
 */
static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
{
 char b[BDEVNAME_SIZE];
 int err;
 struct md_rdev *rdev;
 sector_t size;

 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
 if (!rdev) {
 printk(KERN_ERR "md: could not alloc mem for new device!\n");
 return ERR_PTR(-ENOMEM);
 }

 err = md_rdev_init(rdev);
 if (err)
 goto abort_free;
 err = alloc_disk_sb(rdev);
 if (err)
 goto abort_free;

 err = lock_rdev(rdev, newdev, super_format == -2);
 if (err)
 goto abort_free;

 kobject_init(&rdev->kobj, &rdev_ktype);

 /* a device that reports no size cannot be used */
 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
 if (!size) {
 printk(KERN_WARNING
 "md: %s has zero or unknown size, marking faulty!\n",
 bdevname(rdev->bdev,b));
 err = -EINVAL;
 goto abort_free;
 }

 if (super_format >= 0) {
 err = super_types[super_format].
 load_super(rdev, NULL, super_minor);
 /* -EINVAL is reported as "no valid superblock"; any other
  * negative value as a read failure */
 if (err == -EINVAL) {
 printk(KERN_WARNING
 "md: %s does not have a valid v%d.%d "
 "superblock, not importing!\n",
 bdevname(rdev->bdev,b),
 super_format, super_minor);
 goto abort_free;
 }
 if (err < 0) {
 printk(KERN_WARNING
 "md: could not read %s's sb, not importing!\n",
 bdevname(rdev->bdev,b));
 goto abort_free;
 }
 }

 return rdev;

abort_free:
 /* rdev->bdev is only set once lock_rdev() has succeeded (rdev was zero-allocated) */
 if (rdev->bdev)
 unlock_rdev(rdev);
 md_rdev_clear(rdev);
 kfree(rdev);
 return ERR_PTR(err);
}
3304
3305
3306
3307
3308
3309
3310static void analyze_sbs(struct mddev * mddev)
3311{
3312 int i;
3313 struct md_rdev *rdev, *freshest, *tmp;
3314 char b[BDEVNAME_SIZE];
3315
3316 freshest = NULL;
3317 rdev_for_each_safe(rdev, tmp, mddev)
3318 switch (super_types[mddev->major_version].
3319 load_super(rdev, freshest, mddev->minor_version)) {
3320 case 1:
3321 freshest = rdev;
3322 break;
3323 case 0:
3324 break;
3325 default:
3326 printk( KERN_ERR \
3327 "md: fatal superblock inconsistency in %s"
3328 " -- removing from array\n",
3329 bdevname(rdev->bdev,b));
3330 kick_rdev_from_array(rdev);
3331 }
3332
3333
3334 super_types[mddev->major_version].
3335 validate_super(mddev, freshest);
3336
3337 i = 0;
3338 rdev_for_each_safe(rdev, tmp, mddev) {
3339 if (mddev->max_disks &&
3340 (rdev->desc_nr >= mddev->max_disks ||
3341 i > mddev->max_disks)) {
3342 printk(KERN_WARNING
3343 "md: %s: %s: only %d devices permitted\n",
3344 mdname(mddev), bdevname(rdev->bdev, b),
3345 mddev->max_disks);
3346 kick_rdev_from_array(rdev);
3347 continue;
3348 }
3349 if (rdev != freshest)
3350 if (super_types[mddev->major_version].
3351 validate_super(mddev, rdev)) {
3352 printk(KERN_WARNING "md: kicking non-fresh %s"
3353 " from array!\n",
3354 bdevname(rdev->bdev,b));
3355 kick_rdev_from_array(rdev);
3356 continue;
3357 }
3358 if (mddev->level == LEVEL_MULTIPATH) {
3359 rdev->desc_nr = i++;
3360 rdev->raid_disk = rdev->desc_nr;
3361 set_bit(In_sync, &rdev->flags);
3362 } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
3363 rdev->raid_disk = -1;
3364 clear_bit(In_sync, &rdev->flags);
3365 }
3366 }
3367}
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
/*
 * Parse a non-negative decimal number with an optional fractional part
 * and return it multiplied by 10^scale, e.g. "1.5" with scale 3 gives
 * 1500.  Fractional digits beyond 'scale' are silently dropped.  The
 * number may be followed by a single newline; anything else after it
 * makes the function return -EINVAL.  Returns 0 on success with the
 * result stored in *res.
 */
int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
{
	unsigned long acc = 0;
	long frac_digits = -1;	/* stays negative until a '.' has been seen */

	for (;; cp++) {
		/* only the first '.' is part of the number */
		if (*cp == '.' && frac_digits < 0) {
			frac_digits = 0;
			continue;
		}
		if (!isdigit(*cp))
			break;
		/* already have 'scale' fractional digits: ignore the rest */
		if (frac_digits >= scale)
			continue;
		acc = acc * 10 + (unsigned int)(*cp - '0');
		if (frac_digits >= 0)
			frac_digits++;
	}

	if (*cp == '\n')
		cp++;
	if (*cp != '\0')
		return -EINVAL;

	if (frac_digits < 0)
		frac_digits = 0;
	/* pad with zeros up to the requested number of decimals */
	for (; frac_digits < scale; frac_digits++)
		acc *= 10;

	*res = acc;
	return 0;
}
3408
3409
3410static void md_safemode_timeout(unsigned long data);
3411
3412static ssize_t
3413safe_delay_show(struct mddev *mddev, char *page)
3414{
3415 int msec = (mddev->safemode_delay*1000)/HZ;
3416 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3417}
3418static ssize_t
3419safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3420{
3421 unsigned long msec;
3422
3423 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3424 return -EINVAL;
3425 if (msec == 0)
3426 mddev->safemode_delay = 0;
3427 else {
3428 unsigned long old_delay = mddev->safemode_delay;
3429 mddev->safemode_delay = (msec*HZ)/1000;
3430 if (mddev->safemode_delay == 0)
3431 mddev->safemode_delay = 1;
3432 if (mddev->safemode_delay < old_delay)
3433 md_safemode_timeout((unsigned long)mddev);
3434 }
3435 return len;
3436}
3437static struct md_sysfs_entry md_safe_delay =
3438__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3439
3440static ssize_t
3441level_show(struct mddev *mddev, char *page)
3442{
3443 struct md_personality *p = mddev->pers;
3444 if (p)
3445 return sprintf(page, "%s\n", p->name);
3446 else if (mddev->clevel[0])
3447 return sprintf(page, "%s\n", mddev->clevel);
3448 else if (mddev->level != LEVEL_NONE)
3449 return sprintf(page, "%d\n", mddev->level);
3450 else
3451 return 0;
3452}
3453
/*
 * Handle a write to the array "level" attribute.
 *
 * With no personality active the text is simply stored in
 * mddev->clevel (newline stripped) and the numeric level is reset to
 * LEVEL_NONE.  With a personality active the write requests an online
 * change to another personality via its ->takeover method; the old
 * personality is stopped and the new one started while the array is
 * suspended.  Returns the number of bytes written or a negative errno.
 */
static ssize_t
level_store(struct mddev *mddev, const char *buf, size_t len)
{
 char clevel[16];
 ssize_t rv = len;
 struct md_personality *pers;
 long level;
 void *priv;
 struct md_rdev *rdev;

 if (mddev->pers == NULL) {
 /* inactive array: just remember the requested level string */
 if (len == 0)
 return 0;
 if (len >= sizeof(mddev->clevel))
 return -ENOSPC;
 strncpy(mddev->clevel, buf, len);
 if (mddev->clevel[len-1] == '\n')
 len--;
 mddev->clevel[len] = 0;
 mddev->level = LEVEL_NONE;
 return rv;
 }

 /* Active array: the level can only be changed by a takeover.
  * That is refused while a sync thread exists, a reshape is in
  * progress (reshape_position != MaxSector) or sysfs_active is
  * set, and requires the current personality to implement
  * ->quiesce.
  */

 if (mddev->sync_thread ||
 mddev->reshape_position != MaxSector ||
 mddev->sysfs_active)
 return -EBUSY;

 if (!mddev->pers->quiesce) {
 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
 mdname(mddev), mddev->pers->name);
 return -EINVAL;
 }

 /* Now find the new personality */
 if (len == 0 || len >= sizeof(clevel))
 return -EINVAL;
 strncpy(clevel, buf, len);
 if (clevel[len-1] == '\n')
 len--;
 clevel[len] = 0;
 /* a non-numeric name leaves level at LEVEL_NONE; the lookup then uses the string */
 if (kstrtol(clevel, 10, &level))
 level = LEVEL_NONE;

 /* try both module alias forms before looking the personality up */
 if (request_module("md-%s", clevel) != 0)
 request_module("md-level-%s", clevel);
 spin_lock(&pers_lock);
 pers = find_pers(level, clevel);
 if (!pers || !try_module_get(pers->owner)) {
 spin_unlock(&pers_lock);
 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
 return -EINVAL;
 }
 spin_unlock(&pers_lock);

 if (pers == mddev->pers) {
 /* Nothing to do! */
 module_put(pers->owner);
 return rv;
 }
 if (!pers->takeover) {
 module_put(pers->owner);
 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
 mdname(mddev), clevel);
 return -EINVAL;
 }

 /* default: every device keeps its slot unless ->takeover changes new_raid_disk */
 rdev_for_each(rdev, mddev)
 rdev->new_raid_disk = rdev->raid_disk;

 /* ->takeover returns the new personality's private data, or an
  * ERR_PTR; on failure the new_* / delta fields are reset to the
  * current geometry below.
  */
 priv = pers->takeover(mddev);
 if (IS_ERR(priv)) {
 mddev->new_level = mddev->level;
 mddev->new_layout = mddev->layout;
 mddev->new_chunk_sectors = mddev->chunk_sectors;
 mddev->raid_disks -= mddev->delta_disks;
 mddev->delta_disks = 0;
 mddev->reshape_backwards = 0;
 module_put(pers->owner);
 printk(KERN_WARNING "md: %s: %s would not accept array\n",
 mdname(mddev), clevel);
 return PTR_ERR(priv);
 }

 /* Takeover accepted: suspend the array and stop the old personality */
 mddev_suspend(mddev);
 mddev->pers->stop(mddev);

 if (mddev->pers->sync_request == NULL &&
 pers->sync_request != NULL) {
 /* new personality can sync: add the md_redundancy_group attributes */
 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
 printk(KERN_WARNING
 "md: cannot register extra attributes for %s\n",
 mdname(mddev));
 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
 }
 if (mddev->pers->sync_request != NULL &&
 pers->sync_request == NULL) {
 /* new personality cannot sync: schedule md_redundancy_group for removal */
 if (mddev->to_remove == NULL)
 mddev->to_remove = &md_redundancy_group;
 }

 if (mddev->pers->sync_request == NULL &&
 mddev->external) {
 /* The old personality has no sync_request and metadata is
  * managed externally: start out not in_sync with safemode
  * disabled.
  * NOTE(review): presumably so that writes do not block on
  * a clean->dirty transition before the external metadata
  * handler is ready - confirm.
  */
 mddev->in_sync = 0;
 mddev->safemode_delay = 0;
 mddev->safemode = 0;
 }

 /* first pass: drop the sysfs links of devices whose slot changes or disappears */
 rdev_for_each(rdev, mddev) {
 if (rdev->raid_disk < 0)
 continue;
 if (rdev->new_raid_disk >= mddev->raid_disks)
 rdev->new_raid_disk = -1;
 if (rdev->new_raid_disk == rdev->raid_disk)
 continue;
 sysfs_unlink_rdev(mddev, rdev);
 }
 /* second pass: apply the new slots and re-create the links */
 rdev_for_each(rdev, mddev) {
 if (rdev->raid_disk < 0)
 continue;
 if (rdev->new_raid_disk == rdev->raid_disk)
 continue;
 rdev->raid_disk = rdev->new_raid_disk;
 if (rdev->raid_disk < 0)
 clear_bit(In_sync, &rdev->flags);
 else {
 if (sysfs_link_rdev(mddev, rdev))
 printk(KERN_WARNING "md: cannot register rd%d"
 " for %s after level change\n",
 rdev->raid_disk, mdname(mddev));
 }
 }

 /* switch over: release the old module, install the new personality and geometry */
 module_put(mddev->pers->owner);
 mddev->pers = pers;
 mddev->private = priv;
 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 mddev->level = mddev->new_level;
 mddev->layout = mddev->new_layout;
 mddev->chunk_sectors = mddev->new_chunk_sectors;
 mddev->delta_disks = 0;
 mddev->reshape_backwards = 0;
 mddev->degraded = 0;
 if (mddev->pers->sync_request == NULL) {
 /* the new personality has no sync_request, so the array
  * is marked in_sync and the safemode timer is stopped
  */
 mddev->in_sync = 1;
 del_timer_sync(&mddev->safemode_timer);
 }
 /* NOTE(review): the return value of ->run() is not checked here */
 pers->run(mddev);
 set_bit(MD_CHANGE_DEVS, &mddev->flags);
 mddev_resume(mddev);
 sysfs_notify(&mddev->kobj, NULL, "level");
 md_new_event(mddev);
 return rv;
}

/* "level" attribute: world-readable, owner-writable */
static struct md_sysfs_entry md_level =
__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3633
3634
3635static ssize_t
3636layout_show(struct mddev *mddev, char *page)
3637{
3638
3639 if (mddev->reshape_position != MaxSector &&
3640 mddev->layout != mddev->new_layout)
3641 return sprintf(page, "%d (%d)\n",
3642 mddev->new_layout, mddev->layout);
3643 return sprintf(page, "%d\n", mddev->layout);
3644}
3645
3646static ssize_t
3647layout_store(struct mddev *mddev, const char *buf, size_t len)
3648{
3649 char *e;
3650 unsigned long n = simple_strtoul(buf, &e, 10);
3651
3652 if (!*buf || (*e && *e != '\n'))
3653 return -EINVAL;
3654
3655 if (mddev->pers) {
3656 int err;
3657 if (mddev->pers->check_reshape == NULL)
3658 return -EBUSY;
3659 mddev->new_layout = n;
3660 err = mddev->pers->check_reshape(mddev);
3661 if (err) {
3662 mddev->new_layout = mddev->layout;
3663 return err;
3664 }
3665 } else {
3666 mddev->new_layout = n;
3667 if (mddev->reshape_position == MaxSector)
3668 mddev->layout = n;
3669 }
3670 return len;
3671}
3672static struct md_sysfs_entry md_layout =
3673__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3674
3675
3676static ssize_t
3677raid_disks_show(struct mddev *mddev, char *page)
3678{
3679 if (mddev->raid_disks == 0)
3680 return 0;
3681 if (mddev->reshape_position != MaxSector &&
3682 mddev->delta_disks != 0)
3683 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3684 mddev->raid_disks - mddev->delta_disks);
3685 return sprintf(page, "%d\n", mddev->raid_disks);
3686}
3687
3688static int update_raid_disks(struct mddev *mddev, int raid_disks);
3689
3690static ssize_t
3691raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3692{
3693 char *e;
3694 int rv = 0;
3695 unsigned long n = simple_strtoul(buf, &e, 10);
3696
3697 if (!*buf || (*e && *e != '\n'))
3698 return -EINVAL;
3699
3700 if (mddev->pers)
3701 rv = update_raid_disks(mddev, n);
3702 else if (mddev->reshape_position != MaxSector) {
3703 struct md_rdev *rdev;
3704 int olddisks = mddev->raid_disks - mddev->delta_disks;
3705
3706 rdev_for_each(rdev, mddev) {
3707 if (olddisks < n &&
3708 rdev->data_offset < rdev->new_data_offset)
3709 return -EINVAL;
3710 if (olddisks > n &&
3711 rdev->data_offset > rdev->new_data_offset)
3712 return -EINVAL;
3713 }
3714 mddev->delta_disks = n - olddisks;
3715 mddev->raid_disks = n;
3716 mddev->reshape_backwards = (mddev->delta_disks < 0);
3717 } else
3718 mddev->raid_disks = n;
3719 return rv ? rv : len;
3720}
3721static struct md_sysfs_entry md_raid_disks =
3722__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3723
3724static ssize_t
3725chunk_size_show(struct mddev *mddev, char *page)
3726{
3727 if (mddev->reshape_position != MaxSector &&
3728 mddev->chunk_sectors != mddev->new_chunk_sectors)
3729 return sprintf(page, "%d (%d)\n",
3730 mddev->new_chunk_sectors << 9,
3731 mddev->chunk_sectors << 9);
3732 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3733}
3734
3735static ssize_t
3736chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3737{
3738 char *e;
3739 unsigned long n = simple_strtoul(buf, &e, 10);
3740
3741 if (!*buf || (*e && *e != '\n'))
3742 return -EINVAL;
3743
3744 if (mddev->pers) {
3745 int err;
3746 if (mddev->pers->check_reshape == NULL)
3747 return -EBUSY;
3748 mddev->new_chunk_sectors = n >> 9;
3749 err = mddev->pers->check_reshape(mddev);
3750 if (err) {
3751 mddev->new_chunk_sectors = mddev->chunk_sectors;
3752 return err;
3753 }
3754 } else {
3755 mddev->new_chunk_sectors = n >> 9;
3756 if (mddev->reshape_position == MaxSector)
3757 mddev->chunk_sectors = n >> 9;
3758 }
3759 return len;
3760}
3761static struct md_sysfs_entry md_chunk_size =
3762__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3763
3764static ssize_t
3765resync_start_show(struct mddev *mddev, char *page)
3766{
3767 if (mddev->recovery_cp == MaxSector)
3768 return sprintf(page, "none\n");
3769 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3770}
3771
3772static ssize_t
3773resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3774{
3775 char *e;
3776 unsigned long long n = simple_strtoull(buf, &e, 10);
3777
3778 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3779 return -EBUSY;
3780 if (cmd_match(buf, "none"))
3781 n = MaxSector;
3782 else if (!*buf || (*e && *e != '\n'))
3783 return -EINVAL;
3784
3785 mddev->recovery_cp = n;
3786 if (mddev->pers)
3787 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3788 return len;
3789}
3790static struct md_sysfs_entry md_resync_start =
3791__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
/*
 * States exposed through the "array_state" attribute.  The enum values
 * index array_states[], so both lists must stay in the same order.
 * As reported by array_state_show():
 *   clear         - no personality, no devices, raid_disks and dev_sectors both 0
 *   inactive      - no personality, but some configuration present
 *   readonly      - running with mddev->ro == 1
 *   read_auto     - running with mddev->ro == 2
 *   clean         - running read-write with in_sync set
 *   write_pending - running read-write with MD_CHANGE_PENDING set
 *   active_idle   - running read-write with safemode set
 *   active        - running read-write, none of the above
 * 'suspended' is never reported by array_state_show().  'bad_word' is
 * the index of the NULL terminator, i.e. what match_word() returns for
 * an unknown word.
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
 write_pending, active_idle, bad_word};
static char *array_states[] = {
 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
 "write-pending", "active-idle", NULL };
3834
/*
 * Look 'word' up in the NULL-terminated 'list' using cmd_match().
 * Returns the index of the first match, or the index of the NULL
 * terminator when nothing matches.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !cmd_match(word, list[idx]))
		idx++;
	return idx;
}
3843
3844static ssize_t
3845array_state_show(struct mddev *mddev, char *page)
3846{
3847 enum array_state st = inactive;
3848
3849 if (mddev->pers)
3850 switch(mddev->ro) {
3851 case 1:
3852 st = readonly;
3853 break;
3854 case 2:
3855 st = read_auto;
3856 break;
3857 case 0:
3858 if (mddev->in_sync)
3859 st = clean;
3860 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3861 st = write_pending;
3862 else if (mddev->safemode)
3863 st = active_idle;
3864 else
3865 st = active;
3866 }
3867 else {
3868 if (list_empty(&mddev->disks) &&
3869 mddev->raid_disks == 0 &&
3870 mddev->dev_sectors == 0)
3871 st = clear;
3872 else
3873 st = inactive;
3874 }
3875 return sprintf(page, "%s\n", array_states[st]);
3876}
3877
3878static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
3879static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
3880static int do_md_run(struct mddev * mddev);
3881static int restart_array(struct mddev *mddev);
3882
/*
 * Handle a write to the "array_state" attribute: look the word up in
 * array_states[] and try to move the array into the requested state.
 *
 * Returns @len on success or a negative errno.  err starts as -EINVAL, so
 * every case that does not assign it (unknown word, "suspended",
 * "write-pending", "active-idle") is rejected with -EINVAL.
 */
static ssize_t
array_state_store(struct mddev *mddev, const char *buf, size_t len)
{
	int err = -EINVAL;
	enum array_state st = match_word(buf, array_states);
	switch(st) {
	case bad_word:
		/* Unrecognised word: keep -EINVAL. */
		break;
	case clear:
		/* Stop the array using do_md_stop() mode 0. */
		err = do_md_stop(mddev, 0, NULL);
		break;
	case inactive:
		/* Stop with mode 2 if running; an array without pers is already there. */
		if (mddev->pers)
			err = do_md_stop(mddev, 2, NULL);
		else
			err = 0;
		break;
	case suspended:
		/* Not handled here: keep -EINVAL. */
		break;
	case readonly:
		if (mddev->pers)
			err = md_set_readonly(mddev, NULL);
		else {
			/* Not running yet: mark read-only first, then start it. */
			mddev->ro = 1;
			set_disk_ro(mddev->gendisk, 1);
			err = do_md_run(mddev);
		}
		break;
	case read_auto:
		if (mddev->pers) {
			/*
			 * ro == 0 goes through md_set_readonly(), ro == 1 through
			 * restart_array().  If ro is already 2, err is still
			 * -EINVAL here and the request fails.
			 */
			if (mddev->ro == 0)
				err = md_set_readonly(mddev, NULL);
			else if (mddev->ro == 1)
				err = restart_array(mddev);
			if (err == 0) {
				mddev->ro = 2;
				set_disk_ro(mddev->gendisk, 0);
			}
		} else {
			mddev->ro = 2;
			err = do_md_run(mddev);
		}
		break;
	case clean:
		if (mddev->pers) {
			/* The return value of restart_array() is ignored here. */
			restart_array(mddev);
			spin_lock_irq(&mddev->write_lock);
			/* Only mark the array in_sync when no writes are pending. */
			if (atomic_read(&mddev->writes_pending) == 0) {
				if (mddev->in_sync == 0) {
					mddev->in_sync = 1;
					if (mddev->safemode == 1)
						mddev->safemode = 0;
					set_bit(MD_CHANGE_CLEAN, &mddev->flags);
				}
				err = 0;
			} else
				err = -EBUSY;
			spin_unlock_irq(&mddev->write_lock);
		} else
			err = -EINVAL;
		break;
	case active:
		if (mddev->pers) {
			/* The return value of restart_array() is ignored here. */
			restart_array(mddev);
			/* Clear the pending flag and wake anyone waiting on sb_wait. */
			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
			wake_up(&mddev->sb_wait);
			err = 0;
		} else {
			mddev->ro = 0;
			set_disk_ro(mddev->gendisk, 0);
			err = do_md_run(mddev);
		}
		break;
	case write_pending:
	case active_idle:
		/* These states cannot be requested: keep -EINVAL. */
		break;
	}
	if (err)
		return err;
	else {
		/* A successful write releases an UNTIL_IOCTL hold on the array. */
		if (mddev->hold_active == UNTIL_IOCTL)
			mddev->hold_active = 0;
		sysfs_notify_dirent_safe(mddev->sysfs_state);
		return len;
	}
}
static struct md_sysfs_entry md_array_state =
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3974
3975static ssize_t
3976max_corrected_read_errors_show(struct mddev *mddev, char *page) {
3977 return sprintf(page, "%d\n",
3978 atomic_read(&mddev->max_corr_read_errors));
3979}
3980
3981static ssize_t
3982max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
3983{
3984 char *e;
3985 unsigned long n = simple_strtoul(buf, &e, 10);
3986
3987 if (*buf && (*e == 0 || *e == '\n')) {
3988 atomic_set(&mddev->max_corr_read_errors, n);
3989 return len;
3990 }
3991 return -EINVAL;
3992}
3993
3994static struct md_sysfs_entry max_corr_read_errors =
3995__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3996 max_corrected_read_errors_store);
3997
/*
 * Show handler used by the write-only attributes in this file (new_dev,
 * bitmap_set_bits): never touches @page and always fails with -EINVAL.
 */
static ssize_t
null_show(struct mddev *mddev, char *page)
{
	const ssize_t rv = -EINVAL;

	return rv;
}
4003
4004static ssize_t
4005new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4006{
4007
4008
4009
4010
4011
4012
4013
4014 char *e;
4015 int major = simple_strtoul(buf, &e, 10);
4016 int minor;
4017 dev_t dev;
4018 struct md_rdev *rdev;
4019 int err;
4020
4021 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4022 return -EINVAL;
4023 minor = simple_strtoul(e+1, &e, 10);
4024 if (*e && *e != '\n')
4025 return -EINVAL;
4026 dev = MKDEV(major, minor);
4027 if (major != MAJOR(dev) ||
4028 minor != MINOR(dev))
4029 return -EOVERFLOW;
4030
4031
4032 if (mddev->persistent) {
4033 rdev = md_import_device(dev, mddev->major_version,
4034 mddev->minor_version);
4035 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4036 struct md_rdev *rdev0
4037 = list_entry(mddev->disks.next,
4038 struct md_rdev, same_set);
4039 err = super_types[mddev->major_version]
4040 .load_super(rdev, rdev0, mddev->minor_version);
4041 if (err < 0)
4042 goto out;
4043 }
4044 } else if (mddev->external)
4045 rdev = md_import_device(dev, -2, -1);
4046 else
4047 rdev = md_import_device(dev, -1, -1);
4048
4049 if (IS_ERR(rdev))
4050 return PTR_ERR(rdev);
4051 err = bind_rdev_to_array(rdev, mddev);
4052 out:
4053 if (err)
4054 export_rdev(rdev);
4055 return err ? err : len;
4056}
4057
4058static struct md_sysfs_entry md_new_device =
4059__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4060
4061static ssize_t
4062bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4063{
4064 char *end;
4065 unsigned long chunk, end_chunk;
4066
4067 if (!mddev->bitmap)
4068 goto out;
4069
4070 while (*buf) {
4071 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4072 if (buf == end) break;
4073 if (*end == '-') {
4074 buf = end + 1;
4075 end_chunk = simple_strtoul(buf, &end, 0);
4076 if (buf == end) break;
4077 }
4078 if (*end && !isspace(*end)) break;
4079 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4080 buf = skip_spaces(end);
4081 }
4082 bitmap_unplug(mddev->bitmap);
4083out:
4084 return len;
4085}
4086
4087static struct md_sysfs_entry md_bitmap =
4088__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4089
4090static ssize_t
4091size_show(struct mddev *mddev, char *page)
4092{
4093 return sprintf(page, "%llu\n",
4094 (unsigned long long)mddev->dev_sectors / 2);
4095}
4096
4097static int update_size(struct mddev *mddev, sector_t num_sectors);
4098
4099static ssize_t
4100size_store(struct mddev *mddev, const char *buf, size_t len)
4101{
4102
4103
4104
4105
4106 sector_t sectors;
4107 int err = strict_blocks_to_sectors(buf, §ors);
4108
4109 if (err < 0)
4110 return err;
4111 if (mddev->pers) {
4112 err = update_size(mddev, sectors);
4113 md_update_sb(mddev, 1);
4114 } else {
4115 if (mddev->dev_sectors == 0 ||
4116 mddev->dev_sectors > sectors)
4117 mddev->dev_sectors = sectors;
4118 else
4119 err = -ENOSPC;
4120 }
4121 return err ? err : len;
4122}
4123
4124static struct md_sysfs_entry md_size =
4125__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4126
4127
4128
4129
4130
4131
4132
4133
4134static ssize_t
4135metadata_show(struct mddev *mddev, char *page)
4136{
4137 if (mddev->persistent)
4138 return sprintf(page, "%d.%d\n",
4139 mddev->major_version, mddev->minor_version);
4140 else if (mddev->external)
4141 return sprintf(page, "external:%s\n", mddev->metadata_type);
4142 else
4143 return sprintf(page, "none\n");
4144}
4145
4146static ssize_t
4147metadata_store(struct mddev *mddev, const char *buf, size_t len)
4148{
4149 int major, minor;
4150 char *e;
4151
4152
4153
4154
4155 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4156 ;
4157 else if (!list_empty(&mddev->disks))
4158 return -EBUSY;
4159
4160 if (cmd_match(buf, "none")) {
4161 mddev->persistent = 0;
4162 mddev->external = 0;
4163 mddev->major_version = 0;
4164 mddev->minor_version = 90;
4165 return len;
4166 }
4167 if (strncmp(buf, "external:", 9) == 0) {
4168 size_t namelen = len-9;
4169 if (namelen >= sizeof(mddev->metadata_type))
4170 namelen = sizeof(mddev->metadata_type)-1;
4171 strncpy(mddev->metadata_type, buf+9, namelen);
4172 mddev->metadata_type[namelen] = 0;
4173 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4174 mddev->metadata_type[--namelen] = 0;
4175 mddev->persistent = 0;
4176 mddev->external = 1;
4177 mddev->major_version = 0;
4178 mddev->minor_version = 90;
4179 return len;
4180 }
4181 major = simple_strtoul(buf, &e, 10);
4182 if (e==buf || *e != '.')
4183 return -EINVAL;
4184 buf = e+1;
4185 minor = simple_strtoul(buf, &e, 10);
4186 if (e==buf || (*e && *e != '\n') )
4187 return -EINVAL;
4188 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4189 return -ENOENT;
4190 mddev->major_version = major;
4191 mddev->minor_version = minor;
4192 mddev->persistent = 1;
4193 mddev->external = 0;
4194 return len;
4195}
4196
4197static struct md_sysfs_entry md_metadata =
4198__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4199
4200static ssize_t
4201action_show(struct mddev *mddev, char *page)
4202{
4203 char *type = "idle";
4204 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4205 type = "frozen";
4206 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4207 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
4208 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4209 type = "reshape";
4210 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4211 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4212 type = "resync";
4213 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
4214 type = "check";
4215 else
4216 type = "repair";
4217 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
4218 type = "recover";
4219 }
4220 return sprintf(page, "%s\n", type);
4221}
4222
/*
 * Handle a write to "sync_action".
 *
 * Only valid when a personality with a sync_request method is attached
 * (-EINVAL otherwise).  "idle" and "frozen" interrupt and reap a running
 * sync thread.  Every other word is refused with -EBUSY while
 * MD_RECOVERY_RUNNING or MD_RECOVERY_NEEDED is set.  Otherwise "resync",
 * "recover", "reshape", "check" and "repair" set the corresponding recovery
 * bits (or call pers->start_reshape()), and unknown words give -EINVAL.
 *
 * Returns @len on success or a negative errno.
 */
static ssize_t
action_store(struct mddev *mddev, const char *page, size_t len)
{
	if (!mddev->pers || !mddev->pers->sync_request)
		return -EINVAL;

	/* FROZEN is set only by "frozen"; every other word clears it. */
	if (cmd_match(page, "frozen"))
		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
	else
		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);

	if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
		/* Interrupt any running sync thread and collect it. */
		if (mddev->sync_thread) {
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			md_reap_sync_thread(mddev);
		}
	} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
		   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
		return -EBUSY;
	else if (cmd_match(page, "resync"))
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	else if (cmd_match(page, "recover")) {
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	} else if (cmd_match(page, "reshape")) {
		int err;
		if (mddev->pers->start_reshape == NULL)
			return -EINVAL;
		err = mddev->pers->start_reshape(mddev);
		if (err)
			return err;
		sysfs_notify(&mddev->kobj, NULL, "degraded");
	} else {
		/* "check" and "repair" differ only in the CHECK bit. */
		if (cmd_match(page, "check"))
			set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
		else if (!cmd_match(page, "repair"))
			return -EINVAL;
		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	}
	if (mddev->ro == 2) {
		/*
		 * An array with ro == 2 is switched to ro == 0 before the
		 * threads are woken.
		 */
		mddev->ro = 0;
		md_wakeup_thread(mddev->sync_thread);
	}
	/* NEEDED is set on every successful write, then the md thread is woken. */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	return len;
}

static struct md_sysfs_entry md_scan_mode =
__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4278
4279static ssize_t
4280last_sync_action_show(struct mddev *mddev, char *page)
4281{
4282 return sprintf(page, "%s\n", mddev->last_sync_action);
4283}
4284
4285static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4286
4287static ssize_t
4288mismatch_cnt_show(struct mddev *mddev, char *page)
4289{
4290 return sprintf(page, "%llu\n",
4291 (unsigned long long)
4292 atomic64_read(&mddev->resync_mismatches));
4293}
4294
4295static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4296
4297static ssize_t
4298sync_min_show(struct mddev *mddev, char *page)
4299{
4300 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4301 mddev->sync_speed_min ? "local": "system");
4302}
4303
4304static ssize_t
4305sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4306{
4307 int min;
4308 char *e;
4309 if (strncmp(buf, "system", 6)==0) {
4310 mddev->sync_speed_min = 0;
4311 return len;
4312 }
4313 min = simple_strtoul(buf, &e, 10);
4314 if (buf == e || (*e && *e != '\n') || min <= 0)
4315 return -EINVAL;
4316 mddev->sync_speed_min = min;
4317 return len;
4318}
4319
4320static struct md_sysfs_entry md_sync_min =
4321__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4322
4323static ssize_t
4324sync_max_show(struct mddev *mddev, char *page)
4325{
4326 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4327 mddev->sync_speed_max ? "local": "system");
4328}
4329
4330static ssize_t
4331sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4332{
4333 int max;
4334 char *e;
4335 if (strncmp(buf, "system", 6)==0) {
4336 mddev->sync_speed_max = 0;
4337 return len;
4338 }
4339 max = simple_strtoul(buf, &e, 10);
4340 if (buf == e || (*e && *e != '\n') || max <= 0)
4341 return -EINVAL;
4342 mddev->sync_speed_max = max;
4343 return len;
4344}
4345
4346static struct md_sysfs_entry md_sync_max =
4347__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4348
4349static ssize_t
4350degraded_show(struct mddev *mddev, char *page)
4351{
4352 return sprintf(page, "%d\n", mddev->degraded);
4353}
4354static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4355
4356static ssize_t
4357sync_force_parallel_show(struct mddev *mddev, char *page)
4358{
4359 return sprintf(page, "%d\n", mddev->parallel_resync);
4360}
4361
4362static ssize_t
4363sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4364{
4365 long n;
4366
4367 if (kstrtol(buf, 10, &n))
4368 return -EINVAL;
4369
4370 if (n != 0 && n != 1)
4371 return -EINVAL;
4372
4373 mddev->parallel_resync = n;
4374
4375 if (mddev->sync_thread)
4376 wake_up(&resync_wait);
4377
4378 return len;
4379}
4380
4381
4382static struct md_sysfs_entry md_sync_force_parallel =
4383__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4384 sync_force_parallel_show, sync_force_parallel_store);
4385
4386static ssize_t
4387sync_speed_show(struct mddev *mddev, char *page)
4388{
4389 unsigned long resync, dt, db;
4390 if (mddev->curr_resync == 0)
4391 return sprintf(page, "none\n");
4392 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4393 dt = (jiffies - mddev->resync_mark) / HZ;
4394 if (!dt) dt++;
4395 db = resync - mddev->resync_mark_cnt;
4396 return sprintf(page, "%lu\n", db/dt/2);
4397}
4398
4399static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4400
4401static ssize_t
4402sync_completed_show(struct mddev *mddev, char *page)
4403{
4404 unsigned long long max_sectors, resync;
4405
4406 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4407 return sprintf(page, "none\n");
4408
4409 if (mddev->curr_resync == 1 ||
4410 mddev->curr_resync == 2)
4411 return sprintf(page, "delayed\n");
4412
4413 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4414 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4415 max_sectors = mddev->resync_max_sectors;
4416 else
4417 max_sectors = mddev->dev_sectors;
4418
4419 resync = mddev->curr_resync_completed;
4420 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4421}
4422
4423static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4424
4425static ssize_t
4426min_sync_show(struct mddev *mddev, char *page)
4427{
4428 return sprintf(page, "%llu\n",
4429 (unsigned long long)mddev->resync_min);
4430}
4431static ssize_t
4432min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4433{
4434 unsigned long long min;
4435 if (kstrtoull(buf, 10, &min))
4436 return -EINVAL;
4437 if (min > mddev->resync_max)
4438 return -EINVAL;
4439 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4440 return -EBUSY;
4441
4442
4443 if (mddev->chunk_sectors) {
4444 sector_t temp = min;
4445 if (sector_div(temp, mddev->chunk_sectors))
4446 return -EINVAL;
4447 }
4448 mddev->resync_min = min;
4449
4450 return len;
4451}
4452
4453static struct md_sysfs_entry md_min_sync =
4454__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4455
4456static ssize_t
4457max_sync_show(struct mddev *mddev, char *page)
4458{
4459 if (mddev->resync_max == MaxSector)
4460 return sprintf(page, "max\n");
4461 else
4462 return sprintf(page, "%llu\n",
4463 (unsigned long long)mddev->resync_max);
4464}
4465static ssize_t
4466max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4467{
4468 if (strncmp(buf, "max", 3) == 0)
4469 mddev->resync_max = MaxSector;
4470 else {
4471 unsigned long long max;
4472 if (kstrtoull(buf, 10, &max))
4473 return -EINVAL;
4474 if (max < mddev->resync_min)
4475 return -EINVAL;
4476 if (max < mddev->resync_max &&
4477 mddev->ro == 0 &&
4478 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4479 return -EBUSY;
4480
4481
4482 if (mddev->chunk_sectors) {
4483 sector_t temp = max;
4484 if (sector_div(temp, mddev->chunk_sectors))
4485 return -EINVAL;
4486 }
4487 mddev->resync_max = max;
4488 }
4489 wake_up(&mddev->recovery_wait);
4490 return len;
4491}
4492
4493static struct md_sysfs_entry md_max_sync =
4494__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4495
4496static ssize_t
4497suspend_lo_show(struct mddev *mddev, char *page)
4498{
4499 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4500}
4501
4502static ssize_t
4503suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4504{
4505 char *e;
4506 unsigned long long new = simple_strtoull(buf, &e, 10);
4507 unsigned long long old = mddev->suspend_lo;
4508
4509 if (mddev->pers == NULL ||
4510 mddev->pers->quiesce == NULL)
4511 return -EINVAL;
4512 if (buf == e || (*e && *e != '\n'))
4513 return -EINVAL;
4514
4515 mddev->suspend_lo = new;
4516 if (new >= old)
4517
4518 mddev->pers->quiesce(mddev, 2);
4519 else {
4520
4521 mddev->pers->quiesce(mddev, 1);
4522 mddev->pers->quiesce(mddev, 0);
4523 }
4524 return len;
4525}
4526static struct md_sysfs_entry md_suspend_lo =
4527__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4528
4529
4530static ssize_t
4531suspend_hi_show(struct mddev *mddev, char *page)
4532{
4533 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4534}
4535
4536static ssize_t
4537suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4538{
4539 char *e;
4540 unsigned long long new = simple_strtoull(buf, &e, 10);
4541 unsigned long long old = mddev->suspend_hi;
4542
4543 if (mddev->pers == NULL ||
4544 mddev->pers->quiesce == NULL)
4545 return -EINVAL;
4546 if (buf == e || (*e && *e != '\n'))
4547 return -EINVAL;
4548
4549 mddev->suspend_hi = new;
4550 if (new <= old)
4551
4552 mddev->pers->quiesce(mddev, 2);
4553 else {
4554
4555 mddev->pers->quiesce(mddev, 1);
4556 mddev->pers->quiesce(mddev, 0);
4557 }
4558 return len;
4559}
4560static struct md_sysfs_entry md_suspend_hi =
4561__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4562
4563static ssize_t
4564reshape_position_show(struct mddev *mddev, char *page)
4565{
4566 if (mddev->reshape_position != MaxSector)
4567 return sprintf(page, "%llu\n",
4568 (unsigned long long)mddev->reshape_position);
4569 strcpy(page, "none\n");
4570 return 5;
4571}
4572
4573static ssize_t
4574reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4575{
4576 struct md_rdev *rdev;
4577 char *e;
4578 unsigned long long new = simple_strtoull(buf, &e, 10);
4579 if (mddev->pers)
4580 return -EBUSY;
4581 if (buf == e || (*e && *e != '\n'))
4582 return -EINVAL;
4583 mddev->reshape_position = new;
4584 mddev->delta_disks = 0;
4585 mddev->reshape_backwards = 0;
4586 mddev->new_level = mddev->level;
4587 mddev->new_layout = mddev->layout;
4588 mddev->new_chunk_sectors = mddev->chunk_sectors;
4589 rdev_for_each(rdev, mddev)
4590 rdev->new_data_offset = rdev->data_offset;
4591 return len;
4592}
4593
4594static struct md_sysfs_entry md_reshape_position =
4595__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4596 reshape_position_store);
4597
4598static ssize_t
4599reshape_direction_show(struct mddev *mddev, char *page)
4600{
4601 return sprintf(page, "%s\n",
4602 mddev->reshape_backwards ? "backwards" : "forwards");
4603}
4604
4605static ssize_t
4606reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4607{
4608 int backwards = 0;
4609 if (cmd_match(buf, "forwards"))
4610 backwards = 0;
4611 else if (cmd_match(buf, "backwards"))
4612 backwards = 1;
4613 else
4614 return -EINVAL;
4615 if (mddev->reshape_backwards == backwards)
4616 return len;
4617
4618
4619 if (mddev->delta_disks)
4620 return -EBUSY;
4621
4622 if (mddev->persistent &&
4623 mddev->major_version == 0)
4624 return -EINVAL;
4625
4626 mddev->reshape_backwards = backwards;
4627 return len;
4628}
4629
4630static struct md_sysfs_entry md_reshape_direction =
4631__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4632 reshape_direction_store);
4633
4634static ssize_t
4635array_size_show(struct mddev *mddev, char *page)
4636{
4637 if (mddev->external_size)
4638 return sprintf(page, "%llu\n",
4639 (unsigned long long)mddev->array_sectors/2);
4640 else
4641 return sprintf(page, "default\n");
4642}
4643
4644static ssize_t
4645array_size_store(struct mddev *mddev, const char *buf, size_t len)
4646{
4647 sector_t sectors;
4648
4649 if (strncmp(buf, "default", 7) == 0) {
4650 if (mddev->pers)
4651 sectors = mddev->pers->size(mddev, 0, 0);
4652 else
4653 sectors = mddev->array_sectors;
4654
4655 mddev->external_size = 0;
4656 } else {
4657 if (strict_blocks_to_sectors(buf, §ors) < 0)
4658 return -EINVAL;
4659 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4660 return -E2BIG;
4661
4662 mddev->external_size = 1;
4663 }
4664
4665 mddev->array_sectors = sectors;
4666 if (mddev->pers) {
4667 set_capacity(mddev->gendisk, mddev->array_sectors);
4668 revalidate_disk(mddev->gendisk);
4669 }
4670 return len;
4671}
4672
4673static struct md_sysfs_entry md_array_size =
4674__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4675 array_size_store);
4676
/*
 * Attributes installed for every md kobject: this table is the
 * default_attrs of md_ktype.  NULL-terminated.
 */
static struct attribute *md_default_attrs[] = {
	&md_level.attr,
	&md_layout.attr,
	&md_raid_disks.attr,
	&md_chunk_size.attr,
	&md_size.attr,
	&md_resync_start.attr,
	&md_metadata.attr,
	&md_new_device.attr,
	&md_safe_delay.attr,
	&md_array_state.attr,
	&md_reshape_position.attr,
	&md_reshape_direction.attr,
	&md_array_size.attr,
	&max_corr_read_errors.attr,
	NULL,
};
4694
/*
 * Sync/recovery related attributes.  They are grouped in
 * md_redundancy_group, which md_run() registers only when the personality
 * provides a sync_request method.  NULL-terminated.
 */
static struct attribute *md_redundancy_attrs[] = {
	&md_scan_mode.attr,
	&md_last_scan_mode.attr,
	&md_mismatches.attr,
	&md_sync_min.attr,
	&md_sync_max.attr,
	&md_sync_speed.attr,
	&md_sync_force_parallel.attr,
	&md_sync_completed.attr,
	&md_min_sync.attr,
	&md_max_sync.attr,
	&md_suspend_lo.attr,
	&md_suspend_hi.attr,
	&md_bitmap.attr,
	&md_degraded.attr,
	NULL,
};
/* Unnamed group (.name = NULL) wrapping md_redundancy_attrs. */
static struct attribute_group md_redundancy_group = {
	.name = NULL,
	.attrs = md_redundancy_attrs,
};
4716
4717
4718static ssize_t
4719md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4720{
4721 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4722 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4723 ssize_t rv;
4724
4725 if (!entry->show)
4726 return -EIO;
4727 spin_lock(&all_mddevs_lock);
4728 if (list_empty(&mddev->all_mddevs)) {
4729 spin_unlock(&all_mddevs_lock);
4730 return -EBUSY;
4731 }
4732 mddev_get(mddev);
4733 spin_unlock(&all_mddevs_lock);
4734
4735 rv = mddev_lock(mddev);
4736 if (!rv) {
4737 rv = entry->show(mddev, page);
4738 mddev_unlock(mddev);
4739 }
4740 mddev_put(mddev);
4741 return rv;
4742}
4743
4744static ssize_t
4745md_attr_store(struct kobject *kobj, struct attribute *attr,
4746 const char *page, size_t length)
4747{
4748 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4749 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4750 ssize_t rv;
4751
4752 if (!entry->store)
4753 return -EIO;
4754 if (!capable(CAP_SYS_ADMIN))
4755 return -EACCES;
4756 spin_lock(&all_mddevs_lock);
4757 if (list_empty(&mddev->all_mddevs)) {
4758 spin_unlock(&all_mddevs_lock);
4759 return -EBUSY;
4760 }
4761 mddev_get(mddev);
4762 spin_unlock(&all_mddevs_lock);
4763 if (entry->store == new_dev_store)
4764 flush_workqueue(md_misc_wq);
4765 rv = mddev_lock(mddev);
4766 if (!rv) {
4767 rv = entry->store(mddev, page, length);
4768 mddev_unlock(mddev);
4769 }
4770 mddev_put(mddev);
4771 return rv;
4772}
4773
4774static void md_free(struct kobject *ko)
4775{
4776 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4777
4778 if (mddev->sysfs_state)
4779 sysfs_put(mddev->sysfs_state);
4780
4781 if (mddev->gendisk) {
4782 del_gendisk(mddev->gendisk);
4783 put_disk(mddev->gendisk);
4784 }
4785 if (mddev->queue)
4786 blk_cleanup_queue(mddev->queue);
4787
4788 kfree(mddev);
4789}
4790
/* sysfs operations for md kobjects: route reads/writes to the dispatchers. */
static const struct sysfs_ops md_sysfs_ops = {
	.show	= md_attr_show,
	.store	= md_attr_store,
};
/*
 * kobject type used for every mddev's "md" kobject (see md_alloc()):
 * md_free() releases the mddev, md_default_attrs are created automatically.
 */
static struct kobj_type md_ktype = {
	.release	= md_free,
	.sysfs_ops	= &md_sysfs_ops,
	.default_attrs	= md_default_attrs,
};
4800
/*
 * NOTE(review): presumably the major number used for partitionable md
 * devices; it is assigned outside the visible part of this file - confirm.
 */
int mdp_major = 0;
4802
4803static void mddev_delayed_delete(struct work_struct *ws)
4804{
4805 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4806
4807 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4808 kobject_del(&mddev->kobj);
4809 kobject_put(&mddev->kobj);
4810}
4811
/*
 * Allocate and register the request queue, gendisk and "md" kobject for the
 * array identified by @dev, optionally naming the disk @name.
 *
 * Returns 0 on success, -ENODEV if mddev_find() yields nothing, -EEXIST if
 * the mddev already has a gendisk or @name is already used by another
 * array's disk, and -ENOMEM if the queue or the disk cannot be allocated.
 * A kobject_init_and_add() failure is logged but not reported to the caller.
 * The reference obtained from mddev_find() is dropped before returning.
 */
static int md_alloc(dev_t dev, char *name)
{
	/* Serialises all disk allocation done through this function. */
	static DEFINE_MUTEX(disks_mutex);
	struct mddev *mddev = mddev_find(dev);
	struct gendisk *disk;
	int partitioned;
	int shift;
	int unit;
	int error;

	if (!mddev)
		return -ENODEV;

	/* Any major other than MD_MAJOR is treated as a partitionable array. */
	partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
	shift = partitioned ? MdpMinorShift : 0;
	unit = MINOR(mddev->unit) >> shift;

	/*
	 * Let work queued on md_misc_wq finish before proceeding (the delayed
	 * delete handler in this file is one such work item).
	 */
	flush_workqueue(md_misc_wq);

	mutex_lock(&disks_mutex);
	error = -EEXIST;
	if (mddev->gendisk)
		goto abort;

	if (name) {
		/*
		 * A caller-supplied name must not clash with the disk name of
		 * any existing array.
		 */
		struct mddev *mddev2;
		spin_lock(&all_mddevs_lock);

		list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
			if (mddev2->gendisk &&
			    strcmp(mddev2->gendisk->disk_name, name) == 0) {
				spin_unlock(&all_mddevs_lock);
				goto abort;
			}
		spin_unlock(&all_mddevs_lock);
	}

	error = -ENOMEM;
	mddev->queue = blk_alloc_queue(GFP_KERNEL);
	if (!mddev->queue)
		goto abort;
	mddev->queue->queuedata = mddev;

	blk_queue_make_request(mddev->queue, md_make_request);
	blk_set_stacking_limits(&mddev->queue->limits);

	disk = alloc_disk(1 << shift);
	if (!disk) {
		/* Undo the queue allocation; error is still -ENOMEM. */
		blk_cleanup_queue(mddev->queue);
		mddev->queue = NULL;
		goto abort;
	}
	disk->major = MAJOR(mddev->unit);
	disk->first_minor = unit << shift;
	if (name)
		strcpy(disk->disk_name, name);
	else if (partitioned)
		sprintf(disk->disk_name, "md_d%d", unit);
	else
		sprintf(disk->disk_name, "md%d", unit);
	disk->fops = &md_fops;
	disk->private_data = mddev;
	disk->queue = mddev->queue;
	blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);

	/* Set GENHD_FL_EXT_DEVT on the disk before it is added. */
	disk->flags |= GENHD_FL_EXT_DEVT;
	mddev->gendisk = disk;

	/*
	 * open_mutex is held across add_disk() and the kobject/sysfs setup
	 * that follows.
	 */
	mutex_lock(&mddev->open_mutex);
	add_disk(disk);

	error = kobject_init_and_add(&mddev->kobj, &md_ktype,
				     &disk_to_dev(disk)->kobj, "%s", "md");
	if (error) {
		/*
		 * Failure to register the kobject is only logged; the error
		 * is cleared and the function still returns 0.
		 */
		printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
		       disk->disk_name);
		error = 0;
	}
	if (mddev->kobj.sd &&
	    sysfs_create_group(&mddev->kobj, &md_bitmap_group))
		printk(KERN_DEBUG "pointless warning\n");
	mutex_unlock(&mddev->open_mutex);
 abort:
	mutex_unlock(&disks_mutex);
	/* Announce the kobject only if it actually made it into sysfs. */
	if (!error && mddev->kobj.sd) {
		kobject_uevent(&mddev->kobj, KOBJ_ADD);
		mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
	}
	mddev_put(mddev);
	return error;
}
4916
4917static struct kobject *md_probe(dev_t dev, int *part, void *data)
4918{
4919 md_alloc(dev, NULL);
4920 return NULL;
4921}
4922
4923static int add_named_array(const char *val, struct kernel_param *kp)
4924{
4925
4926
4927
4928
4929 int len = strlen(val);
4930 char buf[DISK_NAME_LEN];
4931
4932 while (len && val[len-1] == '\n')
4933 len--;
4934 if (len >= DISK_NAME_LEN)
4935 return -E2BIG;
4936 strlcpy(buf, val, len+1);
4937 if (strncmp(buf, "md_", 3) != 0)
4938 return -EINVAL;
4939 return md_alloc(0, buf);
4940}
4941
4942static void md_safemode_timeout(unsigned long data)
4943{
4944 struct mddev *mddev = (struct mddev *) data;
4945
4946 if (!atomic_read(&mddev->writes_pending)) {
4947 mddev->safemode = 1;
4948 if (mddev->external)
4949 sysfs_notify_dirent_safe(mddev->sysfs_state);
4950 }
4951 md_wakeup_thread(mddev->thread);
4952}
4953
/* Copied into mddev->ok_start_degraded by md_run() when an array is started. */
static int start_dirty_degraded;
4955
4956int md_run(struct mddev *mddev)
4957{
4958 int err;
4959 struct md_rdev *rdev;
4960 struct md_personality *pers;
4961
4962 if (list_empty(&mddev->disks))
4963
4964 return -EINVAL;
4965
4966 if (mddev->pers)
4967 return -EBUSY;
4968
4969 if (mddev->sysfs_active)
4970 return -EBUSY;
4971
4972
4973
4974
4975 if (!mddev->raid_disks) {
4976 if (!mddev->persistent)
4977 return -EINVAL;
4978 analyze_sbs(mddev);
4979 }
4980
4981 if (mddev->level != LEVEL_NONE)
4982 request_module("md-level-%d", mddev->level);
4983 else if (mddev->clevel[0])
4984 request_module("md-%s", mddev->clevel);
4985
4986
4987
4988
4989
4990
4991 rdev_for_each(rdev, mddev) {
4992 if (test_bit(Faulty, &rdev->flags))
4993 continue;
4994 sync_blockdev(rdev->bdev);
4995 invalidate_bdev(rdev->bdev);
4996
4997
4998
4999
5000
5001 if (rdev->meta_bdev) {
5002 ;
5003 } else if (rdev->data_offset < rdev->sb_start) {
5004 if (mddev->dev_sectors &&
5005 rdev->data_offset + mddev->dev_sectors
5006 > rdev->sb_start) {
5007 printk("md: %s: data overlaps metadata\n",
5008 mdname(mddev));
5009 return -EINVAL;
5010 }
5011 } else {
5012 if (rdev->sb_start + rdev->sb_size/512
5013 > rdev->data_offset) {
5014 printk("md: %s: metadata overlaps data\n",
5015 mdname(mddev));
5016 return -EINVAL;
5017 }
5018 }
5019 sysfs_notify_dirent_safe(rdev->sysfs_state);
5020 }
5021
5022 if (mddev->bio_set == NULL)
5023 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5024
5025 spin_lock(&pers_lock);
5026 pers = find_pers(mddev->level, mddev->clevel);
5027 if (!pers || !try_module_get(pers->owner)) {
5028 spin_unlock(&pers_lock);
5029 if (mddev->level != LEVEL_NONE)
5030 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5031 mddev->level);
5032 else
5033 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5034 mddev->clevel);
5035 return -EINVAL;
5036 }
5037 mddev->pers = pers;
5038 spin_unlock(&pers_lock);
5039 if (mddev->level != pers->level) {
5040 mddev->level = pers->level;
5041 mddev->new_level = pers->level;
5042 }
5043 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5044
5045 if (mddev->reshape_position != MaxSector &&
5046 pers->start_reshape == NULL) {
5047
5048 mddev->pers = NULL;
5049 module_put(pers->owner);
5050 return -EINVAL;
5051 }
5052
5053 if (pers->sync_request) {
5054
5055
5056
5057 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5058 struct md_rdev *rdev2;
5059 int warned = 0;
5060
5061 rdev_for_each(rdev, mddev)
5062 rdev_for_each(rdev2, mddev) {
5063 if (rdev < rdev2 &&
5064 rdev->bdev->bd_contains ==
5065 rdev2->bdev->bd_contains) {
5066 printk(KERN_WARNING
5067 "%s: WARNING: %s appears to be"
5068 " on the same physical disk as"
5069 " %s.\n",
5070 mdname(mddev),
5071 bdevname(rdev->bdev,b),
5072 bdevname(rdev2->bdev,b2));
5073 warned = 1;
5074 }
5075 }
5076
5077 if (warned)
5078 printk(KERN_WARNING
5079 "True protection against single-disk"
5080 " failure might be compromised.\n");
5081 }
5082
5083 mddev->recovery = 0;
5084
5085 mddev->resync_max_sectors = mddev->dev_sectors;
5086
5087 mddev->ok_start_degraded = start_dirty_degraded;
5088
5089 if (start_readonly && mddev->ro == 0)
5090 mddev->ro = 2;
5091
5092 err = mddev->pers->run(mddev);
5093 if (err)
5094 printk(KERN_ERR "md: pers->run() failed ...\n");
5095 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
5096 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5097 " but 'external_size' not in effect?\n", __func__);
5098 printk(KERN_ERR
5099 "md: invalid array_size %llu > default size %llu\n",
5100 (unsigned long long)mddev->array_sectors / 2,
5101 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
5102 err = -EINVAL;
5103 mddev->pers->stop(mddev);
5104 }
5105 if (err == 0 && mddev->pers->sync_request &&
5106 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5107 err = bitmap_create(mddev);
5108 if (err) {
5109 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5110 mdname(mddev), err);
5111 mddev->pers->stop(mddev);
5112 }
5113 }
5114 if (err) {
5115 module_put(mddev->pers->owner);
5116 mddev->pers = NULL;
5117 bitmap_destroy(mddev);
5118 return err;
5119 }
5120 if (mddev->pers->sync_request) {
5121 if (mddev->kobj.sd &&
5122 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5123 printk(KERN_WARNING
5124 "md: cannot register extra attributes for %s\n",
5125 mdname(mddev));
5126 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5127 } else if (mddev->ro == 2)
5128 mddev->ro = 0;
5129
5130 atomic_set(&mddev->writes_pending,0);
5131 atomic_set(&mddev->max_corr_read_errors,
5132 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5133 mddev->safemode = 0;
5134 mddev->safemode_timer.function = md_safemode_timeout;
5135 mddev->safemode_timer.data = (unsigned long) mddev;
5136 mddev->safemode_delay = (200 * HZ)/1000 +1;
5137 mddev->in_sync = 1;
5138 smp_wmb();
5139 mddev->ready = 1;
5140 rdev_for_each(rdev, mddev)
5141 if (rdev->raid_disk >= 0)
5142 if (sysfs_link_rdev(mddev, rdev))
5143 ;
5144
5145 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5146
5147 if (mddev->flags)
5148 md_update_sb(mddev, 0);
5149
5150 md_new_event(mddev);
5151 sysfs_notify_dirent_safe(mddev->sysfs_state);
5152 sysfs_notify_dirent_safe(mddev->sysfs_action);
5153 sysfs_notify(&mddev->kobj, NULL, "degraded");
5154 return 0;
5155}
5156EXPORT_SYMBOL_GPL(md_run);
5157
5158static int do_md_run(struct mddev *mddev)
5159{
5160 int err;
5161
5162 err = md_run(mddev);
5163 if (err)
5164 goto out;
5165 err = bitmap_load(mddev);
5166 if (err) {
5167 bitmap_destroy(mddev);
5168 goto out;
5169 }
5170
5171 md_wakeup_thread(mddev->thread);
5172 md_wakeup_thread(mddev->sync_thread);
5173
5174 set_capacity(mddev->gendisk, mddev->array_sectors);
5175 revalidate_disk(mddev->gendisk);
5176 mddev->changed = 1;
5177 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5178out:
5179 return err;
5180}
5181
5182static int restart_array(struct mddev *mddev)
5183{
5184 struct gendisk *disk = mddev->gendisk;
5185
5186
5187 if (list_empty(&mddev->disks))
5188 return -ENXIO;
5189 if (!mddev->pers)
5190 return -EINVAL;
5191 if (!mddev->ro)
5192 return -EBUSY;
5193 mddev->safemode = 0;
5194 mddev->ro = 0;
5195 set_disk_ro(disk, 0);
5196 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5197 mdname(mddev));
5198
5199 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5200 md_wakeup_thread(mddev->thread);
5201 md_wakeup_thread(mddev->sync_thread);
5202 sysfs_notify_dirent_safe(mddev->sysfs_state);
5203 return 0;
5204}
5205
5206
5207
5208static int deny_bitmap_write_access(struct file * file)
5209{
5210 struct inode *inode = file->f_mapping->host;
5211
5212 spin_lock(&inode->i_lock);
5213 if (atomic_read(&inode->i_writecount) > 1) {
5214 spin_unlock(&inode->i_lock);
5215 return -ETXTBSY;
5216 }
5217 atomic_set(&inode->i_writecount, -1);
5218 spin_unlock(&inode->i_lock);
5219
5220 return 0;
5221}
5222
5223void restore_bitmap_write_access(struct file *file)
5224{
5225 struct inode *inode = file->f_mapping->host;
5226
5227 spin_lock(&inode->i_lock);
5228 atomic_set(&inode->i_writecount, 1);
5229 spin_unlock(&inode->i_lock);
5230}
5231
5232static void md_clean(struct mddev *mddev)
5233{
5234 mddev->array_sectors = 0;
5235 mddev->external_size = 0;
5236 mddev->dev_sectors = 0;
5237 mddev->raid_disks = 0;
5238 mddev->recovery_cp = 0;
5239 mddev->resync_min = 0;
5240 mddev->resync_max = MaxSector;
5241 mddev->reshape_position = MaxSector;
5242 mddev->external = 0;
5243 mddev->persistent = 0;
5244 mddev->level = LEVEL_NONE;
5245 mddev->clevel[0] = 0;
5246 mddev->flags = 0;
5247 mddev->ro = 0;
5248 mddev->metadata_type[0] = 0;
5249 mddev->chunk_sectors = 0;
5250 mddev->ctime = mddev->utime = 0;
5251 mddev->layout = 0;
5252 mddev->max_disks = 0;
5253 mddev->events = 0;
5254 mddev->can_decrease_events = 0;
5255 mddev->delta_disks = 0;
5256 mddev->reshape_backwards = 0;
5257 mddev->new_level = LEVEL_NONE;
5258 mddev->new_layout = 0;
5259 mddev->new_chunk_sectors = 0;
5260 mddev->curr_resync = 0;
5261 atomic64_set(&mddev->resync_mismatches, 0);
5262 mddev->suspend_lo = mddev->suspend_hi = 0;
5263 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5264 mddev->recovery = 0;
5265 mddev->in_sync = 0;
5266 mddev->changed = 0;
5267 mddev->degraded = 0;
5268 mddev->safemode = 0;
5269 mddev->merge_check_needed = 0;
5270 mddev->bitmap_info.offset = 0;
5271 mddev->bitmap_info.default_offset = 0;
5272 mddev->bitmap_info.default_space = 0;
5273 mddev->bitmap_info.chunksize = 0;
5274 mddev->bitmap_info.daemon_sleep = 0;
5275 mddev->bitmap_info.max_write_behind = 0;
5276}
5277
5278static void __md_stop_writes(struct mddev *mddev)
5279{
5280 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5281 if (mddev->sync_thread) {
5282 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5283 md_reap_sync_thread(mddev);
5284 }
5285
5286 del_timer_sync(&mddev->safemode_timer);
5287
5288 bitmap_flush(mddev);
5289 md_super_wait(mddev);
5290
5291 if (mddev->ro == 0 &&
5292 (!mddev->in_sync || mddev->flags)) {
5293
5294 mddev->in_sync = 1;
5295 md_update_sb(mddev, 1);
5296 }
5297}
5298
/*
 * Exported wrapper: run __md_stop_writes() between mddev_lock() and
 * mddev_unlock().
 *
 * NOTE(review): the return value of mddev_lock() is ignored here, whereas
 * other callers in this file test it -- confirm the lock cannot fail in the
 * contexts this is called from.
 */
void md_stop_writes(struct mddev *mddev)
{
	mddev_lock(mddev);
	__md_stop_writes(mddev);
	mddev_unlock(mddev);
}
EXPORT_SYMBOL_GPL(md_stop_writes);
5306
5307static void __md_stop(struct mddev *mddev)
5308{
5309 mddev->ready = 0;
5310 mddev->pers->stop(mddev);
5311 if (mddev->pers->sync_request && mddev->to_remove == NULL)
5312 mddev->to_remove = &md_redundancy_group;
5313 module_put(mddev->pers->owner);
5314 mddev->pers = NULL;
5315 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5316}
5317
5318void md_stop(struct mddev *mddev)
5319{
5320
5321
5322
5323 __md_stop(mddev);
5324 bitmap_destroy(mddev);
5325 if (mddev->bio_set)
5326 bioset_free(mddev->bio_set);
5327}
5328
5329EXPORT_SYMBOL_GPL(md_stop);
5330
5331static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5332{
5333 int err = 0;
5334 mutex_lock(&mddev->open_mutex);
5335 if (atomic_read(&mddev->openers) > !!bdev) {
5336 printk("md: %s still in use.\n",mdname(mddev));
5337 err = -EBUSY;
5338 goto out;
5339 }
5340 if (bdev)
5341 sync_blockdev(bdev);
5342 if (mddev->pers) {
5343 __md_stop_writes(mddev);
5344
5345 err = -ENXIO;
5346 if (mddev->ro==1)
5347 goto out;
5348 mddev->ro = 1;
5349 set_disk_ro(mddev->gendisk, 1);
5350 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5351 sysfs_notify_dirent_safe(mddev->sysfs_state);
5352 err = 0;
5353 }
5354out:
5355 mutex_unlock(&mddev->open_mutex);
5356 return err;
5357}
5358
5359
5360
5361
5362
/*
 * Stop @mddev.
 *
 * @mode: when 0, the array is fully torn down after being stopped (bitmap
 *        destroyed, bitmap file released, member devices exported via
 *        export_array(), configuration reset via md_clean()).  For any
 *        other value only the stop itself is performed.
 * @bdev: block device to sync before stopping, or NULL.
 *
 * Returns 0 on success, or -EBUSY when the array has more openers than
 * !!bdev or ->sysfs_active is set.
 */
static int do_md_stop(struct mddev * mddev, int mode,
		      struct block_device *bdev)
{
	struct gendisk *disk = mddev->gendisk;
	struct md_rdev *rdev;

	mutex_lock(&mddev->open_mutex);
	/*
	 * !!bdev is 1 when a bdev was passed, so exactly one opener is
	 * tolerated in that case (presumably the caller's own open --
	 * confirm against callers).
	 */
	if (atomic_read(&mddev->openers) > !!bdev ||
	    mddev->sysfs_active) {
		printk("md: %s still in use.\n",mdname(mddev));
		mutex_unlock(&mddev->open_mutex);
		return -EBUSY;
	}
	if (bdev)
		/* Write out anything cached for the device before stopping. */
		sync_blockdev(bdev);

	if (mddev->pers) {
		/* Make the disk writable again before stopping writes. */
		if (mddev->ro)
			set_disk_ro(disk, 0);

		__md_stop_writes(mddev);
		__md_stop(mddev);
		/* The personality is gone; drop the queue callbacks. */
		mddev->queue->merge_bvec_fn = NULL;
		mddev->queue->backing_dev_info.congested_fn = NULL;

		/* Let sysfs pollers see the state change. */
		sysfs_notify_dirent_safe(mddev->sysfs_state);

		/* Remove the sysfs links of devices that held a slot. */
		rdev_for_each(rdev, mddev)
			if (rdev->raid_disk >= 0)
				sysfs_unlink_rdev(mddev, rdev);

		set_capacity(disk, 0);
		/* open_mutex is released before the disk is revalidated. */
		mutex_unlock(&mddev->open_mutex);
		mddev->changed = 1;
		revalidate_disk(disk);

		if (mddev->ro)
			mddev->ro = 0;
	} else
		mutex_unlock(&mddev->open_mutex);

	/* mode == 0: release everything still attached to the array. */
	if (mode == 0) {
		printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));

		bitmap_destroy(mddev);
		if (mddev->bitmap_info.file) {
			/* Undo the write-access claim and drop the file. */
			restore_bitmap_write_access(mddev->bitmap_info.file);
			fput(mddev->bitmap_info.file);
			mddev->bitmap_info.file = NULL;
		}
		mddev->bitmap_info.offset = 0;

		export_array(mddev);

		md_clean(mddev);
		kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
		if (mddev->hold_active == UNTIL_STOP)
			mddev->hold_active = 0;
	}
	blk_integrity_unregister(disk);
	md_new_event(mddev);
	sysfs_notify_dirent_safe(mddev->sysfs_state);
	return 0;
}
5435
5436#ifndef MODULE
5437static void autorun_array(struct mddev *mddev)
5438{
5439 struct md_rdev *rdev;
5440 int err;
5441
5442 if (list_empty(&mddev->disks))
5443 return;
5444
5445 printk(KERN_INFO "md: running: ");
5446
5447 rdev_for_each(rdev, mddev) {
5448 char b[BDEVNAME_SIZE];
5449 printk("<%s>", bdevname(rdev->bdev,b));
5450 }
5451 printk("\n");
5452
5453 err = do_md_run(mddev);
5454 if (err) {
5455 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5456 do_md_stop(mddev, 0, NULL);
5457 }
5458}
5459
5460
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
/*
 * Drain the global pending_raid_disks list, assembling and starting one
 * array per iteration.
 *
 * Each pass takes the first pending device (rdev0), moves every pending
 * device for which super_90_load(rdev, rdev0, 0) succeeds onto a local
 * "candidates" list, derives the array's dev_t from rdev0->preferred_minor
 * (on mdp_major when @part is non-zero, on MD_MAJOR otherwise), and, if the
 * target array is idle, binds the candidates to it and runs it.
 */
static void autorun_devices(int part)
{
	struct md_rdev *rdev0, *rdev, *tmp;
	struct mddev *mddev;
	char b[BDEVNAME_SIZE];

	printk(KERN_INFO "md: autorun ...\n");
	while (!list_empty(&pending_raid_disks)) {
		int unit;
		dev_t dev;
		LIST_HEAD(candidates);
		rdev0 = list_entry(pending_raid_disks.next,
					 struct md_rdev, same_set);

		printk(KERN_INFO "md: considering %s ...\n",
			bdevname(rdev0->bdev,b));
		INIT_LIST_HEAD(&candidates);
		/* Collect every pending device that loads against rdev0. */
		rdev_for_each_list(rdev, tmp, &pending_raid_disks)
			if (super_90_load(rdev, rdev0, 0) >= 0) {
				printk(KERN_INFO "md:  adding %s ...\n",
					bdevname(rdev->bdev,b));
				list_move(&rdev->same_set, &candidates);
			}

		/*
		 * Build the dev_t, then recover the unit number from it so
		 * that a preferred_minor that does not survive the round
		 * trip is detected below.
		 */
		if (part) {
			dev = MKDEV(mdp_major,
				    rdev0->preferred_minor << MdpMinorShift);
			unit = MINOR(dev) >> MdpMinorShift;
		} else {
			dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
			unit = MINOR(dev);
		}
		if (rdev0->preferred_minor != unit) {
			printk(KERN_INFO "md: unit number in %s is bad: %d\n",
			       bdevname(rdev0->bdev, b), rdev0->preferred_minor);
			/*
			 * NOTE(review): leaving the loop here skips the
			 * export_rdev() pass at the bottom, so devices already
			 * moved to "candidates" are not exported by this
			 * function -- confirm whether that is intended.
			 */
			break;
		}

		md_probe(dev, NULL, NULL);
		mddev = mddev_find(dev);
		if (!mddev || !mddev->gendisk) {
			if (mddev)
				mddev_put(mddev);
			printk(KERN_ERR
				"md: cannot allocate memory for md drive.\n");
			break;
		}
		if (mddev_lock(mddev))
			printk(KERN_WARNING "md: %s locked, cannot run\n",
			       mdname(mddev));
		else if (mddev->raid_disks || mddev->major_version
			 || !list_empty(&mddev->disks)) {
			/* The array already carries configuration or devices. */
			printk(KERN_WARNING
				"md: %s already running, cannot run %s\n",
				mdname(mddev), bdevname(rdev0->bdev,b));
			mddev_unlock(mddev);
		} else {
			printk(KERN_INFO "md: created %s\n", mdname(mddev));
			mddev->persistent = 1;
			/* Devices that fail to bind are exported right away. */
			rdev_for_each_list(rdev, tmp, &candidates) {
				list_del_init(&rdev->same_set);
				if (bind_rdev_to_array(rdev, mddev))
					export_rdev(rdev);
			}
			autorun_array(mddev);
			mddev_unlock(mddev);
		}

		/*
		 * Whatever is still on the candidates list at this point was
		 * not bound to an array; export it.
		 */
		rdev_for_each_list(rdev, tmp, &candidates) {
			list_del_init(&rdev->same_set);
			export_rdev(rdev);
		}
		mddev_put(mddev);
	}
	printk(KERN_INFO "md: ... autorun DONE.\n");
}
5554#endif
5555
5556static int get_version(void __user * arg)
5557{
5558 mdu_version_t ver;
5559
5560 ver.major = MD_MAJOR_VERSION;
5561 ver.minor = MD_MINOR_VERSION;
5562 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5563
5564 if (copy_to_user(arg, &ver, sizeof(ver)))
5565 return -EFAULT;
5566
5567 return 0;
5568}
5569
5570static int get_array_info(struct mddev * mddev, void __user * arg)
5571{
5572 mdu_array_info_t info;
5573 int nr,working,insync,failed,spare;
5574 struct md_rdev *rdev;
5575
5576 nr = working = insync = failed = spare = 0;
5577 rcu_read_lock();
5578 rdev_for_each_rcu(rdev, mddev) {
5579 nr++;
5580 if (test_bit(Faulty, &rdev->flags))
5581 failed++;
5582 else {
5583 working++;
5584 if (test_bit(In_sync, &rdev->flags))
5585 insync++;
5586 else
5587 spare++;
5588 }
5589 }
5590 rcu_read_unlock();
5591
5592 info.major_version = mddev->major_version;
5593 info.minor_version = mddev->minor_version;
5594 info.patch_version = MD_PATCHLEVEL_VERSION;
5595 info.ctime = mddev->ctime;
5596 info.level = mddev->level;
5597 info.size = mddev->dev_sectors / 2;
5598 if (info.size != mddev->dev_sectors / 2)
5599 info.size = -1;
5600 info.nr_disks = nr;
5601 info.raid_disks = mddev->raid_disks;
5602 info.md_minor = mddev->md_minor;
5603 info.not_persistent= !mddev->persistent;
5604
5605 info.utime = mddev->utime;
5606 info.state = 0;
5607 if (mddev->in_sync)
5608 info.state = (1<<MD_SB_CLEAN);
5609 if (mddev->bitmap && mddev->bitmap_info.offset)
5610 info.state = (1<<MD_SB_BITMAP_PRESENT);
5611 info.active_disks = insync;
5612 info.working_disks = working;
5613 info.failed_disks = failed;
5614 info.spare_disks = spare;
5615
5616 info.layout = mddev->layout;
5617 info.chunk_size = mddev->chunk_sectors << 9;
5618
5619 if (copy_to_user(arg, &info, sizeof(info)))
5620 return -EFAULT;
5621
5622 return 0;
5623}
5624
5625static int get_bitmap_file(struct mddev * mddev, void __user * arg)
5626{
5627 mdu_bitmap_file_t *file = NULL;
5628 char *ptr, *buf = NULL;
5629 int err = -ENOMEM;
5630
5631 if (md_allow_write(mddev))
5632 file = kmalloc(sizeof(*file), GFP_NOIO);
5633 else
5634 file = kmalloc(sizeof(*file), GFP_KERNEL);
5635
5636 if (!file)
5637 goto out;
5638
5639
5640 if (!mddev->bitmap || !mddev->bitmap->storage.file) {
5641 file->pathname[0] = '\0';
5642 goto copy_out;
5643 }
5644
5645 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
5646 if (!buf)
5647 goto out;
5648
5649 ptr = d_path(&mddev->bitmap->storage.file->f_path,
5650 buf, sizeof(file->pathname));
5651 if (IS_ERR(ptr))
5652 goto out;
5653
5654 strcpy(file->pathname, ptr);
5655
5656copy_out:
5657 err = 0;
5658 if (copy_to_user(arg, file, sizeof(*file)))
5659 err = -EFAULT;
5660out:
5661 kfree(buf);
5662 kfree(file);
5663 return err;
5664}
5665
5666static int get_disk_info(struct mddev * mddev, void __user * arg)
5667{
5668 mdu_disk_info_t info;
5669 struct md_rdev *rdev;
5670
5671 if (copy_from_user(&info, arg, sizeof(info)))
5672 return -EFAULT;
5673
5674 rcu_read_lock();
5675 rdev = find_rdev_nr_rcu(mddev, info.number);
5676 if (rdev) {
5677 info.major = MAJOR(rdev->bdev->bd_dev);
5678 info.minor = MINOR(rdev->bdev->bd_dev);
5679 info.raid_disk = rdev->raid_disk;
5680 info.state = 0;
5681 if (test_bit(Faulty, &rdev->flags))
5682 info.state |= (1<<MD_DISK_FAULTY);
5683 else if (test_bit(In_sync, &rdev->flags)) {
5684 info.state |= (1<<MD_DISK_ACTIVE);
5685 info.state |= (1<<MD_DISK_SYNC);
5686 }
5687 if (test_bit(WriteMostly, &rdev->flags))
5688 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5689 } else {
5690 info.major = info.minor = 0;
5691 info.raid_disk = -1;
5692 info.state = (1<<MD_DISK_REMOVED);
5693 }
5694 rcu_read_unlock();
5695
5696 if (copy_to_user(arg, &info, sizeof(info)))
5697 return -EFAULT;
5698
5699 return 0;
5700}
5701
/*
 * Add the device described by @info (major/minor, number, raid_disk and
 * state bits) to @mddev.
 *
 * Three situations are handled in turn:
 *  1. ->raid_disks is still 0: the device is imported with the array's
 *     superblock version and, if the array already has devices, its
 *     superblock must load successfully against the first of them.
 *  2. A personality is active (->pers set): the device is hot-added.
 *  3. Otherwise: only arrays with major_version 0 are accepted, and the
 *     device is imported without reading a superblock unless @info marks
 *     it MD_DISK_FAULTY, in which case nothing is done.
 *
 * Returns 0 on success or a negative errno.
 */
static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	struct md_rdev *rdev;
	dev_t dev = MKDEV(info->major,info->minor);

	/* Reject numbers that do not survive encoding into a dev_t. */
	if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
		return -EOVERFLOW;

	if (!mddev->raid_disks) {
		int err;
		/* Case 1: import using the array's metadata version. */
		rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
		if (IS_ERR(rdev)) {
			printk(KERN_WARNING
				"md: md_import_device returned %ld\n",
				PTR_ERR(rdev));
			return PTR_ERR(rdev);
		}
		if (!list_empty(&mddev->disks)) {
			/* Check the new superblock against the first device. */
			struct md_rdev *rdev0
				= list_entry(mddev->disks.next,
					     struct md_rdev, same_set);
			err = super_types[mddev->major_version]
				.load_super(rdev, rdev0, mddev->minor_version);
			if (err < 0) {
				printk(KERN_WARNING
					"md: %s has different UUID to %s\n",
					bdevname(rdev->bdev,b),
					bdevname(rdev0->bdev,b2));
				export_rdev(rdev);
				return -EINVAL;
			}
		}
		err = bind_rdev_to_array(rdev, mddev);
		if (err)
			export_rdev(rdev);
		return err;
	}

	/*
	 * Case 2: the array is running, so this is a hot-add.  It requires
	 * the personality to implement ->hot_add_disk.
	 */
	if (mddev->pers) {
		int err;
		if (!mddev->pers->hot_add_disk) {
			printk(KERN_WARNING
				"%s: personality does not support diskops!\n",
			       mdname(mddev));
			return -EINVAL;
		}
		/* Non-persistent arrays import without reading metadata. */
		if (mddev->persistent)
			rdev = md_import_device(dev, mddev->major_version,
						mddev->minor_version);
		else
			rdev = md_import_device(dev, -1, -1);
		if (IS_ERR(rdev)) {
			printk(KERN_WARNING
				"md: md_import_device returned %ld\n",
				PTR_ERR(rdev));
			return PTR_ERR(rdev);
		}
		/* Work out which slot, if any, the device claims. */
		if (!mddev->persistent) {
			if (info->state & (1<<MD_DISK_SYNC)  &&
			    info->raid_disk < mddev->raid_disks) {
				rdev->raid_disk = info->raid_disk;
				set_bit(In_sync, &rdev->flags);
			} else
				rdev->raid_disk = -1;
		} else
			super_types[mddev->major_version].
				validate_super(mddev, rdev);
		if ((info->state & (1<<MD_DISK_SYNC)) &&
		     rdev->raid_disk != info->raid_disk) {
			/*
			 * The caller says the device is in sync, but the slot
			 * determined above is not the one it asked for.
			 */
			export_rdev(rdev);
			return -EINVAL;
		}

		/* Remember the slot of an in-sync device before resetting it. */
		if (test_bit(In_sync, &rdev->flags))
			rdev->saved_raid_disk = rdev->raid_disk;
		else
			rdev->saved_raid_disk = -1;

		clear_bit(In_sync, &rdev->flags);
		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		else
			clear_bit(WriteMostly, &rdev->flags);

		rdev->raid_disk = -1;
		err = bind_rdev_to_array(rdev, mddev);
		if (!err && !mddev->pers->hot_remove_disk) {
			/*
			 * The personality has ->hot_add_disk but no
			 * ->hot_remove_disk: re-validate and hand the device
			 * to the personality immediately.
			 */
			super_types[mddev->major_version].
				validate_super(mddev, rdev);
			err = mddev->pers->hot_add_disk(mddev, rdev);
			if (err)
				unbind_rdev_from_array(rdev);
		}
		if (err)
			export_rdev(rdev);
		else
			sysfs_notify_dirent_safe(rdev->sysfs_state);

		/* The flags below are set whether or not the add succeeded. */
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		if (mddev->degraded)
			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		if (!err)
			md_new_event(mddev);
		md_wakeup_thread(mddev->thread);
		return err;
	}

	/*
	 * Case 3: array configured but not running.  Only major_version 0
	 * is supported on this path.
	 */
	if (mddev->major_version != 0) {
		printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
		       mdname(mddev));
		return -EINVAL;
	}

	if (!(info->state & (1<<MD_DISK_FAULTY))) {
		int err;
		rdev = md_import_device(dev, -1, 0);
		if (IS_ERR(rdev)) {
			printk(KERN_WARNING
				"md: error, md_import_device() returned %ld\n",
				PTR_ERR(rdev));
			return PTR_ERR(rdev);
		}
		rdev->desc_nr = info->number;
		/* Slots at or beyond raid_disks are treated as "no slot". */
		if (info->raid_disk < mddev->raid_disks)
			rdev->raid_disk = info->raid_disk;
		else
			rdev->raid_disk = -1;

		if (rdev->raid_disk < mddev->raid_disks)
			if (info->state & (1<<MD_DISK_SYNC))
				set_bit(In_sync, &rdev->flags);

		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);

		/*
		 * Without a persistent superblock sb_start is the device
		 * size in 512-byte units; otherwise it comes from
		 * calc_dev_sboffset().
		 */
		if (!mddev->persistent) {
			printk(KERN_INFO "md: nonpersistent superblock ...\n");
			rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
		} else
			rdev->sb_start = calc_dev_sboffset(rdev);
		rdev->sectors = rdev->sb_start;

		err = bind_rdev_to_array(rdev, mddev);
		if (err) {
			export_rdev(rdev);
			return err;
		}
	}

	return 0;
}
5872
5873static int hot_remove_disk(struct mddev * mddev, dev_t dev)
5874{
5875 char b[BDEVNAME_SIZE];
5876 struct md_rdev *rdev;
5877
5878 rdev = find_rdev(mddev, dev);
5879 if (!rdev)
5880 return -ENXIO;
5881
5882 clear_bit(Blocked, &rdev->flags);
5883 remove_and_add_spares(mddev, rdev);
5884
5885 if (rdev->raid_disk >= 0)
5886 goto busy;
5887
5888 kick_rdev_from_array(rdev);
5889 md_update_sb(mddev, 1);
5890 md_new_event(mddev);
5891
5892 return 0;
5893busy:
5894 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
5895 bdevname(rdev->bdev,b), mdname(mddev));
5896 return -EBUSY;
5897}
5898
5899static int hot_add_disk(struct mddev * mddev, dev_t dev)
5900{
5901 char b[BDEVNAME_SIZE];
5902 int err;
5903 struct md_rdev *rdev;
5904
5905 if (!mddev->pers)
5906 return -ENODEV;
5907
5908 if (mddev->major_version != 0) {
5909 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
5910 " version-0 superblocks.\n",
5911 mdname(mddev));
5912 return -EINVAL;
5913 }
5914 if (!mddev->pers->hot_add_disk) {
5915 printk(KERN_WARNING
5916 "%s: personality does not support diskops!\n",
5917 mdname(mddev));
5918 return -EINVAL;
5919 }
5920
5921 rdev = md_import_device(dev, -1, 0);
5922 if (IS_ERR(rdev)) {
5923 printk(KERN_WARNING
5924 "md: error, md_import_device() returned %ld\n",
5925 PTR_ERR(rdev));
5926 return -EINVAL;
5927 }
5928
5929 if (mddev->persistent)
5930 rdev->sb_start = calc_dev_sboffset(rdev);
5931 else
5932 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5933
5934 rdev->sectors = rdev->sb_start;
5935
5936 if (test_bit(Faulty, &rdev->flags)) {
5937 printk(KERN_WARNING
5938 "md: can not hot-add faulty %s disk to %s!\n",
5939 bdevname(rdev->bdev,b), mdname(mddev));
5940 err = -EINVAL;
5941 goto abort_export;
5942 }
5943 clear_bit(In_sync, &rdev->flags);
5944 rdev->desc_nr = -1;
5945 rdev->saved_raid_disk = -1;
5946 err = bind_rdev_to_array(rdev, mddev);
5947 if (err)
5948 goto abort_export;
5949
5950
5951
5952
5953
5954
5955 rdev->raid_disk = -1;
5956
5957 md_update_sb(mddev, 1);
5958
5959
5960
5961
5962
5963 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5964 md_wakeup_thread(mddev->thread);
5965 md_new_event(mddev);
5966 return 0;
5967
5968abort_export:
5969 export_rdev(rdev);
5970 return err;
5971}
5972
/*
 * Attach or detach a file-backed bitmap.
 *
 * @fd >= 0: use the file behind @fd as the array's bitmap file.  Fails with
 *           -EEXIST if a bitmap already exists, -EBADF if @fd cannot be
 *           resolved, or the error from deny_bitmap_write_access().
 * @fd <  0: remove the current bitmap; -ENOENT if there is none.
 *
 * On a running array the personality must implement ->quiesce and no
 * recovery may be flagged or running (-EBUSY otherwise); the bitmap is
 * created/loaded or destroyed between quiesce(1) and quiesce(0).
 *
 * Returns 0 on success or a negative errno.
 */
static int set_bitmap_file(struct mddev *mddev, int fd)
{
	int err;

	if (mddev->pers) {
		if (!mddev->pers->quiesce)
			return -EBUSY;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;
		/* Past these checks the bitmap may be changed. */
	}


	if (fd >= 0) {
		if (mddev->bitmap)
			return -EEXIST; /* a bitmap is already present */
		mddev->bitmap_info.file = fget(fd);

		if (mddev->bitmap_info.file == NULL) {
			printk(KERN_ERR "%s: error: failed to get bitmap file\n",
			       mdname(mddev));
			return -EBADF;
		}

		/* Claim exclusive write access; back out if that fails. */
		err = deny_bitmap_write_access(mddev->bitmap_info.file);
		if (err) {
			printk(KERN_ERR "%s: error: bitmap file is already in use\n",
			       mdname(mddev));
			fput(mddev->bitmap_info.file);
			mddev->bitmap_info.file = NULL;
			return err;
		}
		/* A file-backed bitmap does not use an on-device offset. */
		mddev->bitmap_info.offset = 0; /* file overrides offset */
	} else if (mddev->bitmap == NULL)
		return -ENOENT; /* nothing to remove */
	err = 0;
	if (mddev->pers) {
		mddev->pers->quiesce(mddev, 1);
		if (fd >= 0) {
			err = bitmap_create(mddev);
			if (!err)
				err = bitmap_load(mddev);
		}
		if (fd < 0 || err) {
			bitmap_destroy(mddev);
			/*
			 * fd doubles as a flag from here on: forcing it
			 * negative makes the cleanup below release the file
			 * after a failed attach as well.
			 */
			fd = -1; /* make sure to put the file */
		}
		mddev->pers->quiesce(mddev, 0);
	}
	if (fd < 0) {
		/* Detach (or failed attach): give the file back. */
		if (mddev->bitmap_info.file) {
			restore_bitmap_write_access(mddev->bitmap_info.file);
			fput(mddev->bitmap_info.file);
		}
		mddev->bitmap_info.file = NULL;
	}

	return err;
}
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
6047{
6048
6049 if (info->raid_disks == 0) {
6050
6051 if (info->major_version < 0 ||
6052 info->major_version >= ARRAY_SIZE(super_types) ||
6053 super_types[info->major_version].name == NULL) {
6054
6055 printk(KERN_INFO
6056 "md: superblock version %d not known\n",
6057 info->major_version);
6058 return -EINVAL;
6059 }
6060 mddev->major_version = info->major_version;
6061 mddev->minor_version = info->minor_version;
6062 mddev->patch_version = info->patch_version;
6063 mddev->persistent = !info->not_persistent;
6064
6065
6066
6067 mddev->ctime = get_seconds();
6068 return 0;
6069 }
6070 mddev->major_version = MD_MAJOR_VERSION;
6071 mddev->minor_version = MD_MINOR_VERSION;
6072 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6073 mddev->ctime = get_seconds();
6074
6075 mddev->level = info->level;
6076 mddev->clevel[0] = 0;
6077 mddev->dev_sectors = 2 * (sector_t)info->size;
6078 mddev->raid_disks = info->raid_disks;
6079
6080
6081
6082 if (info->state & (1<<MD_SB_CLEAN))
6083 mddev->recovery_cp = MaxSector;
6084 else
6085 mddev->recovery_cp = 0;
6086 mddev->persistent = ! info->not_persistent;
6087 mddev->external = 0;
6088
6089 mddev->layout = info->layout;
6090 mddev->chunk_sectors = info->chunk_size >> 9;
6091
6092 mddev->max_disks = MD_SB_DISKS;
6093
6094 if (mddev->persistent)
6095 mddev->flags = 0;
6096 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6097
6098 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6099 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6100 mddev->bitmap_info.offset = 0;
6101
6102 mddev->reshape_position = MaxSector;
6103
6104
6105
6106
6107 get_random_bytes(mddev->uuid, 16);
6108
6109 mddev->new_level = mddev->level;
6110 mddev->new_chunk_sectors = mddev->chunk_sectors;
6111 mddev->new_layout = mddev->layout;
6112 mddev->delta_disks = 0;
6113 mddev->reshape_backwards = 0;
6114
6115 return 0;
6116}
6117
6118void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6119{
6120 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6121
6122 if (mddev->external_size)
6123 return;
6124
6125 mddev->array_sectors = array_sectors;
6126}
6127EXPORT_SYMBOL(md_set_array_sectors);
6128
6129static int update_size(struct mddev *mddev, sector_t num_sectors)
6130{
6131 struct md_rdev *rdev;
6132 int rv;
6133 int fit = (num_sectors == 0);
6134
6135 if (mddev->pers->resize == NULL)
6136 return -EINVAL;
6137
6138
6139
6140
6141
6142
6143
6144
6145
6146 if (mddev->sync_thread)
6147 return -EBUSY;
6148
6149 rdev_for_each(rdev, mddev) {
6150 sector_t avail = rdev->sectors;
6151
6152 if (fit && (num_sectors == 0 || num_sectors > avail))
6153 num_sectors = avail;
6154 if (avail < num_sectors)
6155 return -ENOSPC;
6156 }
6157 rv = mddev->pers->resize(mddev, num_sectors);
6158 if (!rv)
6159 revalidate_disk(mddev->gendisk);
6160 return rv;
6161}
6162
6163static int update_raid_disks(struct mddev *mddev, int raid_disks)
6164{
6165 int rv;
6166 struct md_rdev *rdev;
6167
6168 if (mddev->pers->check_reshape == NULL)
6169 return -EINVAL;
6170 if (raid_disks <= 0 ||
6171 (mddev->max_disks && raid_disks >= mddev->max_disks))
6172 return -EINVAL;
6173 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
6174 return -EBUSY;
6175
6176 rdev_for_each(rdev, mddev) {
6177 if (mddev->raid_disks < raid_disks &&
6178 rdev->data_offset < rdev->new_data_offset)
6179 return -EINVAL;
6180 if (mddev->raid_disks > raid_disks &&
6181 rdev->data_offset > rdev->new_data_offset)
6182 return -EINVAL;
6183 }
6184
6185 mddev->delta_disks = raid_disks - mddev->raid_disks;
6186 if (mddev->delta_disks < 0)
6187 mddev->reshape_backwards = 1;
6188 else if (mddev->delta_disks > 0)
6189 mddev->reshape_backwards = 0;
6190
6191 rv = mddev->pers->check_reshape(mddev);
6192 if (rv < 0) {
6193 mddev->delta_disks = 0;
6194 mddev->reshape_backwards = 0;
6195 }
6196 return rv;
6197}
6198
6199
6200
6201
6202
6203
6204
6205
6206
6207
/*
 * Apply a change requested through @info to an array that already has a
 * personality (the function dereferences ->pers unconditionally).
 *
 * Exactly one of the following may differ from the current configuration:
 * size, raid_disks, layout, or presence of an internal bitmap.  Any other
 * difference, or more than one change at once, yields -EINVAL; no
 * difference at all yields 0 without touching anything.
 *
 * Returns 0 on success or a negative errno.
 */
static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
{
	int rv = 0;
	int cnt = 0;
	int state = 0;

	/* Build the current state word so it can be compared with info's. */
	if (mddev->bitmap && mddev->bitmap_info.offset)
		state |= (1 << MD_SB_BITMAP_PRESENT);

	/*
	 * These attributes must match exactly.  Of the state word only the
	 * bits selected by 0xfffffe00 are compared here; the bitmap-present
	 * bit is examined separately below.
	 */
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||

	    mddev->ctime         != info->ctime         ||
	    mddev->level         != info->level         ||

	    !mddev->persistent	 != info->not_persistent||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||

	    ((state^info->state) & 0xfffffe00)
	    )
		return -EINVAL;

	/* Count the requested changes; only a single one is allowed. */
	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
		cnt++;
	if (mddev->raid_disks != info->raid_disks)
		cnt++;
	if (mddev->layout != info->layout)
		cnt++;
	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
		cnt++;
	if (cnt == 0)
		return 0;
	if (cnt > 1)
		return -EINVAL;

	if (mddev->layout != info->layout) {
		/*
		 * Layout change: let the personality validate it through
		 * ->check_reshape(), and undo new_layout if it objects.
		 * This path returns without calling md_update_sb().
		 */
		if (mddev->pers->check_reshape == NULL)
			return -EINVAL;
		else {
			mddev->new_layout = info->layout;
			rv = mddev->pers->check_reshape(mddev);
			if (rv)
				mddev->new_layout = mddev->layout;
			return rv;
		}
	}
	/* info->size is in units of two sectors, hence the factor of 2. */
	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
		rv = update_size(mddev, (sector_t)info->size * 2);

	if (mddev->raid_disks != info->raid_disks)
		rv = update_raid_disks(mddev, info->raid_disks);

	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
		/* Bitmap changes need ->quiesce and an idle array. */
		if (mddev->pers->quiesce == NULL)
			return -EINVAL;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;
		if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
			/* Add an internal bitmap at the default location. */
			if (mddev->bitmap)
				return -EEXIST;
			if (mddev->bitmap_info.default_offset == 0)
				return -EINVAL;
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
			mddev->pers->quiesce(mddev, 1);
			rv = bitmap_create(mddev);
			if (!rv)
				rv = bitmap_load(mddev);
			if (rv)
				bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
		} else {
			/* Remove the bitmap; file-backed bitmaps are refused. */
			if (!mddev->bitmap)
				return -ENOENT;
			if (mddev->bitmap->storage.file)
				return -EINVAL;
			mddev->pers->quiesce(mddev, 1);
			bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
			mddev->bitmap_info.offset = 0;
		}
	}
	/* Reached for size, raid_disks and bitmap changes, even when rv != 0. */
	md_update_sb(mddev, 1);
	return rv;
}
6302
6303static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6304{
6305 struct md_rdev *rdev;
6306 int err = 0;
6307
6308 if (mddev->pers == NULL)
6309 return -ENODEV;
6310
6311 rcu_read_lock();
6312 rdev = find_rdev_rcu(mddev, dev);
6313 if (!rdev)
6314 err = -ENODEV;
6315 else {
6316 md_error(mddev, rdev);
6317 if (!test_bit(Faulty, &rdev->flags))
6318 err = -EBUSY;
6319 }
6320 rcu_read_unlock();
6321 return err;
6322}
6323
6324
6325
6326
6327
6328
6329
6330static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6331{
6332 struct mddev *mddev = bdev->bd_disk->private_data;
6333
6334 geo->heads = 2;
6335 geo->sectors = 4;
6336 geo->cylinders = mddev->array_sectors / 8;
6337 return 0;
6338}
6339
6340static int md_ioctl(struct block_device *bdev, fmode_t mode,
6341 unsigned int cmd, unsigned long arg)
6342{
6343 int err = 0;
6344 void __user *argp = (void __user *)arg;
6345 struct mddev *mddev = NULL;
6346 int ro;
6347
6348 switch (cmd) {
6349 case RAID_VERSION:
6350 case GET_ARRAY_INFO:
6351 case GET_DISK_INFO:
6352 break;
6353 default:
6354 if (!capable(CAP_SYS_ADMIN))
6355 return -EACCES;
6356 }
6357
6358
6359
6360
6361
6362 switch (cmd) {
6363 case RAID_VERSION:
6364 err = get_version(argp);
6365 goto done;
6366
6367 case PRINT_RAID_DEBUG:
6368 err = 0;
6369 md_print_devices();
6370 goto done;
6371
6372#ifndef MODULE
6373 case RAID_AUTORUN:
6374 err = 0;
6375 autostart_arrays(arg);
6376 goto done;
6377#endif
6378 default:;
6379 }
6380
6381
6382
6383
6384
6385 mddev = bdev->bd_disk->private_data;
6386
6387 if (!mddev) {
6388 BUG();
6389 goto abort;
6390 }
6391
6392
6393 switch (cmd) {
6394 case GET_ARRAY_INFO:
6395 if (!mddev->raid_disks && !mddev->external)
6396 err = -ENODEV;
6397 else
6398 err = get_array_info(mddev, argp);
6399 goto abort;
6400
6401 case GET_DISK_INFO:
6402 if (!mddev->raid_disks && !mddev->external)
6403 err = -ENODEV;
6404 else
6405 err = get_disk_info(mddev, argp);
6406 goto abort;
6407
6408 case SET_DISK_FAULTY:
6409 err = set_disk_faulty(mddev, new_decode_dev(arg));
6410 goto abort;
6411 }
6412
6413 if (cmd == ADD_NEW_DISK)
6414
6415 flush_workqueue(md_misc_wq);
6416
6417 if (cmd == HOT_REMOVE_DISK)
6418
6419 wait_event_interruptible_timeout(mddev->sb_wait,
6420 !test_bit(MD_RECOVERY_NEEDED,
6421 &mddev->flags),
6422 msecs_to_jiffies(5000));
6423 err = mddev_lock(mddev);
6424 if (err) {
6425 printk(KERN_INFO
6426 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6427 err, cmd);
6428 goto abort;
6429 }
6430
6431 if (cmd == SET_ARRAY_INFO) {
6432 mdu_array_info_t info;
6433 if (!arg)
6434 memset(&info, 0, sizeof(info));
6435 else if (copy_from_user(&info, argp, sizeof(info))) {
6436 err = -EFAULT;
6437 goto abort_unlock;
6438 }
6439 if (mddev->pers) {
6440 err = update_array_info(mddev, &info);
6441 if (err) {
6442 printk(KERN_WARNING "md: couldn't update"
6443 " array info. %d\n", err);
6444 goto abort_unlock;
6445 }
6446 goto done_unlock;
6447 }
6448 if (!list_empty(&mddev->disks)) {
6449 printk(KERN_WARNING
6450 "md: array %s already has disks!\n",
6451 mdname(mddev));
6452 err = -EBUSY;
6453 goto abort_unlock;
6454 }
6455 if (mddev->raid_disks) {
6456 printk(KERN_WARNING
6457 "md: array %s already initialised!\n",
6458 mdname(mddev));
6459 err = -EBUSY;
6460 goto abort_unlock;
6461 }
6462 err = set_array_info(mddev, &info);
6463 if (err) {
6464 printk(KERN_WARNING "md: couldn't set"
6465 " array info. %d\n", err);
6466 goto abort_unlock;
6467 }
6468 goto done_unlock;
6469 }
6470
6471
6472
6473
6474
6475
6476 if ((!mddev->raid_disks && !mddev->external)
6477 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6478 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6479 && cmd != GET_BITMAP_FILE) {
6480 err = -ENODEV;
6481 goto abort_unlock;
6482 }
6483
6484
6485
6486
6487 switch (cmd) {
6488 case GET_BITMAP_FILE:
6489 err = get_bitmap_file(mddev, argp);
6490 goto done_unlock;
6491
6492 case RESTART_ARRAY_RW:
6493 err = restart_array(mddev);
6494 goto done_unlock;
6495
6496 case STOP_ARRAY:
6497 err = do_md_stop(mddev, 0, bdev);
6498 goto done_unlock;
6499
6500 case STOP_ARRAY_RO:
6501 err = md_set_readonly(mddev, bdev);
6502 goto done_unlock;
6503
6504 case HOT_REMOVE_DISK:
6505 err = hot_remove_disk(mddev, new_decode_dev(arg));
6506 goto done_unlock;
6507
6508 case ADD_NEW_DISK:
6509
6510
6511
6512
6513 if (mddev->pers) {
6514 mdu_disk_info_t info;
6515 if (copy_from_user(&info, argp, sizeof(info)))
6516 err = -EFAULT;
6517 else if (!(info.state & (1<<MD_DISK_SYNC)))
6518
6519 break;
6520 else
6521 err = add_new_disk(mddev, &info);
6522 goto done_unlock;
6523 }
6524 break;
6525
6526 case BLKROSET:
6527 if (get_user(ro, (int __user *)(arg))) {
6528 err = -EFAULT;
6529 goto done_unlock;
6530 }
6531 err = -EINVAL;
6532
6533
6534
6535
6536 if (ro)
6537 goto done_unlock;
6538
6539
6540 if (mddev->ro != 1)
6541 goto done_unlock;
6542
6543
6544
6545
6546 if (mddev->pers) {
6547 err = restart_array(mddev);
6548 if (err == 0) {
6549 mddev->ro = 2;
6550 set_disk_ro(mddev->gendisk, 0);
6551 }
6552 }
6553 goto done_unlock;
6554 }
6555
6556
6557
6558
6559
6560
6561
6562
6563 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
6564 if (mddev->ro == 2) {
6565 mddev->ro = 0;
6566 sysfs_notify_dirent_safe(mddev->sysfs_state);
6567 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6568
6569
6570
6571
6572 if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
6573 mddev_unlock(mddev);
6574 wait_event(mddev->sb_wait,
6575 !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
6576 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6577 mddev_lock(mddev);
6578 }
6579 } else {
6580 err = -EROFS;
6581 goto abort_unlock;
6582 }
6583 }
6584
6585 switch (cmd) {
6586 case ADD_NEW_DISK:
6587 {
6588 mdu_disk_info_t info;
6589 if (copy_from_user(&info, argp, sizeof(info)))
6590 err = -EFAULT;
6591 else
6592 err = add_new_disk(mddev, &info);
6593 goto done_unlock;
6594 }
6595
6596 case HOT_ADD_DISK:
6597 err = hot_add_disk(mddev, new_decode_dev(arg));
6598 goto done_unlock;
6599
6600 case RUN_ARRAY:
6601 err = do_md_run(mddev);
6602 goto done_unlock;
6603
6604 case SET_BITMAP_FILE:
6605 err = set_bitmap_file(mddev, (int)arg);
6606 goto done_unlock;
6607
6608 default:
6609 err = -EINVAL;
6610 goto abort_unlock;
6611 }
6612
6613done_unlock:
6614abort_unlock:
6615 if (mddev->hold_active == UNTIL_IOCTL &&
6616 err != -EINVAL)
6617 mddev->hold_active = 0;
6618 mddev_unlock(mddev);
6619
6620 return err;
6621done:
6622 if (err)
6623 MD_BUG();
6624abort:
6625 return err;
6626}
6627#ifdef CONFIG_COMPAT
6628static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
6629 unsigned int cmd, unsigned long arg)
6630{
6631 switch (cmd) {
6632 case HOT_REMOVE_DISK:
6633 case HOT_ADD_DISK:
6634 case SET_DISK_FAULTY:
6635 case SET_BITMAP_FILE:
6636
6637 break;
6638 default:
6639 arg = (unsigned long)compat_ptr(arg);
6640 break;
6641 }
6642
6643 return md_ioctl(bdev, mode, cmd, arg);
6644}
6645#endif
6646
6647static int md_open(struct block_device *bdev, fmode_t mode)
6648{
6649
6650
6651
6652
6653 struct mddev *mddev = mddev_find(bdev->bd_dev);
6654 int err;
6655
6656 if (!mddev)
6657 return -ENODEV;
6658
6659 if (mddev->gendisk != bdev->bd_disk) {
6660
6661
6662
6663 mddev_put(mddev);
6664
6665 flush_workqueue(md_misc_wq);
6666
6667 return -ERESTARTSYS;
6668 }
6669 BUG_ON(mddev != bdev->bd_disk->private_data);
6670
6671 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
6672 goto out;
6673
6674 err = 0;
6675 atomic_inc(&mddev->openers);
6676 mutex_unlock(&mddev->open_mutex);
6677
6678 check_disk_change(bdev);
6679 out:
6680 return err;
6681}
6682
6683static void md_release(struct gendisk *disk, fmode_t mode)
6684{
6685 struct mddev *mddev = disk->private_data;
6686
6687 BUG_ON(!mddev);
6688 atomic_dec(&mddev->openers);
6689 mddev_put(mddev);
6690}
6691
6692static int md_media_changed(struct gendisk *disk)
6693{
6694 struct mddev *mddev = disk->private_data;
6695
6696 return mddev->changed;
6697}
6698
6699static int md_revalidate(struct gendisk *disk)
6700{
6701 struct mddev *mddev = disk->private_data;
6702
6703 mddev->changed = 0;
6704 return 0;
6705}
6706static const struct block_device_operations md_fops =
6707{
6708 .owner = THIS_MODULE,
6709 .open = md_open,
6710 .release = md_release,
6711 .ioctl = md_ioctl,
6712#ifdef CONFIG_COMPAT
6713 .compat_ioctl = md_compat_ioctl,
6714#endif
6715 .getgeo = md_getgeo,
6716 .media_changed = md_media_changed,
6717 .revalidate_disk= md_revalidate,
6718};
6719
6720static int md_thread(void * arg)
6721{
6722 struct md_thread *thread = arg;
6723
6724
6725
6726
6727
6728
6729
6730
6731
6732
6733
6734
6735
6736 allow_signal(SIGKILL);
6737 while (!kthread_should_stop()) {
6738
6739
6740
6741
6742
6743
6744 if (signal_pending(current))
6745 flush_signals(current);
6746
6747 wait_event_interruptible_timeout
6748 (thread->wqueue,
6749 test_bit(THREAD_WAKEUP, &thread->flags)
6750 || kthread_should_stop(),
6751 thread->timeout);
6752
6753 clear_bit(THREAD_WAKEUP, &thread->flags);
6754 if (!kthread_should_stop())
6755 thread->run(thread);
6756 }
6757
6758 return 0;
6759}
6760
6761void md_wakeup_thread(struct md_thread *thread)
6762{
6763 if (thread) {
6764 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6765 set_bit(THREAD_WAKEUP, &thread->flags);
6766 wake_up(&thread->wqueue);
6767 }
6768}
6769
6770struct md_thread *md_register_thread(void (*run) (struct md_thread *),
6771 struct mddev *mddev, const char *name)
6772{
6773 struct md_thread *thread;
6774
6775 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
6776 if (!thread)
6777 return NULL;
6778
6779 init_waitqueue_head(&thread->wqueue);
6780
6781 thread->run = run;
6782 thread->mddev = mddev;
6783 thread->timeout = MAX_SCHEDULE_TIMEOUT;
6784 thread->tsk = kthread_run(md_thread, thread,
6785 "%s_%s",
6786 mdname(thread->mddev),
6787 name);
6788 if (IS_ERR(thread->tsk)) {
6789 kfree(thread);
6790 return NULL;
6791 }
6792 return thread;
6793}
6794
6795void md_unregister_thread(struct md_thread **threadp)
6796{
6797 struct md_thread *thread = *threadp;
6798 if (!thread)
6799 return;
6800 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
6801
6802
6803
6804 spin_lock(&pers_lock);
6805 *threadp = NULL;
6806 spin_unlock(&pers_lock);
6807
6808 kthread_stop(thread->tsk);
6809 kfree(thread);
6810}
6811
6812void md_error(struct mddev *mddev, struct md_rdev *rdev)
6813{
6814 if (!mddev) {
6815 MD_BUG();
6816 return;
6817 }
6818
6819 if (!rdev || test_bit(Faulty, &rdev->flags))
6820 return;
6821
6822 if (!mddev->pers || !mddev->pers->error_handler)
6823 return;
6824 mddev->pers->error_handler(mddev,rdev);
6825 if (mddev->degraded)
6826 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6827 sysfs_notify_dirent_safe(rdev->sysfs_state);
6828 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6829 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6830 md_wakeup_thread(mddev->thread);
6831 if (mddev->event_work.func)
6832 queue_work(md_misc_wq, &mddev->event_work);
6833 md_new_event_inintr(mddev);
6834}
6835
6836
6837
6838static void status_unused(struct seq_file *seq)
6839{
6840 int i = 0;
6841 struct md_rdev *rdev;
6842
6843 seq_printf(seq, "unused devices: ");
6844
6845 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
6846 char b[BDEVNAME_SIZE];
6847 i++;
6848 seq_printf(seq, "%s ",
6849 bdevname(rdev->bdev,b));
6850 }
6851 if (!i)
6852 seq_printf(seq, "<none>");
6853
6854 seq_printf(seq, "\n");
6855}
6856
6857
/*
 * status_resync() - emit the progress line for a running
 * resync/recovery/check/reshape: a 20-character progress bar, the action
 * name, percentage done, position/total, an estimated time to finish and
 * the current speed.
 */
static void status_resync(struct seq_file *seq, struct mddev * mddev)
{
	sector_t max_sectors, resync, res;
	unsigned long dt, db;
	sector_t rt;
	int scale;
	unsigned int per_milli;

	/*
	 * curr_resync values up to 3 are treated as "nothing done yet";
	 * otherwise the sectors still in flight are not counted as done.
	 */
	if (mddev->curr_resync <= 3)
		resync = 0;
	else
		resync = mddev->curr_resync
			- atomic_read(&mddev->recovery_active);

	/* Resync and reshape span resync_max_sectors; recovery spans one device. */
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
	    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		max_sectors = mddev->resync_max_sectors;
	else
		max_sectors = mddev->dev_sectors;

	/*
	 * A zero total would make the progress maths meaningless.
	 */
	if (!max_sectors) {
		MD_BUG();
		return;
	}
	/*
	 * Compute progress in thousandths.  Both operands are shifted right
	 * by 'scale' so that the divisor handed to sector_div() fits in a
	 * u32; 'scale' starts at 10 and is only raised when sector_t is
	 * wider than unsigned long.  The '+1' keeps the divisor non-zero.
	 */
	scale = 10;
	if (sizeof(sector_t) > sizeof(unsigned long)) {
		while ( max_sectors/2 > (1ULL<<(scale+32)))
			scale++;
	}
	res = (resync>>scale)*1000;
	sector_div(res, (u32)((max_sectors>>scale)+1));

	per_milli = res;
	{
		/* 20-slot bar: one '=' per 5% done, then '>', then '.' padding. */
		int i, x = per_milli/50, y = 20-x;
		seq_printf(seq, "[");
		for (i = 0; i < x; i++)
			seq_printf(seq, "=");
		seq_printf(seq, ">");
		for (i = 0; i < y; i++)
			seq_printf(seq, ".");
		seq_printf(seq, "] ");
	}
	/* Sector counts are halved for display (presumably to show KiB). */
	seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
		   (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
		    "reshape" :
		    (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
		     "check" :
		     (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
		      "resync" : "recovery"))),
		   per_milli/10, per_milli % 10,
		   (unsigned long long) resync/2,
		   (unsigned long long) max_sectors/2);

	/*
	 * dt: seconds elapsed since the reference mark (at least 1).
	 * db: sectors completed since the reference mark.
	 * rt: estimated seconds remaining.
	 *
	 * rt = remaining / (db/32 + 1) * dt / 32.  The division is done
	 * before the multiplication so a narrow sector_t does not overflow.
	 * The divisor is scaled down by 32 (and the result shifted back by
	 * 5 bits) to limit precision loss, and the '+1' avoids dividing by
	 * zero when db is small.
	 */
	dt = ((jiffies - mddev->resync_mark) / HZ);
	if (!dt) dt++;
	db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
		- mddev->resync_mark_cnt;

	rt = max_sectors - resync;    /* number of remaining sectors */
	sector_div(rt, db/32+1);
	rt *= dt;
	rt >>= 5;

	/* Printed as minutes with one decimal digit (tenths of a minute). */
	seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
		   ((unsigned long)rt % 60)/6);

	seq_printf(seq, " speed=%ldK/sec", db/2/dt);
}
6949
6950static void *md_seq_start(struct seq_file *seq, loff_t *pos)
6951{
6952 struct list_head *tmp;
6953 loff_t l = *pos;
6954 struct mddev *mddev;
6955
6956 if (l >= 0x10000)
6957 return NULL;
6958 if (!l--)
6959
6960 return (void*)1;
6961
6962 spin_lock(&all_mddevs_lock);
6963 list_for_each(tmp,&all_mddevs)
6964 if (!l--) {
6965 mddev = list_entry(tmp, struct mddev, all_mddevs);
6966 mddev_get(mddev);
6967 spin_unlock(&all_mddevs_lock);
6968 return mddev;
6969 }
6970 spin_unlock(&all_mddevs_lock);
6971 if (!l--)
6972 return (void*)2;
6973 return NULL;
6974}
6975
6976static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
6977{
6978 struct list_head *tmp;
6979 struct mddev *next_mddev, *mddev = v;
6980
6981 ++*pos;
6982 if (v == (void*)2)
6983 return NULL;
6984
6985 spin_lock(&all_mddevs_lock);
6986 if (v == (void*)1)
6987 tmp = all_mddevs.next;
6988 else
6989 tmp = mddev->all_mddevs.next;
6990 if (tmp != &all_mddevs)
6991 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
6992 else {
6993 next_mddev = (void*)2;
6994 *pos = 0x10000;
6995 }
6996 spin_unlock(&all_mddevs_lock);
6997
6998 if (v != (void*)1)
6999 mddev_put(mddev);
7000 return next_mddev;
7001
7002}
7003
7004static void md_seq_stop(struct seq_file *seq, void *v)
7005{
7006 struct mddev *mddev = v;
7007
7008 if (mddev && v != (void*)1 && v != (void*)2)
7009 mddev_put(mddev);
7010}
7011
7012static int md_seq_show(struct seq_file *seq, void *v)
7013{
7014 struct mddev *mddev = v;
7015 sector_t sectors;
7016 struct md_rdev *rdev;
7017
7018 if (v == (void*)1) {
7019 struct md_personality *pers;
7020 seq_printf(seq, "Personalities : ");
7021 spin_lock(&pers_lock);
7022 list_for_each_entry(pers, &pers_list, list)
7023 seq_printf(seq, "[%s] ", pers->name);
7024
7025 spin_unlock(&pers_lock);
7026 seq_printf(seq, "\n");
7027 seq->poll_event = atomic_read(&md_event_count);
7028 return 0;
7029 }
7030 if (v == (void*)2) {
7031 status_unused(seq);
7032 return 0;
7033 }
7034
7035 if (mddev_lock(mddev) < 0)
7036 return -EINTR;
7037
7038 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7039 seq_printf(seq, "%s : %sactive", mdname(mddev),
7040 mddev->pers ? "" : "in");
7041 if (mddev->pers) {
7042 if (mddev->ro==1)
7043 seq_printf(seq, " (read-only)");
7044 if (mddev->ro==2)
7045 seq_printf(seq, " (auto-read-only)");
7046 seq_printf(seq, " %s", mddev->pers->name);
7047 }
7048
7049 sectors = 0;
7050 rdev_for_each(rdev, mddev) {
7051 char b[BDEVNAME_SIZE];
7052 seq_printf(seq, " %s[%d]",
7053 bdevname(rdev->bdev,b), rdev->desc_nr);
7054 if (test_bit(WriteMostly, &rdev->flags))
7055 seq_printf(seq, "(W)");
7056 if (test_bit(Faulty, &rdev->flags)) {
7057 seq_printf(seq, "(F)");
7058 continue;
7059 }
7060 if (rdev->raid_disk < 0)
7061 seq_printf(seq, "(S)");
7062 if (test_bit(Replacement, &rdev->flags))
7063 seq_printf(seq, "(R)");
7064 sectors += rdev->sectors;
7065 }
7066
7067 if (!list_empty(&mddev->disks)) {
7068 if (mddev->pers)
7069 seq_printf(seq, "\n %llu blocks",
7070 (unsigned long long)
7071 mddev->array_sectors / 2);
7072 else
7073 seq_printf(seq, "\n %llu blocks",
7074 (unsigned long long)sectors / 2);
7075 }
7076 if (mddev->persistent) {
7077 if (mddev->major_version != 0 ||
7078 mddev->minor_version != 90) {
7079 seq_printf(seq," super %d.%d",
7080 mddev->major_version,
7081 mddev->minor_version);
7082 }
7083 } else if (mddev->external)
7084 seq_printf(seq, " super external:%s",
7085 mddev->metadata_type);
7086 else
7087 seq_printf(seq, " super non-persistent");
7088
7089 if (mddev->pers) {
7090 mddev->pers->status(seq, mddev);
7091 seq_printf(seq, "\n ");
7092 if (mddev->pers->sync_request) {
7093 if (mddev->curr_resync > 2) {
7094 status_resync(seq, mddev);
7095 seq_printf(seq, "\n ");
7096 } else if (mddev->curr_resync >= 1)
7097 seq_printf(seq, "\tresync=DELAYED\n ");
7098 else if (mddev->recovery_cp < MaxSector)
7099 seq_printf(seq, "\tresync=PENDING\n ");
7100 }
7101 } else
7102 seq_printf(seq, "\n ");
7103
7104 bitmap_status(seq, mddev->bitmap);
7105
7106 seq_printf(seq, "\n");
7107 }
7108 mddev_unlock(mddev);
7109
7110 return 0;
7111}
7112
7113static const struct seq_operations md_seq_ops = {
7114 .start = md_seq_start,
7115 .next = md_seq_next,
7116 .stop = md_seq_stop,
7117 .show = md_seq_show,
7118};
7119
7120static int md_seq_open(struct inode *inode, struct file *file)
7121{
7122 struct seq_file *seq;
7123 int error;
7124
7125 error = seq_open(file, &md_seq_ops);
7126 if (error)
7127 return error;
7128
7129 seq = file->private_data;
7130 seq->poll_event = atomic_read(&md_event_count);
7131 return error;
7132}
7133
7134static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7135{
7136 struct seq_file *seq = filp->private_data;
7137 int mask;
7138
7139 poll_wait(filp, &md_event_waiters, wait);
7140
7141
7142 mask = POLLIN | POLLRDNORM;
7143
7144 if (seq->poll_event != atomic_read(&md_event_count))
7145 mask |= POLLERR | POLLPRI;
7146 return mask;
7147}
7148
7149static const struct file_operations md_seq_fops = {
7150 .owner = THIS_MODULE,
7151 .open = md_seq_open,
7152 .read = seq_read,
7153 .llseek = seq_lseek,
7154 .release = seq_release_private,
7155 .poll = mdstat_poll,
7156};
7157
7158int register_md_personality(struct md_personality *p)
7159{
7160 spin_lock(&pers_lock);
7161 list_add_tail(&p->list, &pers_list);
7162 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
7163 spin_unlock(&pers_lock);
7164 return 0;
7165}
7166
7167int unregister_md_personality(struct md_personality *p)
7168{
7169 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7170 spin_lock(&pers_lock);
7171 list_del_init(&p->list);
7172 spin_unlock(&pers_lock);
7173 return 0;
7174}
7175
7176static int is_mddev_idle(struct mddev *mddev, int init)
7177{
7178 struct md_rdev * rdev;
7179 int idle;
7180 int curr_events;
7181
7182 idle = 1;
7183 rcu_read_lock();
7184 rdev_for_each_rcu(rdev, mddev) {
7185 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7186 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7187 (int)part_stat_read(&disk->part0, sectors[1]) -
7188 atomic_read(&disk->sync_io);
7189
7190
7191
7192
7193
7194
7195
7196
7197
7198
7199
7200
7201
7202
7203
7204
7205
7206
7207
7208
7209
7210
7211 if (init || curr_events - rdev->last_events > 64) {
7212 rdev->last_events = curr_events;
7213 idle = 0;
7214 }
7215 }
7216 rcu_read_unlock();
7217 return idle;
7218}
7219
7220void md_done_sync(struct mddev *mddev, int blocks, int ok)
7221{
7222
7223 atomic_sub(blocks, &mddev->recovery_active);
7224 wake_up(&mddev->recovery_wait);
7225 if (!ok) {
7226 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7227 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7228 md_wakeup_thread(mddev->thread);
7229
7230 }
7231}
7232
7233
7234
7235
7236
7237
7238
/*
 * md_write_start() - called before a write bio is issued to the array.
 * @mddev: array being written to.
 * @bi:    the bio; anything that is not a WRITE returns immediately.
 *
 * Counts the write in writes_pending, switches an auto-read-only array
 * to read-write, marks an in-sync array as no longer in sync, and then
 * blocks until MD_CHANGE_PENDING has been cleared.
 */
void md_write_start(struct mddev *mddev, struct bio *bi)
{
	int did_change = 0;
	if (bio_data_dir(bi) != WRITE)
		return;

	/* Writes must never reach a fully read-only array. */
	BUG_ON(mddev->ro == 1);
	if (mddev->ro == 2) {
		/* need to switch to read/write */
		mddev->ro = 0;
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
		md_wakeup_thread(mddev->sync_thread);
		did_change = 1;
	}
	atomic_inc(&mddev->writes_pending);
	/* A write cancels a pending (== 1) safemode request. */
	if (mddev->safemode == 1)
		mddev->safemode = 0;
	/* Unlocked peek first; re-checked under write_lock before changing. */
	if (mddev->in_sync) {
		spin_lock_irq(&mddev->write_lock);
		if (mddev->in_sync) {
			mddev->in_sync = 0;
			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
			set_bit(MD_CHANGE_PENDING, &mddev->flags);
			md_wakeup_thread(mddev->thread);
			did_change = 1;
		}
		spin_unlock_irq(&mddev->write_lock);
	}
	if (did_change)
		sysfs_notify_dirent_safe(mddev->sysfs_state);
	/* Hold the write back until the pending change flag has been cleared. */
	wait_event(mddev->sb_wait,
		   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
}
7273
7274void md_write_end(struct mddev *mddev)
7275{
7276 if (atomic_dec_and_test(&mddev->writes_pending)) {
7277 if (mddev->safemode == 2)
7278 md_wakeup_thread(mddev->thread);
7279 else if (mddev->safemode_delay)
7280 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7281 }
7282}
7283
7284
7285
7286
7287
7288
7289
7290
7291
7292
7293int md_allow_write(struct mddev *mddev)
7294{
7295 if (!mddev->pers)
7296 return 0;
7297 if (mddev->ro)
7298 return 0;
7299 if (!mddev->pers->sync_request)
7300 return 0;
7301
7302 spin_lock_irq(&mddev->write_lock);
7303 if (mddev->in_sync) {
7304 mddev->in_sync = 0;
7305 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7306 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7307 if (mddev->safemode_delay &&
7308 mddev->safemode == 0)
7309 mddev->safemode = 1;
7310 spin_unlock_irq(&mddev->write_lock);
7311 md_update_sb(mddev, 0);
7312 sysfs_notify_dirent_safe(mddev->sysfs_state);
7313 } else
7314 spin_unlock_irq(&mddev->write_lock);
7315
7316 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7317 return -EAGAIN;
7318 else
7319 return 0;
7320}
7321EXPORT_SYMBOL_GPL(md_allow_write);
7322
/* Number of (time, sectors) samples kept for the speed estimate. */
#define SYNC_MARKS	10
/* Minimum age of the newest sample before a new one is taken. */
#define	SYNC_MARK_STEP	(3*HZ)
/* Maximum interval between checkpoints of curr_resync_completed. */
#define UPDATE_FREQUENCY (5*60*HZ)
/*
 * md_do_sync() - body of the sync thread: performs a resync, check,
 * repair, reshape or recovery of the array, whichever the MD_RECOVERY_*
 * bits in mddev->recovery select.
 *
 * The function
 *  - waits its turn if another array sharing physical units is syncing;
 *  - picks the start position and the total range;
 *  - repeatedly asks the personality's sync_request() for the next range,
 *    throttling between speed_min() and speed_max() and checkpointing
 *    progress along the way;
 *  - on exit records how far it got and sets MD_RECOVERY_DONE so the
 *    management thread can clean up.
 */
void md_do_sync(struct md_thread *thread)
{
	struct mddev *mddev = thread->mddev;
	struct mddev *mddev2;
	unsigned int currspeed = 0,
		 window;
	sector_t max_sectors,j, io_sectors;
	unsigned long mark[SYNC_MARKS];
	unsigned long update_time;
	sector_t mark_cnt[SYNC_MARKS];
	int last_mark,m;
	struct list_head *tmp;
	sector_t last_check;
	int skipped = 0;
	struct md_rdev *rdev;
	char *desc, *action = NULL;
	struct blk_plug plug;

	/* just in case the thread restarts after it has already finished */
	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
		return;
	if (mddev->ro) /* never try to sync a read-only array */
		return;

	/* Human-readable description, plus the action name for check/repair. */
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
			desc = "data-check";
			action = "check";
		} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
			desc = "requested-resync";
			action = "repair";
		} else
			desc = "resync";
	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		desc = "reshape";
	else
		desc = "recovery";

	mddev->last_sync_action = action ?: desc;

	/*
	 * Arbitration between arrays that share physical units (unless
	 * parallel_resync is set).  curr_resync is used as a small state
	 * machine:
	 *   2 - this array wants to start;
	 *   1 - this array has yielded to another one.
	 * When two arrays both want to start, the one with the lower address
	 * yields by dropping to 1; an array at 1 does not wait for a
	 * lower-addressed array.  Whoever has the lower or equal curr_resync
	 * sleeps on resync_wait and restarts the scan when woken.  The outer
	 * loop repeats until the array ends a full scan still at 2.
	 */

	do {
		mddev->curr_resync = 2;

	try_again:
		if (kthread_should_stop())
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);

		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			goto skip;
		for_each_mddev(mddev2, tmp) {
			if (mddev2 == mddev)
				continue;
			if (!mddev->parallel_resync
			&&  mddev2->curr_resync
			&&  match_mddev_units(mddev, mddev2)) {
				DEFINE_WAIT(wq);
				if (mddev < mddev2 && mddev->curr_resync == 2) {
					/* arbitrarily yield */
					mddev->curr_resync = 1;
					wake_up(&resync_wait);
				}
				if (mddev > mddev2 && mddev->curr_resync == 1)
					/* no need to wait here; the other array
					 * is expected to yield or wait instead
					 */
					continue;
				/*
				 * Queue on resync_wait before re-checking the
				 * condition so a wake-up cannot be missed.
				 */
				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
				if (!kthread_should_stop() &&
				    mddev2->curr_resync >= mddev->curr_resync) {
					printk(KERN_INFO "md: delaying %s of %s"
					       " until %s has finished (they"
					       " share one or more physical units)\n",
					       desc, mdname(mddev), mdname(mddev2));
					/* drop the iterator's reference before leaving the loop */
					mddev_put(mddev2);
					if (signal_pending(current))
						flush_signals(current);
					schedule();
					finish_wait(&resync_wait, &wq);
					goto try_again;
				}
				finish_wait(&resync_wait, &wq);
			}
		}
	} while (mddev->curr_resync < 2);

	/* Choose the range to cover and the starting position j. */
	j = 0;
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to physical size, but can be virtual size
		 */
		max_sectors = mddev->resync_max_sectors;
		atomic64_set(&mddev->resync_mismatches, 0);
		/* we don't use the checkpoint if there's a bitmap */
		if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			j = mddev->resync_min;
		else if (!mddev->bitmap)
			j = mddev->recovery_cp;

	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		max_sectors = mddev->resync_max_sectors;
	else {
		/* recovery follows the physical size of devices; start from
		 * the lowest recovery_offset of any active, non-faulty,
		 * not-yet-in-sync member
		 */
		max_sectors = mddev->dev_sectors;
		j = MaxSector;
		rcu_read_lock();
		rdev_for_each_rcu(rdev, mddev)
			if (rdev->raid_disk >= 0 &&
			    !test_bit(Faulty, &rdev->flags) &&
			    !test_bit(In_sync, &rdev->flags) &&
			    rdev->recovery_offset < j)
				j = rdev->recovery_offset;
		rcu_read_unlock();
	}

	printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
	printk(KERN_INFO "md: minimum _guaranteed_  speed:"
		" %d KB/sec/disk.\n", speed_min(mddev));
	printk(KERN_INFO "md: using maximum available idle IO bandwidth "
	       "(but not more than %d KB/sec) for %s.\n",
	       speed_max(mddev), desc);

	is_mddev_idle(mddev, 1); /* this initializes IO event counters */

	/* Seed every speed sample with "now, nothing done yet". */
	io_sectors = 0;
	for (m = 0; m < SYNC_MARKS; m++) {
		mark[m] = jiffies;
		mark_cnt[m] = io_sectors;
	}
	last_mark = 0;
	mddev->resync_mark = mark[last_mark];
	mddev->resync_mark_cnt = mark_cnt[last_mark];

	/*
	 * Throttling is only re-evaluated once per 'window' sectors of I/O.
	 */
	window = 32*(PAGE_SIZE/512);
	printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
		window/2, (unsigned long long)max_sectors/2);

	atomic_set(&mddev->recovery_active, 0);
	last_check = 0;

	if (j>2) {
		printk(KERN_INFO
		       "md: resuming %s of %s from checkpoint.\n",
		       desc, mdname(mddev));
		mddev->curr_resync = j;
	} else
		mddev->curr_resync = 3; /* no longer delayed */
	mddev->curr_resync_completed = j;
	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
	md_new_event(mddev);
	update_time = jiffies;

	blk_start_plug(&plug);
	while (j < max_sectors) {
		sector_t sectors;

		skipped = 0;

		/*
		 * Checkpoint (except during reshape) when more than 1/16 of
		 * the range has been done since the last checkpoint, when
		 * UPDATE_FREQUENCY has elapsed, or when at least half of the
		 * distance to resync_max has been covered.
		 */
		if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
		    ((mddev->curr_resync > mddev->curr_resync_completed &&
		      (mddev->curr_resync - mddev->curr_resync_completed)
		      > (max_sectors >> 4)) ||
		     time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
		     (j - mddev->curr_resync_completed)*2
		     >= mddev->resync_max - mddev->curr_resync_completed
			    )) {
			/* time to update curr_resync_completed: wait for all
			 * in-flight sync I/O first so the value is accurate
			 */
			wait_event(mddev->recovery_wait,
				   atomic_read(&mddev->recovery_active) == 0);
			mddev->curr_resync_completed = j;
			if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
			    j > mddev->recovery_cp)
				mddev->recovery_cp = j;
			update_time = jiffies;
			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
		}

		while (j >= mddev->resync_max && !kthread_should_stop()) {
			/* The position has reached resync_max: sleep until the
			 * limit is raised or the thread is told to stop.
			 * Signals are flushed so the interruptible wait can
			 * actually sleep.
			 */
			flush_signals(current); /* just in case */
			wait_event_interruptible(mddev->recovery_wait,
						 mddev->resync_max > j
						 || kthread_should_stop());
		}

		if (kthread_should_stop())
			goto interrupted;

		/* The last argument asks the personality to go faster while
		 * we are below the guaranteed minimum speed.
		 */
		sectors = mddev->pers->sync_request(mddev, j, &skipped,
						  currspeed < speed_min(mddev));
		if (sectors == 0) {
			/* no progress is possible: treat as interrupted */
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			goto out;
		}

		if (!skipped) { /* actual IO requested */
			io_sectors += sectors;
			atomic_add(sectors, &mddev->recovery_active);
		}

		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			break;

		j += sectors;
		if (j > 2)
			mddev->curr_resync = j;
		mddev->curr_mark_cnt = io_sectors;
		if (last_check == 0)
			/* this is the earliest that rebuild will be
			 * visible in the status output
			 */
			md_new_event(mddev);

		/* Skip the throttling logic until a full window has been issued. */
		if (last_check + window > io_sectors || j == max_sectors)
			continue;

		last_check = io_sectors;
	repeat:
		if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
			/* step marks: the oldest sample becomes the reference
			 * and its slot is reused for a fresh sample
			 */
			int next = (last_mark+1) % SYNC_MARKS;

			mddev->resync_mark = mark[next];
			mddev->resync_mark_cnt = mark_cnt[next];
			mark[next] = jiffies;
			mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
			last_mark = next;
		}


		if (kthread_should_stop())
			goto interrupted;


		/*
		 * Give other work a chance to run.  This is relied on for
		 * throttling, so it is not wrapped in a need_resched() test.
		 */
		cond_resched();

		/* KB/sec since the reference mark; the '+1's avoid zero. */
		currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
			/((jiffies-mddev->resync_mark)/HZ +1) +1;

		/* Above the minimum: back off if over the maximum or the
		 * disks are busy with other I/O.
		 */
		if (currspeed > speed_min(mddev)) {
			if ((currspeed > speed_max(mddev)) ||
					!is_mddev_idle(mddev, 0)) {
				msleep(500);
				goto repeat;
			}
		}
	}
	printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
 out:
	blk_finish_plug(&plug);
	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

	/* tell personality that we are finished */
	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);

	/* Record how far we got, except for a pure check and for runs that
	 * never moved past the initial curr_resync states.
	 */
	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
	    mddev->curr_resync > 2) {
		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
				if (mddev->curr_resync >= mddev->recovery_cp) {
					printk(KERN_INFO
					       "md: checkpointing %s of %s.\n",
					       desc, mdname(mddev));
					/* after an error only the confirmed
					 * position can be trusted
					 */
					if (test_bit(MD_RECOVERY_ERROR,
						&mddev->recovery))
						mddev->recovery_cp =
							mddev->curr_resync_completed;
					else
						mddev->recovery_cp =
							mddev->curr_resync;
				}
			} else
				mddev->recovery_cp = MaxSector;
		} else {
			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
				mddev->curr_resync = MaxSector;
			/* advance recovery_offset of every member still being
			 * recovered (skipped when delta_disks is negative)
			 */
			rcu_read_lock();
			rdev_for_each_rcu(rdev, mddev)
				if (rdev->raid_disk >= 0 &&
				    mddev->delta_disks >= 0 &&
				    !test_bit(Faulty, &rdev->flags) &&
				    !test_bit(In_sync, &rdev->flags) &&
				    rdev->recovery_offset < mddev->curr_resync)
					rdev->recovery_offset = mddev->curr_resync;
			rcu_read_unlock();
		}
	}
 skip:
	set_bit(MD_CHANGE_DEVS, &mddev->flags);

	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* We completed so min/max setting can be forgotten if used. */
		if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			mddev->resync_min = 0;
		mddev->resync_max = MaxSector;
	} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
		/* interrupted requested sync: resume from the confirmed position */
		mddev->resync_min = mddev->curr_resync_completed;
	mddev->curr_resync = 0;
	/* let arrays that were delayed behind this one proceed */
	wake_up(&resync_wait);
	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	return;

 interrupted:
	/*
	 * The thread was asked to stop: mark the sync as interrupted and run
	 * the common exit path.
	 */
	printk(KERN_INFO
	       "md: md_do_sync() got signal ... exiting\n");
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	goto out;

}
EXPORT_SYMBOL_GPL(md_do_sync);
7677
7678static int remove_and_add_spares(struct mddev *mddev,
7679 struct md_rdev *this)
7680{
7681 struct md_rdev *rdev;
7682 int spares = 0;
7683 int removed = 0;
7684
7685 rdev_for_each(rdev, mddev)
7686 if ((this == NULL || rdev == this) &&
7687 rdev->raid_disk >= 0 &&
7688 !test_bit(Blocked, &rdev->flags) &&
7689 (test_bit(Faulty, &rdev->flags) ||
7690 ! test_bit(In_sync, &rdev->flags)) &&
7691 atomic_read(&rdev->nr_pending)==0) {
7692 if (mddev->pers->hot_remove_disk(
7693 mddev, rdev) == 0) {
7694 sysfs_unlink_rdev(mddev, rdev);
7695 rdev->raid_disk = -1;
7696 removed++;
7697 }
7698 }
7699 if (removed && mddev->kobj.sd)
7700 sysfs_notify(&mddev->kobj, NULL, "degraded");
7701
7702 if (this)
7703 goto no_add;
7704
7705 rdev_for_each(rdev, mddev) {
7706 if (rdev->raid_disk >= 0 &&
7707 !test_bit(In_sync, &rdev->flags) &&
7708 !test_bit(Faulty, &rdev->flags))
7709 spares++;
7710 if (rdev->raid_disk >= 0)
7711 continue;
7712 if (test_bit(Faulty, &rdev->flags))
7713 continue;
7714 if (mddev->ro &&
7715 rdev->saved_raid_disk < 0)
7716 continue;
7717
7718 rdev->recovery_offset = 0;
7719 if (mddev->pers->
7720 hot_add_disk(mddev, rdev) == 0) {
7721 if (sysfs_link_rdev(mddev, rdev))
7722 ;
7723 spares++;
7724 md_new_event(mddev);
7725 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7726 }
7727 }
7728no_add:
7729 if (removed)
7730 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7731 return spares;
7732}
7733
7734
7735
7736
7737
7738
7739
7740
7741
7742
7743
7744
7745
7746
7747
7748
7749
7750
7751
7752
7753
7754
7755
7756void md_check_recovery(struct mddev *mddev)
7757{
7758 if (mddev->suspended)
7759 return;
7760
7761 if (mddev->bitmap)
7762 bitmap_daemon_work(mddev);
7763
7764 if (signal_pending(current)) {
7765 if (mddev->pers->sync_request && !mddev->external) {
7766 printk(KERN_INFO "md: %s in immediate safe mode\n",
7767 mdname(mddev));
7768 mddev->safemode = 2;
7769 }
7770 flush_signals(current);
7771 }
7772
7773 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
7774 return;
7775 if ( ! (
7776 (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) ||
7777 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7778 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
7779 (mddev->external == 0 && mddev->safemode == 1) ||
7780 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
7781 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
7782 ))
7783 return;
7784
7785 if (mddev_trylock(mddev)) {
7786 int spares = 0;
7787
7788 if (mddev->ro) {
7789
7790
7791
7792
7793
7794
7795
7796 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7797 remove_and_add_spares(mddev, NULL);
7798 mddev->pers->spare_active(mddev);
7799 goto unlock;
7800 }
7801
7802 if (!mddev->external) {
7803 int did_change = 0;
7804 spin_lock_irq(&mddev->write_lock);
7805 if (mddev->safemode &&
7806 !atomic_read(&mddev->writes_pending) &&
7807 !mddev->in_sync &&
7808 mddev->recovery_cp == MaxSector) {
7809 mddev->in_sync = 1;
7810 did_change = 1;
7811 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7812 }
7813 if (mddev->safemode == 1)
7814 mddev->safemode = 0;
7815 spin_unlock_irq(&mddev->write_lock);
7816 if (did_change)
7817 sysfs_notify_dirent_safe(mddev->sysfs_state);
7818 }
7819
7820 if (mddev->flags)
7821 md_update_sb(mddev, 0);
7822
7823 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
7824 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
7825
7826 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7827 goto unlock;
7828 }
7829 if (mddev->sync_thread) {
7830 md_reap_sync_thread(mddev);
7831 goto unlock;
7832 }
7833
7834
7835
7836 mddev->curr_resync_completed = 0;
7837 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7838
7839
7840
7841 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
7842 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
7843
7844 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7845 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
7846 goto unlock;
7847
7848
7849
7850
7851
7852
7853
7854 if (mddev->reshape_position != MaxSector) {
7855 if (mddev->pers->check_reshape == NULL ||
7856 mddev->pers->check_reshape(mddev) != 0)
7857
7858 goto unlock;
7859 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7860 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7861 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
7862 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7863 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7864 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7865 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7866 } else if (mddev->recovery_cp < MaxSector) {
7867 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7868 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7869 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
7870
7871 goto unlock;
7872
7873 if (mddev->pers->sync_request) {
7874 if (spares) {
7875
7876
7877
7878
7879 bitmap_write_all(mddev->bitmap);
7880 }
7881 mddev->sync_thread = md_register_thread(md_do_sync,
7882 mddev,
7883 "resync");
7884 if (!mddev->sync_thread) {
7885 printk(KERN_ERR "%s: could not start resync"
7886 " thread...\n",
7887 mdname(mddev));
7888
7889 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7890 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7891 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7892 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7893 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7894 } else
7895 md_wakeup_thread(mddev->sync_thread);
7896 sysfs_notify_dirent_safe(mddev->sysfs_action);
7897 md_new_event(mddev);
7898 }
7899 unlock:
7900 wake_up(&mddev->sb_wait);
7901
7902 if (!mddev->sync_thread) {
7903 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7904 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7905 &mddev->recovery))
7906 if (mddev->sysfs_action)
7907 sysfs_notify_dirent_safe(mddev->sysfs_action);
7908 }
7909 mddev_unlock(mddev);
7910 }
7911}
7912
7913void md_reap_sync_thread(struct mddev *mddev)
7914{
7915 struct md_rdev *rdev;
7916
7917
7918 md_unregister_thread(&mddev->sync_thread);
7919 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7920 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7921
7922
7923 if (mddev->pers->spare_active(mddev)) {
7924 sysfs_notify(&mddev->kobj, NULL,
7925 "degraded");
7926 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7927 }
7928 }
7929 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7930 mddev->pers->finish_reshape)
7931 mddev->pers->finish_reshape(mddev);
7932
7933
7934
7935
7936
7937
7938
7939 rdev_for_each(rdev, mddev)
7940 if (!mddev->degraded ||
7941 test_bit(In_sync, &rdev->flags))
7942 rdev->saved_raid_disk = -1;
7943
7944 md_update_sb(mddev, 1);
7945 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7946 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7947 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7948 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7949 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7950
7951 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7952 sysfs_notify_dirent_safe(mddev->sysfs_action);
7953 md_new_event(mddev);
7954 if (mddev->event_work.func)
7955 queue_work(md_misc_wq, &mddev->event_work);
7956}
7957
7958void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
7959{
7960 sysfs_notify_dirent_safe(rdev->sysfs_state);
7961 wait_event_timeout(rdev->blocked_wait,
7962 !test_bit(Blocked, &rdev->flags) &&
7963 !test_bit(BlockedBadBlocks, &rdev->flags),
7964 msecs_to_jiffies(5000));
7965 rdev_dec_pending(rdev, mddev);
7966}
7967EXPORT_SYMBOL(md_wait_for_blocked_rdev);
7968
7969void md_finish_reshape(struct mddev *mddev)
7970{
7971
7972 struct md_rdev *rdev;
7973
7974 rdev_for_each(rdev, mddev) {
7975 if (rdev->data_offset > rdev->new_data_offset)
7976 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
7977 else
7978 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
7979 rdev->data_offset = rdev->new_data_offset;
7980 }
7981}
7982EXPORT_SYMBOL(md_finish_reshape);
7983
7984
7985
7986
7987
7988
7989
7990
7991
7992
7993
7994
7995
7996
7997
7998
7999
8000
8001
8002
8003
8004
8005
8006
8007
8008
8009
8010int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
8011 sector_t *first_bad, int *bad_sectors)
8012{
8013 int hi;
8014 int lo;
8015 u64 *p = bb->page;
8016 int rv;
8017 sector_t target = s + sectors;
8018 unsigned seq;
8019
8020 if (bb->shift > 0) {
8021
8022 s >>= bb->shift;
8023 target += (1<<bb->shift) - 1;
8024 target >>= bb->shift;
8025 sectors = target - s;
8026 }
8027
8028
8029retry:
8030 seq = read_seqbegin(&bb->lock);
8031 lo = 0;
8032 rv = 0;
8033 hi = bb->count;
8034
8035
8036
8037
8038
8039
8040
8041
8042
8043 while (hi - lo > 1) {
8044 int mid = (lo + hi) / 2;
8045 sector_t a = BB_OFFSET(p[mid]);
8046 if (a < target)
8047
8048
8049 lo = mid;
8050 else
8051
8052 hi = mid;
8053 }
8054
8055 if (hi > lo) {
8056
8057
8058
8059 while (lo >= 0 &&
8060 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8061 if (BB_OFFSET(p[lo]) < target) {
8062
8063
8064
8065 if (rv != -1 && BB_ACK(p[lo]))
8066 rv = 1;
8067 else
8068 rv = -1;
8069 *first_bad = BB_OFFSET(p[lo]);
8070 *bad_sectors = BB_LEN(p[lo]);
8071 }
8072 lo--;
8073 }
8074 }
8075
8076 if (read_seqretry(&bb->lock, seq))
8077 goto retry;
8078
8079 return rv;
8080}
8081EXPORT_SYMBOL_GPL(md_is_badblock);
8082
8083
8084
8085
8086
8087
8088
8089
8090static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
8091 int acknowledged)
8092{
8093 u64 *p;
8094 int lo, hi;
8095 int rv = 1;
8096
8097 if (bb->shift < 0)
8098
8099 return 0;
8100
8101 if (bb->shift) {
8102
8103 sector_t next = s + sectors;
8104 s >>= bb->shift;
8105 next += (1<<bb->shift) - 1;
8106 next >>= bb->shift;
8107 sectors = next - s;
8108 }
8109
8110 write_seqlock_irq(&bb->lock);
8111
8112 p = bb->page;
8113 lo = 0;
8114 hi = bb->count;
8115
8116 while (hi - lo > 1) {
8117 int mid = (lo + hi) / 2;
8118 sector_t a = BB_OFFSET(p[mid]);
8119 if (a <= s)
8120 lo = mid;
8121 else
8122 hi = mid;
8123 }
8124 if (hi > lo && BB_OFFSET(p[lo]) > s)
8125 hi = lo;
8126
8127 if (hi > lo) {
8128
8129
8130
8131 sector_t a = BB_OFFSET(p[lo]);
8132 sector_t e = a + BB_LEN(p[lo]);
8133 int ack = BB_ACK(p[lo]);
8134 if (e >= s) {
8135
8136 if (s == a && s + sectors >= e)
8137
8138 ack = acknowledged;
8139 else
8140 ack = ack && acknowledged;
8141
8142 if (e < s + sectors)
8143 e = s + sectors;
8144 if (e - a <= BB_MAX_LEN) {
8145 p[lo] = BB_MAKE(a, e-a, ack);
8146 s = e;
8147 } else {
8148
8149
8150
8151 if (BB_LEN(p[lo]) != BB_MAX_LEN)
8152 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
8153 s = a + BB_MAX_LEN;
8154 }
8155 sectors = e - s;
8156 }
8157 }
8158 if (sectors && hi < bb->count) {
8159
8160
8161 sector_t a = BB_OFFSET(p[hi]);
8162 sector_t e = a + BB_LEN(p[hi]);
8163 int ack = BB_ACK(p[hi]);
8164 if (a <= s + sectors) {
8165
8166 if (e <= s + sectors) {
8167
8168 e = s + sectors;
8169 ack = acknowledged;
8170 } else
8171 ack = ack && acknowledged;
8172
8173 a = s;
8174 if (e - a <= BB_MAX_LEN) {
8175 p[hi] = BB_MAKE(a, e-a, ack);
8176 s = e;
8177 } else {
8178 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
8179 s = a + BB_MAX_LEN;
8180 }
8181 sectors = e - s;
8182 lo = hi;
8183 hi++;
8184 }
8185 }
8186 if (sectors == 0 && hi < bb->count) {
8187
8188
8189 sector_t a = BB_OFFSET(p[hi]);
8190 int lolen = BB_LEN(p[lo]);
8191 int hilen = BB_LEN(p[hi]);
8192 int newlen = lolen + hilen - (s - a);
8193 if (s >= a && newlen < BB_MAX_LEN) {
8194
8195 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
8196 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
8197 memmove(p + hi, p + hi + 1,
8198 (bb->count - hi - 1) * 8);
8199 bb->count--;
8200 }
8201 }
8202 while (sectors) {
8203
8204
8205 if (bb->count >= MD_MAX_BADBLOCKS) {
8206
8207 rv = 0;
8208 break;
8209 } else {
8210 int this_sectors = sectors;
8211 memmove(p + hi + 1, p + hi,
8212 (bb->count - hi) * 8);
8213 bb->count++;
8214
8215 if (this_sectors > BB_MAX_LEN)
8216 this_sectors = BB_MAX_LEN;
8217 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
8218 sectors -= this_sectors;
8219 s += this_sectors;
8220 }
8221 }
8222
8223 bb->changed = 1;
8224 if (!acknowledged)
8225 bb->unacked_exist = 1;
8226 write_sequnlock_irq(&bb->lock);
8227
8228 return rv;
8229}
8230
8231int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8232 int is_new)
8233{
8234 int rv;
8235 if (is_new)
8236 s += rdev->new_data_offset;
8237 else
8238 s += rdev->data_offset;
8239 rv = md_set_badblocks(&rdev->badblocks,
8240 s, sectors, 0);
8241 if (rv) {
8242
8243 sysfs_notify_dirent_safe(rdev->sysfs_state);
8244 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8245 md_wakeup_thread(rdev->mddev->thread);
8246 }
8247 return rv;
8248}
8249EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8250
8251
8252
8253
8254
8255
8256
8257static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
8258{
8259 u64 *p;
8260 int lo, hi;
8261 sector_t target = s + sectors;
8262 int rv = 0;
8263
8264 if (bb->shift > 0) {
8265
8266
8267
8268
8269
8270
8271 s += (1<<bb->shift) - 1;
8272 s >>= bb->shift;
8273 target >>= bb->shift;
8274 sectors = target - s;
8275 }
8276
8277 write_seqlock_irq(&bb->lock);
8278
8279 p = bb->page;
8280 lo = 0;
8281 hi = bb->count;
8282
8283 while (hi - lo > 1) {
8284 int mid = (lo + hi) / 2;
8285 sector_t a = BB_OFFSET(p[mid]);
8286 if (a < target)
8287 lo = mid;
8288 else
8289 hi = mid;
8290 }
8291 if (hi > lo) {
8292
8293
8294
8295
8296 if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
8297
8298 int ack = BB_ACK(p[lo]);
8299 sector_t a = BB_OFFSET(p[lo]);
8300 sector_t end = a + BB_LEN(p[lo]);
8301
8302 if (a < s) {
8303
8304 if (bb->count >= MD_MAX_BADBLOCKS) {
8305 rv = 0;
8306 goto out;
8307 }
8308 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
8309 bb->count++;
8310 p[lo] = BB_MAKE(a, s-a, ack);
8311 lo++;
8312 }
8313 p[lo] = BB_MAKE(target, end - target, ack);
8314
8315 hi = lo;
8316 lo--;
8317 }
8318 while (lo >= 0 &&
8319 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8320
8321 if (BB_OFFSET(p[lo]) < s) {
8322
8323 int ack = BB_ACK(p[lo]);
8324 sector_t start = BB_OFFSET(p[lo]);
8325 p[lo] = BB_MAKE(start, s - start, ack);
8326
8327 break;
8328 }
8329 lo--;
8330 }
8331
8332
8333
8334 if (hi - lo > 1) {
8335 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
8336 bb->count -= (hi - lo - 1);
8337 }
8338 }
8339
8340 bb->changed = 1;
8341out:
8342 write_sequnlock_irq(&bb->lock);
8343 return rv;
8344}
8345
8346int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8347 int is_new)
8348{
8349 if (is_new)
8350 s += rdev->new_data_offset;
8351 else
8352 s += rdev->data_offset;
8353 return md_clear_badblocks(&rdev->badblocks,
8354 s, sectors);
8355}
8356EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8357
8358
8359
8360
8361
8362
8363void md_ack_all_badblocks(struct badblocks *bb)
8364{
8365 if (bb->page == NULL || bb->changed)
8366
8367 return;
8368 write_seqlock_irq(&bb->lock);
8369
8370 if (bb->changed == 0 && bb->unacked_exist) {
8371 u64 *p = bb->page;
8372 int i;
8373 for (i = 0; i < bb->count ; i++) {
8374 if (!BB_ACK(p[i])) {
8375 sector_t start = BB_OFFSET(p[i]);
8376 int len = BB_LEN(p[i]);
8377 p[i] = BB_MAKE(start, len, 1);
8378 }
8379 }
8380 bb->unacked_exist = 0;
8381 }
8382 write_sequnlock_irq(&bb->lock);
8383}
8384EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
8385
8386
8387
8388
8389
8390
8391
8392
8393
8394
8395
8396
8397
8398static ssize_t
8399badblocks_show(struct badblocks *bb, char *page, int unack)
8400{
8401 size_t len;
8402 int i;
8403 u64 *p = bb->page;
8404 unsigned seq;
8405
8406 if (bb->shift < 0)
8407 return 0;
8408
8409retry:
8410 seq = read_seqbegin(&bb->lock);
8411
8412 len = 0;
8413 i = 0;
8414
8415 while (len < PAGE_SIZE && i < bb->count) {
8416 sector_t s = BB_OFFSET(p[i]);
8417 unsigned int length = BB_LEN(p[i]);
8418 int ack = BB_ACK(p[i]);
8419 i++;
8420
8421 if (unack && ack)
8422 continue;
8423
8424 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
8425 (unsigned long long)s << bb->shift,
8426 length << bb->shift);
8427 }
8428 if (unack && len == 0)
8429 bb->unacked_exist = 0;
8430
8431 if (read_seqretry(&bb->lock, seq))
8432 goto retry;
8433
8434 return len;
8435}
8436
8437#define DO_DEBUG 1
8438
8439static ssize_t
8440badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
8441{
8442 unsigned long long sector;
8443 int length;
8444 char newline;
8445#ifdef DO_DEBUG
8446
8447
8448
8449 int clear = 0;
8450 if (page[0] == '-') {
8451 clear = 1;
8452 page++;
8453 }
8454#endif
8455
8456 switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
8457 case 3:
8458 if (newline != '\n')
8459 return -EINVAL;
8460 case 2:
8461 if (length <= 0)
8462 return -EINVAL;
8463 break;
8464 default:
8465 return -EINVAL;
8466 }
8467
8468#ifdef DO_DEBUG
8469 if (clear) {
8470 md_clear_badblocks(bb, sector, length);
8471 return len;
8472 }
8473#endif
8474 if (md_set_badblocks(bb, sector, length, !unack))
8475 return len;
8476 else
8477 return -ENOSPC;
8478}
8479
8480static int md_notify_reboot(struct notifier_block *this,
8481 unsigned long code, void *x)
8482{
8483 struct list_head *tmp;
8484 struct mddev *mddev;
8485 int need_delay = 0;
8486
8487 for_each_mddev(mddev, tmp) {
8488 if (mddev_trylock(mddev)) {
8489 if (mddev->pers)
8490 __md_stop_writes(mddev);
8491 mddev->safemode = 2;
8492 mddev_unlock(mddev);
8493 }
8494 need_delay = 1;
8495 }
8496
8497
8498
8499
8500
8501
8502 if (need_delay)
8503 mdelay(1000*1);
8504
8505 return NOTIFY_DONE;
8506}
8507
8508static struct notifier_block md_notifier = {
8509 .notifier_call = md_notify_reboot,
8510 .next = NULL,
8511 .priority = INT_MAX,
8512};
8513
8514static void md_geninit(void)
8515{
8516 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8517
8518 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8519}
8520
8521static int __init md_init(void)
8522{
8523 int ret = -ENOMEM;
8524
8525 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8526 if (!md_wq)
8527 goto err_wq;
8528
8529 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8530 if (!md_misc_wq)
8531 goto err_misc_wq;
8532
8533 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8534 goto err_md;
8535
8536 if ((ret = register_blkdev(0, "mdp")) < 0)
8537 goto err_mdp;
8538 mdp_major = ret;
8539
8540 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
8541 md_probe, NULL, NULL);
8542 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8543 md_probe, NULL, NULL);
8544
8545 register_reboot_notifier(&md_notifier);
8546 raid_table_header = register_sysctl_table(raid_root_table);
8547
8548 md_geninit();
8549 return 0;
8550
8551err_mdp:
8552 unregister_blkdev(MD_MAJOR, "md");
8553err_md:
8554 destroy_workqueue(md_misc_wq);
8555err_misc_wq:
8556 destroy_workqueue(md_wq);
8557err_wq:
8558 return ret;
8559}
8560
8561#ifndef MODULE
8562
8563
8564
8565
8566
8567
8568static LIST_HEAD(all_detected_devices);
8569struct detected_devices_node {
8570 struct list_head list;
8571 dev_t dev;
8572};
8573
8574void md_autodetect_dev(dev_t dev)
8575{
8576 struct detected_devices_node *node_detected_dev;
8577
8578 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8579 if (node_detected_dev) {
8580 node_detected_dev->dev = dev;
8581 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8582 } else {
8583 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8584 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8585 }
8586}
8587
8588
8589static void autostart_arrays(int part)
8590{
8591 struct md_rdev *rdev;
8592 struct detected_devices_node *node_detected_dev;
8593 dev_t dev;
8594 int i_scanned, i_passed;
8595
8596 i_scanned = 0;
8597 i_passed = 0;
8598
8599 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8600
8601 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8602 i_scanned++;
8603 node_detected_dev = list_entry(all_detected_devices.next,
8604 struct detected_devices_node, list);
8605 list_del(&node_detected_dev->list);
8606 dev = node_detected_dev->dev;
8607 kfree(node_detected_dev);
8608 rdev = md_import_device(dev,0, 90);
8609 if (IS_ERR(rdev))
8610 continue;
8611
8612 if (test_bit(Faulty, &rdev->flags)) {
8613 MD_BUG();
8614 continue;
8615 }
8616 set_bit(AutoDetected, &rdev->flags);
8617 list_add(&rdev->same_set, &pending_raid_disks);
8618 i_passed++;
8619 }
8620
8621 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8622 i_scanned, i_passed);
8623
8624 autorun_devices(part);
8625}
8626
8627#endif
8628
8629static __exit void md_exit(void)
8630{
8631 struct mddev *mddev;
8632 struct list_head *tmp;
8633
8634 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
8635 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8636
8637 unregister_blkdev(MD_MAJOR,"md");
8638 unregister_blkdev(mdp_major, "mdp");
8639 unregister_reboot_notifier(&md_notifier);
8640 unregister_sysctl_table(raid_table_header);
8641 remove_proc_entry("mdstat", NULL);
8642 for_each_mddev(mddev, tmp) {
8643 export_array(mddev);
8644 mddev->hold_active = 0;
8645 }
8646 destroy_workqueue(md_misc_wq);
8647 destroy_workqueue(md_wq);
8648}
8649
8650subsys_initcall(md_init);
8651module_exit(md_exit)
8652
8653static int get_ro(char *buffer, struct kernel_param *kp)
8654{
8655 return sprintf(buffer, "%d", start_readonly);
8656}
8657static int set_ro(const char *val, struct kernel_param *kp)
8658{
8659 char *e;
8660 int num = simple_strtoul(val, &e, 10);
8661 if (*val && (*e == '\0' || *e == '\n')) {
8662 start_readonly = num;
8663 return 0;
8664 }
8665 return -EINVAL;
8666}
8667
8668module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8669module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8670
8671module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8672
8673EXPORT_SYMBOL(register_md_personality);
8674EXPORT_SYMBOL(unregister_md_personality);
8675EXPORT_SYMBOL(md_error);
8676EXPORT_SYMBOL(md_done_sync);
8677EXPORT_SYMBOL(md_write_start);
8678EXPORT_SYMBOL(md_write_end);
8679EXPORT_SYMBOL(md_register_thread);
8680EXPORT_SYMBOL(md_unregister_thread);
8681EXPORT_SYMBOL(md_wakeup_thread);
8682EXPORT_SYMBOL(md_check_recovery);
8683EXPORT_SYMBOL(md_reap_sync_thread);
8684MODULE_LICENSE("GPL");
8685MODULE_DESCRIPTION("MD RAID framework");
8686MODULE_ALIAS("md");
8687MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8688