#ifndef _MD_MD_H
#define _MD_MD_H

#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/badblocks.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include "md-cluster.h"

#define MaxSector (~(sector_t)0)

/*
 * The FAILFAST flags ask the lower layers not to retry a failed
 * request.  They are only used on devices where the failure can be
 * handled by redirecting the I/O to another array member, so giving
 * up quickly is cheaper than waiting out the retries.
 */
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)

/*
 * Per-rdev state used to serialize overlapping write I/O
 * (raid1 write-behind / serialize_policy support).
 */
struct serial_in_rdev {
	struct rb_root_cached serial_rb;
	spinlock_t serial_lock;
	wait_queue_head_t serial_io_wait;
};

/*
 * MD's view of one member (component) device of an array.
 */
struct md_rdev {
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t sectors;		/* Device size (in 512 byte sectors) */
	struct mddev *mddev;		/* RAID array if running */
	int last_events;		/* IO event timestamp */

	/*
	 * If meta_bdev is non-NULL, it means that a separate device is
	 * being used to store the metadata (superblock/bitmap) which
	 * would otherwise be contained on the same device as the data
	 * (bdev).
	 */
	struct block_device *meta_bdev;
	struct block_device *bdev;	/* block device handle */

	struct page *sb_page, *bb_page;
	int sb_loaded;
	__u64 sb_events;
	sector_t data_offset;		/* start of data in array */
	sector_t new_data_offset;	/* only relevant while reshaping */
	sector_t sb_start;		/* offset of the super block (in 512 byte sectors) */
	int sb_size;			/* bytes in the superblock */
	int preferred_minor;		/* autorun support */

	struct kobject kobj;

	/*
	 * A device can be in one of three states based on two flags:
	 * Not working:   faulty==1 in_sync==0
	 * Fully working: faulty==0 in_sync==1
	 * Working, but not in sync with the array:
	 *                faulty==0 in_sync==0
	 * It can never have faulty==1, in_sync==1; this reduces the
	 * burden of testing multiple flags in many cases.
	 */

	unsigned long flags;		/* bit set of 'enum flag_bits' bits */
	wait_queue_head_t blocked_wait;

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
	int new_raid_disk;		/* role that the device will have in
					 * the array after a level-change
					 * completes.
					 */
	int saved_raid_disk;		/* role that device used to have in
					 * the array and could again if we did
					 * a partial resync from the bitmap
					 */
	union {
		sector_t recovery_offset;/* If this device has been partially
					 * recovered, this is where we were
					 * up to.
					 */
		sector_t journal_tail;	/* If this device is a journal device,
					 * this is the journal tail (journal
					 * recovery start point)
					 */
	};

	atomic_t nr_pending;		/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
	atomic_t read_errors;		/* number of consecutive read errors
					 * that we have tried to ignore.
					 */
	time64_t last_read_error;	/* monotonic time since our
					 * last read error
					 */
	atomic_t corrected_errors;	/* number of corrected read errors,
					 * for reporting to userspace and
					 * storing in the superblock.
					 */

	struct serial_in_rdev *serial;	/* used for raid1 io serialization */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	struct kernfs_node *sysfs_state; /* handle for 'state' sysfs entry */
	/* handle for 'unacknowledged_bad_blocks' sysfs dentry */
	struct kernfs_node *sysfs_unack_badblocks;
	/* handle for 'bad_blocks' sysfs dentry */
	struct kernfs_node *sysfs_badblocks;
	struct badblocks badblocks;

	struct {
		short offset;		/* Offset from superblock to start of
					 * PPL. Not used by external metadata. */
		unsigned int size;	/* Size in sectors of the PPL space */
		sector_t sector;	/* First sector of the PPL space */
	} ppl;
};

enum flag_bits {
	Faulty,			/* device is known to have a fault */
	In_sync,		/* device is in_sync with rest of array */
	Bitmap_sync,		/* ..actually, not quite In_sync.  Need a
				 * bitmap-based recovery to get fully in sync.
				 * The bit is only meaningful before device
				 * has been passed to pers->hot_add_disk.
				 */
	WriteMostly,		/* Avoid reading if at all possible */
	AutoDetected,		/* added by auto-detect */
	Blocked,		/* An error occurred but has not yet
				 * been acknowledged by the metadata
				 * handler, so don't allow writes
				 * until it is cleared */
	WriteErrorSeen,		/* A write error has been seen on this
				 * device
				 */
	FaultRecorded,		/* Intermediate state for clearing
				 * Blocked.  The Fault is/will-be
				 * recorded in the metadata, but that
				 * metadata hasn't been stored safely
				 * on disk yet.
				 */
	BlockedBadBlocks,	/* A writer is blocked because they
				 * found an unacknowledged bad-block.
				 * This can safely be cleared at any
				 * time, and the writer will re-check.
				 * It may be set at any time, and at
				 * worst the writer will timeout and
				 * re-check.  So setting it as
				 * accurately as possible is good, but
				 * not absolutely critical.
				 */
	WantReplacement,	/* This device is a candidate to be
				 * hot-replaced, preferably because
				 * it is failing or has failed.
				 */
	Replacement,		/* This device is a replacement for
				 * a want-replacement device with the
				 * same raid_disk number.
				 */
	Candidate,		/* For clustered environments only:
				 * This device is seen locally but not
				 * by the whole cluster
				 */
	Journal,		/* This device is used as journal for
				 * raid-5/6.
				 * Usually, this device should be faster
				 * than other devices in the array
				 */
	ClusterRemove,
	RemoveSynchronized,	/* synchronize_rcu() was called after
				 * this device was known to be faulty,
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
	ExternalBbl,		/* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 * It is expected that no bad block log
				 * is present.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't treat it as
				 * one that can be failed.
				 */
	CollisionCheck,		/* check if there is collision between
				 * raid1 serial bios.
				 */
};
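
/*
 * Usage sketch (illustrative, not part of the original header): rdev
 * flags are plain bit numbers operated on with the generic bitops, so
 * a personality typically does something like:
 *
 *	if (test_bit(Faulty, &rdev->flags))
 *		return;				// skip failed member
 *	set_bit(WriteErrorSeen, &rdev->flags);
 */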

static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
			      sector_t *first_bad, int *bad_sectors)
{
	if (unlikely(rdev->badblocks.count)) {
		int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
					 sectors,
					 first_bad, bad_sectors);
		if (rv)
			*first_bad -= rdev->data_offset;
		return rv;
	}
	return 0;
}
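
/*
 * Usage sketch (illustrative; "this_sector" and "sectors" are made-up
 * variables): read paths consult the bad-block list before issuing I/O
 * and avoid, or shorten, ranges that overlap a recorded bad block:
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	if (is_badblock(rdev, this_sector, sectors,
 *			&first_bad, &bad_sectors))
 *		...;	// pick another mirror or trim the request
 */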
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			      int is_new);
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;

/* change UNSUPPORTED_MDDEV_FLAGS for each array type if a new flag is added */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, do not
				 * open it then */
	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
				   * already took resync lock, need to
				   * release the lock */
	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
				 * supported as calls to md_error() will
				 * never cause the array to become failed.
				 */
	MD_HAS_PPL,		/* The raid array has PPL feature set */
	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
				 * the metadata without taking reconfig_mutex.
				 */
	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
				 * without explicitly holding reconfig_mutex.
				 */
	MD_NOT_READY,		/* do_md_run() is active, so 'array_state'
				 * must not report that array is ready yet
				 */
	MD_BROKEN,		/* This is used in RAID-0/LINEAR only, to stop
				 * I/O in case an array member is gone/failed.
				 */
};

enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,	/* some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};
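
/*
 * Usage sketch (illustrative): personalities do not write the
 * superblock directly; they record what changed and wake the
 * management thread, which performs the update:
 *
 *	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
 *	md_wakeup_thread(mddev->thread);
 */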

#define NR_SERIAL_INFOS		8
/* record current range of serialized IOs */
struct serial_info {
	struct rb_node node;
	sector_t start;		/* start sector of rb node */
	sector_t last;		/* end sector of rb node */
	sector_t _subtree_last;	/* highest sector in subtree of rb node */
};

struct mddev {
	void *private;
	struct md_personality *pers;
	dev_t unit;
	int md_minor;
	struct list_head disks;
	unsigned long flags;
	unsigned long sb_flags;

	int suspended;
	atomic_t active_io;
	int ro;
	int sysfs_active;		/* set when sysfs deletes
					 * are happening, so run/
					 * takeover/stop are not safe
					 */
	struct gendisk *gendisk;

	struct kobject kobj;
	int hold_active;
#define UNTIL_IOCTL 1
#define UNTIL_STOP 2

	/* Superblock information */
	int major_version,
	    minor_version,
	    patch_version;
	int persistent;
	int external;			/* metadata is managed externally */
	char metadata_type[17];		/* externally set */
	unsigned int chunk_sectors;
	time64_t ctime, utime;
	int level, layout;
	char clevel[16];
	int raid_disks;
	int max_disks;
	sector_t dev_sectors;		/* used size of component devices */
	sector_t array_sectors;		/* exported array size */
	int external_size;		/* size managed externally */

	__u64 events;
	/* If the last 'event' was simply a clean->dirty transition, and
	 * we didn't write it to the spares, then it is safe and simple
	 * to just decrement the event count on a dirty->clean transition.
	 * So we record that possibility here.
	 */
	int can_decrease_events;

	char uuid[16];

	/* If the array is being reshaped, we need to record the
	 * new shape and an indication of where we are up to.
	 * This is written to the superblock.
	 * If reshape_position is MaxSector, then no reshape is
	 * happening (yet).
	 */
	sector_t reshape_position;
	int delta_disks, new_level, new_layout;
	unsigned int new_chunk_sectors;
	int reshape_backwards;

	struct md_thread *thread;	/* management thread */
	struct md_thread *sync_thread;	/* doing resync or reconstruct */

	/* 'last_sync_action' is initialized to "none".  It is set when a
	 * sync operation (i.e "data-check", "requested-resync", "resync",
	 * "recovery" or "reshape") is started.  It holds this value even
	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
	 * or finished).  It is overwritten when a new sync operation is
	 * begun.
	 */
	char *last_sync_action;
	sector_t curr_resync;		/* last block scheduled */
	/* As resync requests can complete out of order, we cannot easily
	 * track how much resync has been completed.  So we occasionally
	 * pause until everything completes, then set curr_resync_completed
	 * to curr_resync.  As such it may be well behind the real resync
	 * mark, but it is a value we are certain of.
	 */
	sector_t curr_resync_completed;
	unsigned long resync_mark;	/* a recent timestamp */
	sector_t resync_mark_cnt;	/* blocks written at resync_mark */
	sector_t curr_mark_cnt;		/* blocks scheduled now */

	sector_t resync_max_sectors;	/* may be set by personality */

	atomic64_t resync_mismatches;	/* count of sectors where
					 * parity/replica mismatch found
					 */

	/* allow user-space to request suspension of IO to regions
	 * of the array
	 */
	sector_t suspend_lo;
	sector_t suspend_hi;
	/* if zero, use the system-wide default */
	int sync_speed_min;
	int sync_speed_max;

	/* resync even though the same disks are shared among md-devices */
	int parallel_resync;

	int ok_start_degraded;

	unsigned long recovery;
	/* If a RAID personality determines that recovery (of a particular
	 * device) will fail due to a read error on the source device, it
	 * takes a copy of this number and does not attempt recovery again
	 * until this number changes.
	 */
	int recovery_disabled;

	int in_sync;			/* know to not need resync */
	/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
	 * that we are never stopping an array while it is open.
	 * 'reconfig_mutex' protects all other reconfiguration.
	 * These locks are separate due to conflicting interactions
	 * with bdev->bd_mutex.
	 * Lock ordering is:
	 *  reconfig_mutex -> bd_mutex
	 *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
	 */
	struct mutex open_mutex;
	struct mutex reconfig_mutex;
	atomic_t active;		/* general refcount */
	atomic_t openers;		/* number of active opens */

	int changed;			/* true if we might need to
					 * reread partition info */
	int degraded;			/* whether md should consider
					 * adding a spare
					 */

	atomic_t recovery_active;	/* blocks scheduled, but not written */
	wait_queue_head_t recovery_wait;
	sector_t recovery_cp;
	sector_t resync_min;		/* user requested sync starts here */
	sector_t resync_max;		/* resync should pause
					 * when it gets here */

	struct kernfs_node *sysfs_state;	/* handle for 'array_state'
						 * file in sysfs.
						 */
	struct kernfs_node *sysfs_action;	/* handle for 'sync_action' */
	struct kernfs_node *sysfs_completed;	/* handle for 'sync_completed' */
	struct kernfs_node *sysfs_degraded;	/* handle for 'degraded' */
	struct kernfs_node *sysfs_level;	/* handle for 'level' */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	/* "lock" protects:
	 *   flush_bio transition from NULL to !NULL
	 *   rdev superblocks, events
	 *   clearing MD_CHANGE_PENDING
	 *   in_sync - and related safemode and MD_CHANGE changes
	 *   pers (also protected by reconfig_mutex and pending IO).
	 *   clearing ->bitmap
	 *   clearing ->bitmap_info.file
	 *   changing ->resync_{min,max}
	 *   setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
	 */
	spinlock_t lock;
	wait_queue_head_t sb_wait;	/* for waiting on superblock updates */
	atomic_t pending_writes;	/* number of active superblock writes */

	unsigned int safemode;		/* if set, update "clean" superblock
					 * when no writes pending.
					 */
	unsigned int safemode_delay;
	struct timer_list safemode_timer;
	struct percpu_ref writes_pending;
	int sync_checkers;		/* # of threads checking writes_pending */
	struct request_queue *queue;	/* for plugging ... */

	struct bitmap *bitmap;		/* the bitmap for the device */
	struct {
		struct file *file;	/* the bitmap file */
		loff_t offset;		/* offset from superblock of
					 * start of bitmap. May be
					 * negative, but not '0'.
					 * For external metadata, offset
					 * from start of device.
					 */
		unsigned long space;	/* space available at this offset */
		loff_t default_offset;	/* this is the offset to use when
					 * hot-adding a bitmap.  It should
					 * eventually be settable by sysfs.
					 */
		unsigned long default_space;	/* space available at
						 * default offset */
		struct mutex mutex;
		unsigned long chunksize;
		unsigned long daemon_sleep;	/* how many jiffies between updates? */
		unsigned long max_write_behind;	/* write-behind mode */
		int external;
		int nodes;			/* maximum number of nodes in the cluster */
		char cluster_name[64];		/* name of the cluster */
	} bitmap_info;

	atomic_t max_corr_read_errors;	/* max read retries */
	struct list_head all_mddevs;

	struct attribute_group *to_remove;

	struct bio_set bio_set;
	struct bio_set sync_set;	/* for sync operations like
					 * metadata and bitmap writes
					 */
	mempool_t md_io_pool;

	/* Generic flush handling.
	 * The last to finish preflush schedules a worker to submit
	 * the rest of the request (without the REQ_PREFLUSH flag).
	 */
	struct bio *flush_bio;
	atomic_t flush_pending;
	ktime_t start_flush, last_flush; /* last_flush is when the last
					  * completed flush was started.
					  */
	struct work_struct flush_work;
	struct work_struct event_work;	/* used by dm to report failure event */
	mempool_t *serial_info_pool;
	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
	struct md_cluster_info *cluster_info;
	unsigned int good_device_nr;	/* good device num within cluster raid */
	unsigned int noio_flag;		/* for memalloc scope API */

	bool has_superblocks:1;
	bool fail_last_dev:1;
	bool serialize_policy:1;
};

enum recovery_flags {
	/*
	 * If neither SYNC nor RESHAPE are set, then it is a recovery.
	 */
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it. */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* a reshape is happening */
	MD_RECOVERY_FROZEN,	/* user request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because of io-error */
	MD_RECOVERY_WAIT,	/* waiting for pers->start() to finish */
	MD_RESYNCING_REMOTE,	/* remote node is running resync thread */
};
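
/*
 * Usage sketch (illustrative): a personality that notices a failed
 * member does not start recovery itself; it flags that recovery may be
 * needed and pokes the management thread, which evaluates these bits
 * in md_check_recovery():
 *
 *	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 *	md_wakeup_thread(mddev->thread);
 */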

static inline int __must_check mddev_lock(struct mddev *mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
}

/* Sometimes we need to take the lock in a situation where
 * failure due to interrupts is not acceptable.
 */
static inline void mddev_lock_nointr(struct mddev *mddev)
{
	mutex_lock(&mddev->reconfig_mutex);
}

static inline int mddev_trylock(struct mddev *mddev)
{
	return mutex_trylock(&mddev->reconfig_mutex);
}
extern void mddev_unlock(struct mddev *mddev);
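
/*
 * Usage sketch (illustrative): mddev_lock() is interruptible and
 * __must_check, so callers reconfiguring the array follow this pattern:
 *
 *	err = mddev_lock(mddev);
 *	if (err)
 *		return err;		// interrupted by a signal
 *	... reconfigure, e.g. update mddev->layout ...
 *	mddev_unlock(mddev);
 */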

static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
}

static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bio->bi_disk->sync_io);
}

struct md_personality
{
	char *name;
	int level;
	struct list_head list;
	struct module *owner;
	bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
	/*
	 * start up works that do NOT require md_thread. tasks that
	 * require md_thread should go into start()
	 */
	int (*run)(struct mddev *mddev);
	/* start up works that require md threads */
	int (*start)(struct mddev *mddev);
	void (*free)(struct mddev *mddev, void *priv);
	void (*status)(struct seq_file *seq, struct mddev *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*spare_active) (struct mddev *mddev);
	sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
	int (*resize) (struct mddev *mddev, sector_t sectors);
	sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
	int (*check_reshape) (struct mddev *mddev);
	int (*start_reshape) (struct mddev *mddev);
	void (*finish_reshape) (struct mddev *mddev);
	void (*update_reshape_pos) (struct mddev *mddev);
	/* quiesce suspends or resumes internal processing.
	 * 1 - stop new actions and wait for action io to complete
	 * 0 - return to normal behaviour
	 */
	void (*quiesce) (struct mddev *mddev, int quiesce);
	/* takeover is used to transition an array from one
	 * personality to another.  The new personality must be able
	 * to handle the data in the current layout.
	 * e.g. 2drive raid1 -> 2drive raid5
	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
	 * If the takeover succeeds, a new 'private' structure is returned.
	 * This needs to be installed and then ->run used to activate the
	 * array.
	 */
	void *(*takeover) (struct mddev *mddev);
	/* Changes the consistency policy of an active array. */
	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
};
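
/*
 * Registration sketch (illustrative; "example_personality",
 * "example_make_request", "example_run" and "example_free" are
 * hypothetical names, not part of md): a personality module fills in
 * the ops it supports and registers itself from its module init:
 *
 *	static struct md_personality example_personality = {
 *		.name		= "example",
 *		.level		= -8,		// made-up level number
 *		.owner		= THIS_MODULE,
 *		.make_request	= example_make_request,
 *		.run		= example_run,
 *		.free		= example_free,
 *	};
 *
 *	register_md_personality(&example_personality);
 */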

struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct mddev *, char *);
	ssize_t (*store)(struct mddev *, const char *, size_t);
};
extern struct attribute_group md_bitmap_group;

static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
{
	if (sd)
		return sysfs_get_dirent(sd, name);
	return sd;
}
static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
{
	if (sd)
		sysfs_notify_dirent(sd);
}

static inline char *mdname(struct mddev *mddev)
{
	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
}

static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
	} else
		return 0;
}

static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		sysfs_remove_link(&mddev->kobj, nm);
	}
}

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define rdev_for_each_list(rdev, tmp, head)				\
	list_for_each_entry_safe(rdev, tmp, head, same_set)

/*
 * iterates through the 'same array disks' ringlist
 */
#define rdev_for_each(rdev, mddev)				\
	list_for_each_entry(rdev, &((mddev)->disks), same_set)

#define rdev_for_each_safe(rdev, tmp, mddev)			\
	list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

#define rdev_for_each_rcu(rdev, mddev)				\
	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
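
/*
 * Usage sketch (illustrative): walk the members of an array.  The
 * plain variant requires the reconfig mutex (or other exclusion); the
 * _rcu variant is for readers under rcu_read_lock():
 *
 *	struct md_rdev *rdev;
 *
 *	rdev_for_each(rdev, mddev) {
 *		if (test_bit(In_sync, &rdev->flags))
 *			...;
 *	}
 */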

struct md_thread {
	void (*run) (struct md_thread *thread);
	struct mddev *mddev;
	wait_queue_head_t wqueue;
	unsigned long flags;
	struct task_struct *tsk;
	unsigned long timeout;
	void *private;
};

#define THREAD_WAKEUP  0

static inline void safe_put_page(struct page *p)
{
	if (p) put_page(p);
}

extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
extern int register_md_cluster_operations(struct md_cluster_operations *ops,
		struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);
extern struct md_thread *md_register_thread(
	void (*run)(struct md_thread *thread),
	struct mddev *mddev,
	const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
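
/*
 * Usage sketch (illustrative; "example_d" is a hypothetical thread
 * function): a personality typically registers its daemon at run()
 * time and unregisters it when the array stops.  md_register_thread()
 * returns NULL on failure:
 *
 *	mddev->thread = md_register_thread(example_d, mddev, "example");
 *	if (!mddev->thread)
 *		return -ENOMEM;
 *	...
 *	md_unregister_thread(&mddev->thread);
 */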
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);

extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
			   sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
			struct page *page, int op, int op_flags,
			bool metadata_op);
extern void md_do_sync(struct md_thread *thread);
extern void md_new_event(struct mddev *mddev);
extern void md_allow_write(struct mddev *mddev);
extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);

extern void mddev_init(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
extern int md_start(struct mddev *mddev);
extern void md_stop(struct mddev *mddev);
extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
extern void md_rdev_clear(struct md_rdev *rdev);

extern void md_handle_request(struct mddev *mddev, struct bio *bio);
extern void mddev_suspend(struct mddev *mddev);
extern void mddev_resume(struct mddev *mddev);
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
				   struct mddev *mddev);

extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev *rdev);
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				     bool is_suspend);
extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				      bool is_suspend);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
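
/*
 * Usage sketch (illustrative): the _rcu lookup helpers only pin the
 * rdev while the RCU read lock is held, so take a reference (e.g. via
 * nr_pending) before dropping it if the device is needed afterwards:
 *
 *	rcu_read_lock();
 *	rdev = md_find_rdev_nr_rcu(mddev, nr);
 *	if (rdev)
 *		atomic_inc(&rdev->nr_pending);
 *	rcu_read_unlock();
 */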

static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
{
	int flags = rdev->bdev->bd_disk->flags;

	if (!(flags & GENHD_FL_UP)) {
		if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
			pr_warn("md: %s: %s array has a missing/failed member\n",
				mdname(rdev->mddev), md_type);
		return true;
	}
	return false;
}

static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
	int faulty = test_bit(Faulty, &rdev->flags);

	if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}
}
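
/*
 * Usage sketch (illustrative): nr_pending is raised while I/O is
 * outstanding against a member device and dropped with
 * rdev_dec_pending() on completion, so a faulty device is handed to
 * the recovery machinery as soon as it goes idle:
 *
 *	atomic_inc(&rdev->nr_pending);
 *	... submit bio to rdev->bdev ...
 *	// in the completion handler:
 *	rdev_dec_pending(rdev, mddev);
 */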

extern struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
}

/* clear unsupported mddev_flags */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
	unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}

static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_SAME &&
	    !bio->bi_disk->queue->limits.max_write_same_sectors)
		mddev->queue->limits.max_write_same_sectors = 0;
}

static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
	    !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
		mddev->queue->limits.max_write_zeroes_sectors = 0;
}

struct mdu_array_info_s;
struct mdu_disk_info_s;

extern int mdp_major;
void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);

extern const struct block_device_operations md_fops;

#endif /* _MD_MD_H */