1
2
3
4
5
6
7
8#ifndef _MD_MD_H
9#define _MD_MD_H
10
11#include <linux/blkdev.h>
12#include <linux/backing-dev.h>
13#include <linux/badblocks.h>
14#include <linux/kobject.h>
15#include <linux/list.h>
16#include <linux/mm.h>
17#include <linux/mutex.h>
18#include <linux/timer.h>
19#include <linux/wait.h>
20#include <linux/workqueue.h>
21#include "md-cluster.h"
22
/* Sentinel sector value: larger than any valid sector number. */
#define MaxSector (~(sector_t)0)

/*
 * These flags should really be called "NO_RETRY" rather than
 * "FAILFAST" because they don't make any promise about time lapse,
 * only about the number of retries, which will be zero.
 * REQ_FAILFAST_DRIVER is not included because it is presumably about
 * hardware errors that should not be retried at all — TODO confirm
 * against the block layer's failfast semantics.
 */
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
35
36
37
38
/*
 * Per-rdev state used to serialize overlapping writes (see the
 * 'serial' pointer in struct md_rdev and CollisionCheck flag).
 */
struct serial_in_rdev {
	struct rb_root_cached serial_rb;	/* interval tree of in-flight serialized ranges */
	spinlock_t serial_lock;			/* protects serial_rb */
	wait_queue_head_t serial_io_wait;	/* waiters for a conflicting range to complete */
};
44
45
46
47
/*
 * MD's 'extended' device: one member device of an array.
 */
struct md_rdev {
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t sectors;		/* Device size (in 512-byte sectors) */
	struct mddev *mddev;		/* RAID array if running */
	int last_events;		/* IO event timestamp */

	/*
	 * If meta_bdev is non-NULL, it means that a separate device is
	 * being used to store the metadata (superblock/bitmap) which
	 * would otherwise be contained on the same device as the data (bdev).
	 */
	struct block_device *meta_bdev;
	struct block_device *bdev;	/* block device handle */

	struct page *sb_page, *bb_page;	/* cached superblock / bad-blocks pages */
	int sb_loaded;			/* non-zero once sb_page contains valid data */
	__u64 sb_events;		/* event count stored in the on-disk superblock */
	sector_t data_offset;		/* start of data in array */
	sector_t new_data_offset;	/* relative address of data after reshape */
	sector_t sb_start;		/* offset of the super block (in 512-byte sectors) */
	int sb_size;			/* bytes in the superblock */
	int preferred_minor;		/* autorun support */

	struct kobject kobj;

	/* A device can be in one of three states based on two flags:
	 * Not working:   faulty==1 in_sync==0
	 * Fully working: faulty==0 in_sync==1
	 * Working, but not
	 * in sync with array
	 *                faulty==0 in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */

	unsigned long flags;		/* bit set of 'enum flag_bits' bits */
	wait_queue_head_t blocked_wait;	/* for waiting until Blocked/BlockedBadBlocks clear */

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
	int new_raid_disk;		/* role that the device will have in
					 * the array after a level-change completes.
					 */
	int saved_raid_disk;		/* role that device used to have in the
					 * array and could again if we did a partial
					 * resync from the bitmap
					 */
	union {
		sector_t recovery_offset;/* If this device has been partially
					 * recovered, this is where we were
					 * up to.
					 */
		sector_t journal_tail;	/* If this device is a journal device,
					 * this is the journal tail (journal
					 * recovery start point)
					 */
	};

	atomic_t nr_pending;		/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
	atomic_t read_errors;		/* number of consecutive read errors that
					 * we have tried to ignore.
					 */
	time64_t last_read_error;	/* monotonic time since our
					 * last read error
					 */
	atomic_t corrected_errors;	/* number of corrected read errors,
					 * for reporting to userspace and storing
					 * in superblock.
					 */

	struct serial_in_rdev *serial;	/* used for write-serialization (raid1) */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	struct kernfs_node *sysfs_state;	/* handle for 'state' sysfs entry */
	/* handle for 'unacknowledged_bad_blocks' sysfs dentry */
	struct kernfs_node *sysfs_unack_badblocks;
	/* handle for 'bad_blocks' sysfs dentry */
	struct kernfs_node *sysfs_badblocks;
	struct badblocks badblocks;	/* list of known-bad sectors on this device */

	struct {
		short offset;		/* Offset from superblock to start of PPL.
					 * Not used by external metadata. */
		unsigned int size;	/* Size in sectors of the PPL space */
		sector_t sector;	/* First sector of the PPL space */
	} ppl;				/* partial parity log */
};
/* Bit numbers used in md_rdev->flags. */
enum flag_bits {
	Faulty,			/* device is known to have a fault */
	In_sync,		/* device is in_sync with rest of array */
	Bitmap_sync,		/* ..actually, not quite In_sync.  Need a
				 * bitmap-based recovery to get fully in sync.
				 * The bit is only meaningful before device
				 * has been passed to pers->hot_add_disk.
				 */
	WriteMostly,		/* Avoid reading if at all possible */
	AutoDetected,		/* added by auto-detect */
	Blocked,		/* An error occurred but has not yet
				 * been acknowledged by the metadata
				 * handler, so don't allow writes
				 * until it is cleared */
	WriteErrorSeen,		/* A write error has been seen on this
				 * device
				 */
	FaultRecorded,		/* Intermediate state for clearing
				 * Blocked.  The Fault is/will-be
				 * recorded in the metadata, but that
				 * metadata hasn't been stored safely
				 * on disk yet.
				 */
	BlockedBadBlocks,	/* A writer is blocked because they
				 * found an unacknowledged bad-block.
				 * This can safely be cleared at any
				 * time, and the writer will re-check.
				 * It may be set at any time, and at
				 * worst the writer will just do some
				 * extra checks.
				 */
	WantReplacement,	/* This device is a candidate to be
				 * hot-replaced, either because it has
				 * reported some faults, or because
				 * of explicit request.
				 */
	Replacement,		/* This device is a replacement for
				 * a want_replacement device with same
				 * raid_disk number.
				 */
	Candidate,		/* For clustered environments only:
				 * This device is seen locally but not
				 * by the whole cluster
				 */
	Journal,		/* This device is used as journal for
				 * raid-5/6.
				 * Usually, this device should be faster
				 * than other devices in the array
				 */
	ClusterRemove,		/* remove requested by another cluster node */
	RemoveSynchronized,	/* synchronize_rcu() was called after
				 * this device was known to be faulty,
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
	ExternalBbl,		/* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 * Presumably no bad-block log is expected
				 * on such devices — TODO confirm.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't treat it as
				 * fail fast either.
				 */
	CollisionCheck,		/*
				 * check if there is collision between
				 * serialized bios.
				 */
};
217
218static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
219 sector_t *first_bad, int *bad_sectors)
220{
221 if (unlikely(rdev->badblocks.count)) {
222 int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
223 sectors,
224 first_bad, bad_sectors);
225 if (rv)
226 *first_bad -= rdev->data_offset;
227 return rv;
228 }
229 return 0;
230}
/* Record a new bad-block range on a device; 'is_new' selects new vs old
 * data offset during a reshape. */
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			      int is_new);
/* Acknowledge/clear a bad-block range previously recorded. */
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
/* Bit numbers used in mddev->flags. */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, do not open
				 * it then */
	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
				   * already took resync lock, need to
				   * release the lock */
	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
				 * supported as calls to md_error() will
				 * never cause the array to become failed.
				 */
	MD_HAS_PPL,		/* The raid array has PPL feature set */
	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
				 * the metadata without taking reconfig_mutex.
				 */
	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
				 * without explicitly holding reconfig_mutex.
				 */
	MD_NOT_READY,		/* do_md_run() is active, so 'array_state'
				 * must not report that array is ready yet
				 */
	MD_BROKEN,		/* Array member is gone/failed; used to stop
				 * further I/O (see is_mddev_broken()).
				 */
};
274
/* Bit numbers used in mddev->sb_flags: superblock-update bookkeeping. */
enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,	/* Some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};
281
#define NR_SERIAL_INFOS		8
/* record current range of serialized IO in an interval tree */
struct serial_info {
	struct rb_node node;		/* interval-tree node */
	sector_t start;			/* start sector of the serialized range */
	sector_t last;			/* last sector (inclusive) of the range */
	sector_t _subtree_last;		/* highest 'last' in this subtree (interval-tree augmentation) */
};
290
/* The core state object for one MD array. */
struct mddev {
	void *private;			/* personality-specific data */
	struct md_personality *pers;	/* active personality, NULL until run */
	dev_t unit;
	int md_minor;
	struct list_head disks;		/* list of member md_rdevs */
	unsigned long flags;		/* bits from enum mddev_flags */
	unsigned long sb_flags;		/* bits from enum mddev_sb_flags */

	int suspended;
	atomic_t active_io;
	int ro;				/* 0=rw, 1=ro, 2=auto-ro pending first write */
	int sysfs_active;		/* set when sysfs deletes
					 * are happening, so run/
					 * takeover/stop are not safe
					 */
	struct gendisk *gendisk;

	struct kobject kobj;
	int hold_active;		/* keep 'unused' array allocated: */
#define UNTIL_IOCTL 1
#define UNTIL_STOP 2

	/* Superblock information */
	int major_version,
	    minor_version,
	    patch_version;
	int persistent;			/* array has persistent superblocks */
	int external;			/* metadata is managed externally */
	char metadata_type[17];		/* externally set */
	int chunk_sectors;
	time64_t ctime, utime;
	int level, layout;
	char clevel[16];
	int raid_disks;
	int max_disks;
	sector_t dev_sectors;		/* used size of component devices */

	sector_t array_sectors;		/* exported array size */
	int external_size;		/* size managed externally */

	__u64 events;
	/* If the last 'event' was simply a clean->dirty transition, and
	 * we didn't write it to the spares, then it is safe and simple
	 * to just decrement the event count on a dirty->clean transition.
	 * So we record that possibility here.
	 */
	int can_decrease_events;

	char uuid[16];

	/* If the array is being reshaped, we need to record the
	 * new shape and an indication of where we are up to.
	 * This is written to the superblock.
	 * If reshape_position is MaxSector, then no reshape is happening (yet).
	 */
	sector_t reshape_position;
	int delta_disks, new_level, new_layout;
	int new_chunk_sectors;
	int reshape_backwards;

	struct md_thread *thread;	/* management thread */
	struct md_thread *sync_thread;	/* doing resync or reconstruct */

	/* 'last_sync_action' is initialized to "none".  It is set when a
	 * sync operation (i.e "data-check", "requested-resync", "resync",
	 * "recovery" or "reshape") is started.  It holds this value even
	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
	 * or finished).  It is overwritten when a new sync operation is
	 * begun.
	 */
	char *last_sync_action;
	sector_t curr_resync;		/* last block scheduled */
	/* As resync requests can complete out of order, we cannot easily track
	 * how much resync has been completed.  So we occasionally pause until
	 * everything completes, then set curr_resync_completed to curr_resync.
	 * As such it may be well behind the real resync mark, but it is a value
	 * we are certain of.
	 */
	sector_t curr_resync_completed;
	unsigned long resync_mark;	/* a recent timestamp */
	sector_t resync_mark_cnt;	/* blocks written at resync_mark */
	sector_t curr_mark_cnt;		/* blocks scheduled now */

	sector_t resync_max_sectors;	/* may be set by personality */

	atomic64_t resync_mismatches;	/* count of sectors where
					 * parity/replica mismatch found
					 */

	/* allow user-space to request suspension of IO to regions of the array */
	sector_t suspend_lo;
	sector_t suspend_hi;
	/* if zero, use the system-wide default */
	int sync_speed_min;
	int sync_speed_max;

	/* resync even though the same disks are shared among md-devices */
	int parallel_resync;

	int ok_start_degraded;		/* allow starting a degraded array */

	unsigned long recovery;		/* bits from enum recovery_flags */
	/* If a RAID personality determines that recovery (of a particular
	 * device) will fail due to a read error on the source device, it
	 * takes a copy of this number and does not attempt recovery again
	 * until this number changes.
	 */
	int recovery_disabled;

	int in_sync;			/* know to not need resync */
	/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
	 * that we are never stopping an array while it is open.
	 * 'reconfig_mutex' protects all other reconfiguration.
	 * These locks are separate due to conflicting interactions
	 * with disk->open_mutex.
	 * Lock ordering is:
	 *  reconfig_mutex -> disk->open_mutex
	 *  disk->open_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
	 */
	struct mutex open_mutex;
	struct mutex reconfig_mutex;
	atomic_t active;		/* general refcount */
	atomic_t openers;		/* number of active opens */

	int changed;			/* True if we might need to
					 * reread partition info */
	int degraded;			/* whether md should consider
					 * adding a spare */

	atomic_t recovery_active;	/* blocks scheduled, but not written */
	wait_queue_head_t recovery_wait;
	sector_t recovery_cp;		/* recovery checkpoint */
	sector_t resync_min;		/* user requested sync
					 * starts here */
	sector_t resync_max;		/* resync should pause
					 * when it gets here */

	struct kernfs_node *sysfs_state;	/* handle for 'array_state'
						 * file in sysfs.
						 */
	struct kernfs_node *sysfs_action;	/* handle for 'sync_action' */
	struct kernfs_node *sysfs_completed;	/* handle for 'sync_completed' */
	struct kernfs_node *sysfs_degraded;	/* handle for 'degraded' */
	struct kernfs_node *sysfs_level;	/* handle for 'level' */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	/* "lock" protects:
	 *   flush_bio transition from NULL to !NULL
	 *   rdev superblocks, events
	 *   clearing MD_SB_CHANGE_PENDING
	 *   in_sync - and related safemode and MD_SB_CHANGE changes
	 *   pers (also protected by reconfig_mutex and pending IO).
	 *   clearing ->bitmap
	 *   clearing ->bitmap_info.file
	 *   changing ->resync_{min,max}
	 *   setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
	 */
	spinlock_t lock;
	wait_queue_head_t sb_wait;	/* for waiting on superblock updates */
	atomic_t pending_writes;	/* number of active superblock writes */

	unsigned int safemode;		/* if set, update "clean" superblock
					 * when no writes pending.
					 */
	unsigned int safemode_delay;
	struct timer_list safemode_timer;
	struct percpu_ref writes_pending;
	int sync_checkers;		/* # of threads checking writes_pending */
	struct request_queue *queue;	/* for plugging ... */

	struct bitmap *bitmap;		/* the bitmap for the device */
	struct {
		struct file *file;	/* the bitmap file */
		loff_t offset;		/* offset from superblock of
					 * start of bitmap. May be
					 * negative, but not '0'
					 * For external metadata, offset
					 * from start of device.
					 */
		unsigned long space;	/* space available at this offset */
		loff_t default_offset;	/* this is the offset to use when
					 * hot-adding a bitmap.  It should
					 * eventually be settable by sysfs.
					 */
		unsigned long default_space;	/* space available at
						 * default offset */
		struct mutex mutex;
		unsigned long chunksize;
		unsigned long daemon_sleep;	/* how many jiffies between updates? */
		unsigned long max_write_behind; /* write-behind mode */
		int external;
		int nodes;		/* Maximum number of nodes in the cluster */
		char cluster_name[64];	/* Name of the cluster */
	} bitmap_info;

	atomic_t max_corr_read_errors;	/* max read retries */
	struct list_head all_mddevs;	/* link on the global list of arrays */

	const struct attribute_group *to_remove;

	struct bio_set bio_set;
	struct bio_set sync_set;	/* for sync operations like
					 * metadata and bitmap writes
					 */
	struct bio_set io_acct_set;	/* for raid I/O accounting clones */

	/* Generic flush handling.
	 * The last to finish preflush schedules a worker to submit
	 * the rest of the request (without the REQ_PREFLUSH flag).
	 */
	struct bio *flush_bio;
	atomic_t flush_pending;
	ktime_t start_flush, prev_flush_start; /* prev_flush_start is when the
						* previously submitted flush
						* started.
						*/
	struct work_struct flush_work;
	struct work_struct event_work;	/* used by dm to report failure event */
	mempool_t *serial_info_pool;	/* pool of struct serial_info */
	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
	struct md_cluster_info *cluster_info;
	unsigned int good_device_nr;	/* good device num within cluster raid */
	unsigned int noio_flag;		/* for memalloc scope API */

	bool has_superblocks:1;
	bool fail_last_dev:1;
	bool serialize_policy:1;
};
521
/* Bit numbers used in mddev->recovery: sync/recovery thread state. */
enum recovery_flags {
	/*
	 * If neither SYNC nor RESHAPE are set, then it is a recovery.
	 */
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it. */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* A reshape is happening */
	MD_RECOVERY_FROZEN,	/* User request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because io-error */
	MD_RECOVERY_WAIT,	/* waiting for pers->start() to finish */
	MD_RESYNCING_REMOTE,	/* remote node is running resync thread */
};
540
/* Take the array's reconfiguration mutex, interruptibly.
 * Returns 0 on success or -EINTR-style error from
 * mutex_lock_interruptible(); callers must check (__must_check).
 */
static inline int __must_check mddev_lock(struct mddev *mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
}
545
546
547
548
/* Sometimes we need to take the lock in a situation where
 * failure due to interrupts is not acceptable.
 */
static inline void mddev_lock_nointr(struct mddev *mddev)
{
	mutex_lock(&mddev->reconfig_mutex);
}
553
/* Non-blocking attempt to take reconfig_mutex.
 * Returns non-zero if the lock was acquired, 0 otherwise.
 */
static inline int mddev_trylock(struct mddev *mddev)
{
	return mutex_trylock(&mddev->reconfig_mutex);
}
extern void mddev_unlock(struct mddev *mddev);
559
/* Account nr_sectors of sync I/O against the disk backing 'bdev'
 * (feeds the is_mddev_idle() heuristic via gendisk->sync_io).
 */
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
}
564
/* Convenience wrapper: account sync I/O for the device a bio targets. */
static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
{
	md_sync_acct(bio->bi_bdev, nr_sectors);
}
569
/* Operations vector implemented by each RAID personality (raid0/1/5/...). */
struct md_personality
{
	char *name;
	int level;
	struct list_head list;
	struct module *owner;
	bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
	/*
	 * start up works that do NOT require md_thread. tasks that
	 * requires md_thread should go into start()
	 */
	int (*run)(struct mddev *mddev);
	/* start up works that require md threads */
	int (*start)(struct mddev *mddev);
	void (*free)(struct mddev *mddev, void *priv);
	void (*status)(struct seq_file *seq, struct mddev *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*spare_active) (struct mddev *mddev);
	sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
	int (*resize) (struct mddev *mddev, sector_t sectors);
	sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
	int (*check_reshape) (struct mddev *mddev);
	int (*start_reshape) (struct mddev *mddev);
	void (*finish_reshape) (struct mddev *mddev);
	void (*update_reshape_pos) (struct mddev *mddev);
	/* quiesce suspends or resumes internal processing.
	 * 1 - stop new actions and wait for action io to complete
	 * 0 - return to normal behaviour
	 */
	void (*quiesce) (struct mddev *mddev, int quiesce);
	/* takeover is used to transition an array from one
	 * personality to another.  The new personality must be able
	 * to handle the data in the current layout.
	 * e.g. 2drive raid1 -> 2drive raid5
	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
	 * If the takeover succeeds, a new 'private' structure is returned.
	 * This needs to be installed and then ->run used to activate the
	 * array.
	 */
	void *(*takeover) (struct mddev *mddev);
	/* Changes the consistency policy of an active array. */
	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
};
618
/* One sysfs attribute for an mddev: show/store callbacks plus metadata. */
struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct mddev *, char *);
	ssize_t (*store)(struct mddev *, const char *, size_t);
};
extern const struct attribute_group md_bitmap_group;
625
626static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
627{
628 if (sd)
629 return sysfs_get_dirent(sd, name);
630 return sd;
631}
/* NULL-tolerant sysfs_notify_dirent(): silently ignores a NULL node. */
static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
{
	if (!sd)
		return;
	sysfs_notify_dirent(sd);
}
637
638static inline char * mdname (struct mddev * mddev)
639{
640 return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
641}
642
643static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
644{
645 char nm[20];
646 if (!test_bit(Replacement, &rdev->flags) &&
647 !test_bit(Journal, &rdev->flags) &&
648 mddev->kobj.sd) {
649 sprintf(nm, "rd%d", rdev->raid_disk);
650 return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
651 } else
652 return 0;
653}
654
655static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
656{
657 char nm[20];
658 if (!test_bit(Replacement, &rdev->flags) &&
659 !test_bit(Journal, &rdev->flags) &&
660 mddev->kobj.sd) {
661 sprintf(nm, "rd%d", rdev->raid_disk);
662 sysfs_remove_link(&mddev->kobj, nm);
663 }
664}
665
666
667
668
669
/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Dont touch 'tmp' though.
 */
#define rdev_for_each_list(rdev, tmp, head) \
	list_for_each_entry_safe(rdev, tmp, head, same_set)

/*
 * iterates through the 'same array disks' ringlist
 */
#define rdev_for_each(rdev, mddev) \
	list_for_each_entry(rdev, &((mddev)->disks), same_set)

/* As rdev_for_each(), but safe against removal of the current entry. */
#define rdev_for_each_safe(rdev, tmp, mddev) \
	list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

/* RCU-protected iteration; caller must hold rcu_read_lock(). */
#define rdev_for_each_rcu(rdev, mddev) \
	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
684
/* A kernel thread owned by an MD array (see md_register_thread()). */
struct md_thread {
	void (*run) (struct md_thread *thread);	/* work function invoked on wakeup */
	struct mddev *mddev;			/* owning array */
	wait_queue_head_t wqueue;		/* thread sleeps here between wakeups */
	unsigned long flags;			/* THREAD_WAKEUP etc. */
	struct task_struct *tsk;		/* underlying kthread */
	unsigned long timeout;			/* max sleep before an unforced run */
	void *private;				/* personality-private data */
};
694
/* Per-bio accounting context allocated from mddev->io_acct_set;
 * bio_clone must be the last field (the clone bio is embedded).
 */
struct md_io_acct {
	struct bio *orig_bio;		/* the caller's original bio */
	unsigned long start_time;	/* when accounting started, for I/O stats */
	struct bio bio_clone;		/* cloned bio actually submitted */
};
700
#define THREAD_WAKEUP 0

/* Drop a page reference, tolerating a NULL page pointer. */
static inline void safe_put_page(struct page *p)
{
	if (!p)
		return;
	put_page(p);
}
707
/* Personality / cluster-operations registration. */
extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
extern int register_md_cluster_operations(struct md_cluster_operations *ops,
		struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);

/* md_thread lifecycle. */
extern struct md_thread *md_register_thread(
	void (*run)(struct md_thread *thread),
	struct mddev *mddev,
	const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);

/* Write accounting and I/O completion helpers. */
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
			struct bio *bio, sector_t start, sector_t size);
int acct_bioset_init(struct mddev *mddev);
void acct_bioset_exit(struct mddev *mddev);
void md_account_bio(struct mddev *mddev, struct bio **bio);

/* Superblock / metadata I/O. */
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
			   sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
			struct page *page, int op, int op_flags,
			bool metadata_op);
extern void md_do_sync(struct md_thread *thread);
extern void md_new_event(void);
extern void md_allow_write(struct mddev *mddev);
extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);

/* Array lifecycle. */
extern void mddev_init(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
extern int md_start(struct mddev *mddev);
extern void md_stop(struct mddev *mddev);
extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
extern void md_rdev_clear(struct md_rdev *rdev);

extern void md_handle_request(struct mddev *mddev, struct bio *bio);
extern void mddev_suspend(struct mddev *mddev);
extern void mddev_resume(struct mddev *mddev);

extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				     bool is_suspend);
extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				      bool is_suspend);
/* rdev lookup by descriptor number / dev_t; callers hold rcu_read_lock(). */
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
774
775static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
776{
777 if (!disk_live(rdev->bdev->bd_disk)) {
778 if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
779 pr_warn("md: %s: %s array has a missing/failed member\n",
780 mdname(rdev->mddev), md_type);
781 return true;
782 }
783 return false;
784}
785
786static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
787{
788 int faulty = test_bit(Faulty, &rdev->flags);
789 if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
790 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
791 md_wakeup_thread(mddev->thread);
792 }
793}
794
795extern struct md_cluster_operations *md_cluster_ops;
796static inline int mddev_is_clustered(struct mddev *mddev)
797{
798 return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
799}
800
801
/* Clear mddev->flags bits that the active personality does not support
 * (e.g. after a level change / takeover).
 */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
	unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}
807
808static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
809{
810 if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
811 !bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
812 mddev->queue->limits.max_write_zeroes_sectors = 0;
813}
814
/* Forward declarations for the userspace ioctl structures (mdu.h). */
struct mdu_array_info_s;
struct mdu_disk_info_s;

extern int mdp_major;		/* major number for partitionable md devices */
void md_autostart_arrays(int part);
/* ioctl-path entry points (SET_ARRAY_INFO / ADD_NEW_DISK / RUN_ARRAY). */
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);

extern const struct block_device_operations md_fops;
825
826#endif
827