1
2
3
4
5
6
7
8
9
10
11
12
13
14
15#ifndef _MD_MD_H
16#define _MD_MD_H
17
18#include <linux/blkdev.h>
19#include <linux/backing-dev.h>
20#include <linux/badblocks.h>
21#include <linux/kobject.h>
22#include <linux/list.h>
23#include <linux/mm.h>
24#include <linux/mutex.h>
25#include <linux/timer.h>
26#include <linux/wait.h>
27#include <linux/workqueue.h>
28#include "md-cluster.h"
29
/* Largest possible sector_t; used throughout md as a "no valid sector"
 * sentinel (e.g. reshape_position == MaxSector means no reshape). */
#define MaxSector (~(sector_t)0)

/*
 * These flags should really be called "NO_RETRY" rather than "FAILFAST",
 * because they make no promise about elapsed time, only about the number
 * of retries (zero).  REQ_FAILFAST_DRIVER is deliberately not included,
 * as driver-level errors are not the "can't reach this device" class of
 * failure that fail-fast is meant for.
 */
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
42
43
44
/*
 * MD's 'extended' device: one member disk of an array.
 */
struct md_rdev {
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t sectors;		/* Device size (in 512byte sectors) */
	struct mddev *mddev;		/* RAID array if running */
	int last_events;		/* IO event timestamp */

	/*
	 * If meta_bdev is non-NULL, it means that a separate device is
	 * being used to store the metadata (superblock/bitmap) which
	 * would otherwise be contained on the same device as the data (bdev).
	 */
	struct block_device *meta_bdev;
	struct block_device *bdev;	/* block device handle */

	struct page *sb_page, *bb_page;	/* superblock / bad-block-log pages */
	int sb_loaded;			/* non-zero once sb_page holds a valid superblock */
	__u64 sb_events;		/* event count recorded in this device's superblock */
	sector_t data_offset;		/* start of data in array */
	sector_t new_data_offset;	/* relevant address if we are reshaping */
	sector_t sb_start;		/* offset of the super block (in 512byte sectors) */
	int sb_size;			/* bytes in the superblock */
	int preferred_minor;		/* autorun support */

	struct kobject kobj;

	/* A device can be in one of three states based on two flags:
	 * Not working:   faulty==1 in_sync==0
	 * Fully working: faulty==0 in_sync==1
	 * Working, but not
	 * in sync with array
	 *                faulty==0 in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */

	unsigned long flags;		/* bit set of 'enum flag_bits' bits */
	wait_queue_head_t blocked_wait;	/* for waiting until Blocked/BlockedBadBlocks clear */

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
	int new_raid_disk;		/* role that the device will have in
					 * the array after a level-change completes
					 */
	int saved_raid_disk;		/* role that device used to have in the
					 * array and could again if we did a partial
					 * resync from the bitmap
					 */
	union {
		sector_t recovery_offset;/* If this device has been partially
					 * recovered, this is where we were
					 * up to.
					 */
		sector_t journal_tail;	/* If this device is a journal device,
					 * this is the journal tail (journal
					 * recovery start point)
					 */
	};

	atomic_t nr_pending;		/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
	atomic_t read_errors;		/* number of consecutive read errors that
					 * we have tried to ignore
					 */
	time64_t last_read_error;	/* time of our last read error */
	atomic_t corrected_errors;	/* number of corrected read errors,
					 * for reporting to userspace and storing
					 * in superblock
					 */
	struct work_struct del_work;	/* used for delayed sysfs removal */

	struct kernfs_node *sysfs_state; /* handle for 'state' sysfs entry */

	struct badblocks badblocks;	/* known-bad sectors on this device */

	struct {
		short offset;		/* Offset from superblock to start of PPL.
					 * Not used by external metadata. */
		unsigned int size;	/* Size in sectors of the PPL space */
		sector_t sector;	/* First sector of the PPL space */
	} ppl;
};
/* Bit numbers for md_rdev->flags */
enum flag_bits {
	Faulty,			/* device is known to have a fault */
	In_sync,		/* device is in_sync with rest of array */
	Bitmap_sync,		/* ..actually, not quite In_sync.  Need a
				 * bitmap-based recovery to get fully in sync.
				 * I.e. array isn't degraded.
				 */
	WriteMostly,		/* Avoid reading if at all possible */
	AutoDetected,		/* added by auto-detect */
	Blocked,		/* An error occurred but has not yet
				 * been acknowledged by the metadata
				 * handler, so don't allow writes
				 * until it is cleared */
	WriteErrorSeen,		/* A write error has been seen on this
				 * device
				 */
	FaultRecorded,		/* Intermediate state for clearing
				 * Blocked.  The Fault is/will-be
				 * recorded in the metadata, but that
				 * metadata hasn't been stored safely
				 * on disk yet.
				 */
	BlockedBadBlocks,	/* A writer is blocked because they
				 * found an unacknowledged bad-block.
				 * This can safely be cleared at any
				 * time, and the writer will re-check.
				 * It may be set at any time, and at
				 * worst the writer will timeout and
				 * re-check.  So setting it
				 * accidentally is safe until the
				 * write completes.
				 */
	WantReplacement,	/* This device is a candidate to be
				 * hot-replaced, ideally before it
				 * actually fails
				 */
	Replacement,		/* This device is a replacement for
				 * a want_replacement device with same
				 * raid_disk number.
				 */
	Candidate,		/* For clustered environments only:
				 * This device is seen locally but not
				 * by the whole cluster
				 */
	Journal,		/* This device is used as journal for
				 * raid-5/6.
				 * Usually, this device should be faster
				 * than other devices in the array
				 */
	ClusterRemove,		/* remove of this device requested over the cluster */
	RemoveSynchronized,	/* clear_bit is to be done by remote node */
	ExternalBbl,		/* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't use FailFast
				 * any more for metadata
				 */
};
204
205static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
206 sector_t *first_bad, int *bad_sectors)
207{
208 if (unlikely(rdev->badblocks.count)) {
209 int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
210 sectors,
211 first_bad, bad_sectors);
212 if (rv)
213 *first_bad -= rdev->data_offset;
214 return rv;
215 }
216 return 0;
217}
218extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
219 int is_new);
220extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
221 int is_new);
222struct md_cluster_info;
223
224
/* Bit numbers for mddev->flags */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, do not open
				 * it then */
	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
				   * already took resync lock, need to
				   * release the lock */
	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
				 * supported as calls to md_error() will
				 * never cause the array to become failed.
				 */
	MD_HAS_PPL,		/* The raid array has PPL feature set */
	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
				 * the metadata without taking reconfig_mutex.
				 */
	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
				 * without explicitly holding reconfig_mutex.
				 */
};
247
/* Bit numbers for mddev->sb_flags: what kind of superblock update is pending */
enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,	/* Some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};
254
/* One md array (e.g. /dev/md0) and all of its runtime state. */
struct mddev {
	void				*private;	/* personality-specific data */
	struct md_personality		*pers;
	dev_t				unit;
	int				md_minor;
	struct list_head		disks;		/* list of md_rdev, via same_set */
	unsigned long			flags;		/* bits from enum mddev_flags */
	unsigned long			sb_flags;	/* bits from enum mddev_sb_flags */

	int				suspended;
	atomic_t			active_io;	/* in-flight array I/O */
	int				ro;		/* non-zero: read-only array */
	int				sysfs_active;	/* set when sysfs deletes
							 * are happening, so run/
							 * takeover/stop are not safe
							 */
	struct gendisk			*gendisk;

	struct kobject			kobj;
	int				hold_active;	/* keep 'unused' array allocated: */
#define UNTIL_IOCTL 1
#define UNTIL_STOP 2

	/* Superblock information */
	int				major_version,
					minor_version,
					patch_version;
	int				persistent;
	int				external;	/* metadata is managed externally */
	char				metadata_type[17]; /* externally set */
	int				chunk_sectors;
	time64_t			ctime, utime;
	int				level, layout;
	char				clevel[16];
	int				raid_disks;
	int				max_disks;
	sector_t			dev_sectors;	/* used size of component devices */
	sector_t			array_sectors;	/* exported array size */
	int				external_size;	/* size managed externally */
	__u64				events;
	/* If the last 'event' was simply a clean->dirty transition, and
	 * we didn't write it to the spares, then it is safe and simple
	 * to just decrement the event count on a dirty->clean transition.
	 * So we record that possibility here.
	 */
	int				can_decrease_events;

	char				uuid[16];

	/* If the array is being reshaped, we need to record the
	 * new shape and an indication of where we are up to.
	 * This is written to the superblock.
	 * If reshape_position is MaxSector, then no reshape is happening (yet).
	 */
	sector_t			reshape_position;
	int				delta_disks, new_level, new_layout;
	int				new_chunk_sectors;
	int				reshape_backwards;

	struct md_thread		*thread;	/* management thread */
	struct md_thread		*sync_thread;	/* doing resync or reconstruct */

	/* 'last_sync_action' is initialized to "none".  It is set when a
	 * sync operation (i.e "data-check", "requested-resync", "resync",
	 * "recovery" or "reshape") is started.  It holds this value even
	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
	 * or finished).  It is overwritten when a new sync operation is
	 * initiated.
	 */
	char				*last_sync_action;
	sector_t			curr_resync;	/* last block scheduled */
	/* As resync requests can complete out of order, we cannot easily track
	 * how much resync has been completed.  So we occasionally pause until
	 * everything completes, then set curr_resync_completed to curr_resync.
	 * As such it may be well behind the real resync mark, but it is a value
	 * we are certain of.
	 */
	sector_t			curr_resync_completed;
	unsigned long			resync_mark;	/* a recent timestamp */
	sector_t			resync_mark_cnt;/* blocks written at resync_mark */
	sector_t			curr_mark_cnt;	/* blocks scheduled now */

	sector_t			resync_max_sectors; /* may be set by personality */

	atomic64_t			resync_mismatches; /* count of sectors where
							    * parity/replica mismatch found
							    */

	/* allow user-space to request suspension of IO to regions of the array */
	sector_t			suspend_lo;
	sector_t			suspend_hi;
	/* if zero, use the system-wide default */
	int				sync_speed_min;
	int				sync_speed_max;

	/* resync even though the same disks are shared among md-devices */
	int				parallel_resync;

	int				ok_start_degraded;

	unsigned long			recovery;	/* bits from enum recovery_flags */
	/* If a RAID personality determines that recovery (of a particular
	 * device) will fail due to a read error on the source device, it
	 * takes a copy of this number and does not attempt recovery again
	 * until this number changes.
	 */
	int				recovery_disabled;

	int				in_sync;	/* know to not need resync */
	/* 'open_mutex' avoids a race between 'md_open' and 'do_md_stop', so
	 * that we are never stopping an array while it is open.
	 * 'reconfig_mutex' protects all other reconfiguration.
	 * These locks are separate due to conflicting interactions
	 * with bdev->bd_mutex.
	 */
	struct mutex			open_mutex;
	struct mutex			reconfig_mutex;
	atomic_t			active;		/* general refcount */
	atomic_t			openers;	/* number of active opens */

	int				changed;	/* True if we might need to
							 * reread partition info */
	int				degraded;	/* whether md should consider
							 * adding a spare
							 */

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	wait_queue_head_t		recovery_wait;
	sector_t			recovery_cp;	/* recovery checkpoint */
	sector_t			resync_min;	/* user requested sync
							 * starts here */
	sector_t			resync_max;	/* resync should pause
							 * when it gets here */

	struct kernfs_node		*sysfs_state;	/* handle for 'array_state'
							 * file in sysfs
							 */
	struct kernfs_node		*sysfs_action;	/* handle for 'sync_action' */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	/* "lock" protects:
	 *   flush_bio transition from NULL to !NULL
	 *   rdev superblocks, events
	 *   clearing MD_SB_CHANGE_CLEAN/PENDING
	 *   in_sync - and related safemode and MD_SB_CHANGE changes
	 *   pers (also protected by reconfig_mutex and pending IO)
	 *   clearing ->bitmap and ->bitmap_info.file
	 *   changing ->resync_{min,max}
	 *   setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
	 */
	spinlock_t			lock;
	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
	atomic_t			pending_writes;	/* number of active superblock writes */

	unsigned int			safemode;	/* if set, update "clean" superblock
							 * when no writes pending
							 */
	unsigned int			safemode_delay;
	struct timer_list		safemode_timer;
	struct percpu_ref		writes_pending;
	int				sync_checkers;	/* # of threads checking writes_pending */
	struct request_queue		*queue;		/* for plugging ... */

	struct bitmap			*bitmap;	/* the bitmap for the device */
	struct {
		struct file		*file;		/* the bitmap file */
		loff_t			offset;		/* offset from superblock of
							 * start of bitmap. May be
							 * negative, but not '0'
							 * For external metadata, offset
							 * from start of device.
							 */
		unsigned long		space;		/* space available at this offset */
		loff_t			default_offset;	/* this is the offset to use when
							 * hot-adding a bitmap.  It should
							 * eventually be settable by sysfs.
							 */
		unsigned long		default_space;	/* space available at default offset */
		struct mutex		mutex;
		unsigned long		chunksize;
		unsigned long		daemon_sleep;	/* how many jiffies between updates? */
		unsigned long		max_write_behind; /* write-behind mode */
		int			external;
		int			nodes;		/* Maximum number of nodes in the cluster */
		char			cluster_name[64]; /* Name of the cluster */
	} bitmap_info;

	atomic_t			max_corr_read_errors; /* max read retries */
	struct list_head		all_mddevs;

	struct attribute_group		*to_remove;

	struct bio_set			*bio_set;
	struct bio_set			*sync_set;	/* for sync operations like
							 * metadata and bitmap writes
							 */

	/* Generic flush handling.
	 * The last to finish preflush schedules a worker to submit
	 * the rest of the request (without the REQ_PREFLUSH flag).
	 */
	struct bio *flush_bio;
	atomic_t flush_pending;
	struct work_struct flush_work;
	struct work_struct event_work;	/* used by dm to report failure event */
	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
	struct md_cluster_info		*cluster_info;
	unsigned int			good_device_nr;	/* good device num within cluster raid */

	bool has_superblocks:1;
};
474
/*
 * Bit numbers for mddev->recovery.
 * "resync" is I/O without parity reconstruction, "recovery" is I/O with it.
 */
enum recovery_flags {
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* a reshape is happening */
	MD_RECOVERY_FROZEN,	/* user request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because io-error */
	MD_RECOVERY_WAIT,	/* waiting for pers->start() to finish */
};
492
/* Take reconfig_mutex, interruptibly.  Returns 0 on success or the
 * (non-zero) error from mutex_lock_interruptible() if a signal arrived
 * first — hence __must_check. */
static inline int __must_check mddev_lock(struct mddev *mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
}
497
498
499
500
/* Take reconfig_mutex, uninterruptibly — for callers that cannot fail. */
static inline void mddev_lock_nointr(struct mddev *mddev)
{
	mutex_lock(&mddev->reconfig_mutex);
}
505
/* Try to take reconfig_mutex without blocking; returns non-zero on
 * success (mutex_trylock() semantics). */
static inline int mddev_trylock(struct mddev *mddev)
{
	return mutex_trylock(&mddev->reconfig_mutex);
}
510extern void mddev_unlock(struct mddev *mddev);
511
/* Credit nr_sectors of sync I/O to the 'sync_io' counter of the whole
 * disk containing bdev (bd_contains resolves a partition to its disk). */
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
}
516
/* As md_sync_acct(), but taking the disk from a bio's bi_disk. */
static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bio->bi_disk->sync_io);
}
521
/* Operations table implemented by each RAID level (raid0, raid1, ...). */
struct md_personality
{
	char *name;
	int level;
	struct list_head list;
	struct module *owner;
	bool (*make_request)(struct mddev *mddev, struct bio *bio);
	/*
	 * start up works that do NOT require md_thread.  tasks that
	 * require md_thread should go into start()
	 */
	int (*run)(struct mddev *mddev);
	/* start up works that require md threads */
	int (*start)(struct mddev *mddev);
	void (*free)(struct mddev *mddev, void *priv);
	void (*status)(struct seq_file *seq, struct mddev *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*spare_active) (struct mddev *mddev);
	sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
	int (*resize) (struct mddev *mddev, sector_t sectors);
	sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
	int (*check_reshape) (struct mddev *mddev);
	int (*start_reshape) (struct mddev *mddev);
	void (*finish_reshape) (struct mddev *mddev);
	/* quiesce suspends or resumes internal processing.
	 * 1 - stop new actions and wait for action io to complete
	 * 0 - return to normal behaviour
	 */
	void (*quiesce) (struct mddev *mddev, int quiesce);
	/* takeover is used to transition an array from one
	 * personality to another.  The new personality must be able
	 * to handle the data in the current layout.
	 * e.g. 2drive raid1 -> 2drive raid5
	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
	 * If the takeover succeeds, a new 'private' structure is returned.
	 * This needs to be installed and then ->run used to activate the
	 * array.
	 */
	void *(*takeover) (struct mddev *mddev);
	/* congested implements bdi.congested_fn().
	 * Will not be called while array is 'suspended' */
	int (*congested)(struct mddev *mddev, int bits);
	/* Changes the consistency policy of an active array. */
	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
};
572
/* A sysfs attribute whose show/store callbacks take the owning mddev. */
struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct mddev *, char *);
	ssize_t (*store)(struct mddev *, const char *, size_t);
};
578extern struct attribute_group md_bitmap_group;
579
580static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
581{
582 if (sd)
583 return sysfs_get_dirent(sd, name);
584 return sd;
585}
/* NULL-tolerant wrapper around sysfs_notify_dirent(). */
static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
{
	if (!sd)
		return;
	sysfs_notify_dirent(sd);
}
591
592static inline char * mdname (struct mddev * mddev)
593{
594 return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
595}
596
597static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
598{
599 char nm[20];
600 if (!test_bit(Replacement, &rdev->flags) &&
601 !test_bit(Journal, &rdev->flags) &&
602 mddev->kobj.sd) {
603 sprintf(nm, "rd%d", rdev->raid_disk);
604 return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
605 } else
606 return 0;
607}
608
609static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
610{
611 char nm[20];
612 if (!test_bit(Replacement, &rdev->flags) &&
613 !test_bit(Journal, &rdev->flags) &&
614 mddev->kobj.sd) {
615 sprintf(nm, "rd%d", rdev->raid_disk);
616 sysfs_remove_link(&mddev->kobj, nm);
617 }
618}
619
620
621
622
623
/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Dont touch 'tmp' though.
 */
#define rdev_for_each_list(rdev, tmp, head) \
 list_for_each_entry_safe(rdev, tmp, head, same_set)

/*
 * iterates through the 'same array disks' ringlist
 */
#define rdev_for_each(rdev, mddev) \
 list_for_each_entry(rdev, &((mddev)->disks), same_set)

/* as rdev_for_each, but safe against removal of the current entry */
#define rdev_for_each_safe(rdev, tmp, mddev) \
 list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

/* RCU-protected traversal; caller must hold rcu_read_lock() */
#define rdev_for_each_rcu(rdev, mddev) \
 list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
638
/* A kernel thread owned by an md array (management or sync thread). */
struct md_thread {
	void (*run) (struct md_thread *thread);	/* work function invoked on wakeup */
	struct mddev *mddev;
	wait_queue_head_t wqueue;
	unsigned long flags;			/* presumably THREAD_WAKEUP lives here — see below */
	struct task_struct *tsk;
	unsigned long timeout;			/* max sleep between runs, in jiffies */
	void *private;
};

/* bit number used to signal a wakeup request to an md_thread */
#define THREAD_WAKEUP 0
650
/* put_page() that tolerates a NULL page pointer. */
static inline void safe_put_page(struct page *p)
{
	if (!p)
		return;
	put_page(p);
}
655
656extern int register_md_personality(struct md_personality *p);
657extern int unregister_md_personality(struct md_personality *p);
658extern int register_md_cluster_operations(struct md_cluster_operations *ops,
659 struct module *module);
660extern int unregister_md_cluster_operations(void);
661extern int md_setup_cluster(struct mddev *mddev, int nodes);
662extern void md_cluster_stop(struct mddev *mddev);
663extern struct md_thread *md_register_thread(
664 void (*run)(struct md_thread *thread),
665 struct mddev *mddev,
666 const char *name);
667extern void md_unregister_thread(struct md_thread **threadp);
668extern void md_wakeup_thread(struct md_thread *thread);
669extern void md_check_recovery(struct mddev *mddev);
670extern void md_reap_sync_thread(struct mddev *mddev);
671extern int mddev_init_writes_pending(struct mddev *mddev);
672extern bool md_write_start(struct mddev *mddev, struct bio *bi);
673extern void md_write_inc(struct mddev *mddev, struct bio *bi);
674extern void md_write_end(struct mddev *mddev);
675extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
676extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
677extern void md_finish_reshape(struct mddev *mddev);
678
679extern int mddev_congested(struct mddev *mddev, int bits);
680extern void md_flush_request(struct mddev *mddev, struct bio *bio);
681extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
682 sector_t sector, int size, struct page *page);
683extern int md_super_wait(struct mddev *mddev);
684extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
685 struct page *page, int op, int op_flags,
686 bool metadata_op);
687extern void md_do_sync(struct md_thread *thread);
688extern void md_new_event(struct mddev *mddev);
689extern void md_allow_write(struct mddev *mddev);
690extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
691extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
692extern int md_check_no_bitmap(struct mddev *mddev);
693extern int md_integrity_register(struct mddev *mddev);
694extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
695extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
696
697extern void mddev_init(struct mddev *mddev);
698extern int md_run(struct mddev *mddev);
699extern int md_start(struct mddev *mddev);
700extern void md_stop(struct mddev *mddev);
701extern void md_stop_writes(struct mddev *mddev);
702extern int md_rdev_init(struct md_rdev *rdev);
703extern void md_rdev_clear(struct md_rdev *rdev);
704
705extern void md_handle_request(struct mddev *mddev, struct bio *bio);
706extern void mddev_suspend(struct mddev *mddev);
707extern void mddev_resume(struct mddev *mddev);
708extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
709 struct mddev *mddev);
710
711extern void md_reload_sb(struct mddev *mddev, int raid_disk);
712extern void md_update_sb(struct mddev *mddev, int force);
713extern void md_kick_rdev_from_array(struct md_rdev * rdev);
714struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
715struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
716
717static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
718{
719 int faulty = test_bit(Faulty, &rdev->flags);
720 if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
721 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
722 md_wakeup_thread(mddev->thread);
723 }
724}
725
726extern struct md_cluster_operations *md_cluster_ops;
727static inline int mddev_is_clustered(struct mddev *mddev)
728{
729 return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
730}
731
732
/* Strip mddev->flags bits that the (new) personality does not support. */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
	unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}
738
739static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
740{
741 if (bio_op(bio) == REQ_OP_WRITE_SAME &&
742 !bio->bi_disk->queue->limits.max_write_same_sectors)
743 mddev->queue->limits.max_write_same_sectors = 0;
744}
745
746static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
747{
748 if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
749 !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
750 mddev->queue->limits.max_write_zeroes_sectors = 0;
751}
752#endif
753