Skip to content

Commit 5d1dd57

Browse files
Li Nanhailan94
authored and committed
md: remove recovery_disabled
The 'recovery_disabled' logic is complex and confusing. It was originally intended to preserve the raid in extreme scenarios, and was used in the following cases:

- When sync fails and setting badblocks also fails, kick out non-In_sync rdev and block spare rdev from joining to preserve raid [1]
- When last backup is unavailable, prevent repeated add-remove of spares triggering recovery [2]

The original issues are now resolved:

- Error handlers in all raid types prevent last rdev from being kicked out
- Disks with failed recovery are marked Faulty and can't re-join

Therefore, remove 'recovery_disabled' as it's no longer needed.

[1] 5389042 ("md: change managed of recovery_disabled.")
[2] 4044ba5 ("md: don't retry recovery of raid1 that fails due to error on source drive.")

Link: https://lore.kernel.org/linux-raid/[email protected]
Signed-off-by: Li Nan <[email protected]>
Signed-off-by: Yu Kuai <[email protected]>
1 parent 7435b73 commit 5d1dd57

8 files changed

Lines changed: 4 additions & 51 deletions

File tree

drivers/md/md.c

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2618,9 +2618,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
26182618
list_add_rcu(&rdev->same_set, &mddev->disks);
26192619
bd_link_disk_holder(rdev->bdev, mddev->gendisk);
26202620

2621-
/* May as well allow recovery to be retried once */
2622-
mddev->recovery_disabled++;
2623-
26242621
return 0;
26252622

26262623
fail:

drivers/md/md.h

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -505,12 +505,6 @@ struct mddev {
505505
int ok_start_degraded;
506506

507507
unsigned long recovery;
508-
/* If a RAID personality determines that recovery (of a particular
509-
* device) will fail due to a read error on the source device, it
510-
* takes a copy of this number and does not attempt recovery again
511-
* until this number changes.
512-
*/
513-
int recovery_disabled;
514508

515509
int in_sync; /* know to not need resync */
516510
/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so

drivers/md/raid1.c

Lines changed: 3 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1760,7 +1760,6 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
17601760
set_bit(MD_BROKEN, &mddev->flags);
17611761

17621762
if (!test_bit(MD_FAILLAST_DEV, &mddev->flags)) {
1763-
conf->recovery_disabled = mddev->recovery_disabled;
17641763
spin_unlock_irqrestore(&conf->device_lock, flags);
17651764
return;
17661765
}
@@ -1904,7 +1903,6 @@ static bool raid1_remove_conf(struct r1conf *conf, int disk)
19041903

19051904
/* Only remove non-faulty devices if recovery is not possible. */
19061905
if (!test_bit(Faulty, &rdev->flags) &&
1907-
rdev->mddev->recovery_disabled != conf->recovery_disabled &&
19081906
rdev->mddev->degraded < conf->raid_disks)
19091907
return false;
19101908

@@ -1924,9 +1922,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
19241922
int first = 0;
19251923
int last = conf->raid_disks - 1;
19261924

1927-
if (mddev->recovery_disabled == conf->recovery_disabled)
1928-
return -EBUSY;
1929-
19301925
if (rdev->raid_disk >= 0)
19311926
first = last = rdev->raid_disk;
19321927

@@ -2346,7 +2341,6 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
23462341
*/
23472342
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) ||
23482343
!fix_sync_read_error(r1_bio)) {
2349-
conf->recovery_disabled = mddev->recovery_disabled;
23502344
md_done_sync(mddev, r1_bio->sectors);
23512345
md_sync_error(mddev);
23522346
put_buf(r1_bio);
@@ -2948,16 +2942,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
29482942
*skipped = 1;
29492943
put_buf(r1_bio);
29502944

2951-
if (!ok) {
2952-
/* Cannot record the badblocks, so need to
2945+
if (!ok)
2946+
/* Cannot record the badblocks, md_error has set INTR,
29532947
* abort the resync.
2954-
* If there are multiple read targets, could just
2955-
* fail the really bad ones ???
29562948
*/
2957-
conf->recovery_disabled = mddev->recovery_disabled;
2958-
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
29592949
return 0;
2960-
} else
2950+
else
29612951
return min_bad;
29622952

29632953
}
@@ -3144,7 +3134,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
31443134
init_waitqueue_head(&conf->wait_barrier);
31453135

31463136
bio_list_init(&conf->pending_bio_list);
3147-
conf->recovery_disabled = mddev->recovery_disabled - 1;
31483137

31493138
err = -EIO;
31503139
for (i = 0; i < conf->raid_disks * 2; i++) {

drivers/md/raid1.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -93,11 +93,6 @@ struct r1conf {
9393
*/
9494
int fullsync;
9595

96-
/* When the same as mddev->recovery_disabled we don't allow
97-
* recovery to be attempted as we expect a read error.
98-
*/
99-
int recovery_disabled;
100-
10196
mempool_t *r1bio_pool;
10297
mempool_t r1buf_pool;
10398

drivers/md/raid10.c

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2130,8 +2130,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
21302130
mirror = first;
21312131
for ( ; mirror <= last ; mirror++) {
21322132
p = &conf->mirrors[mirror];
2133-
if (p->recovery_disabled == mddev->recovery_disabled)
2134-
continue;
21352133
if (p->rdev) {
21362134
if (test_bit(WantReplacement, &p->rdev->flags) &&
21372135
p->replacement == NULL && repl_slot < 0)
@@ -2143,7 +2141,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
21432141
if (err)
21442142
return err;
21452143
p->head_position = 0;
2146-
p->recovery_disabled = mddev->recovery_disabled - 1;
21472144
rdev->raid_disk = mirror;
21482145
err = 0;
21492146
if (rdev->saved_raid_disk != mirror)
@@ -2196,7 +2193,6 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
21962193
* is not possible.
21972194
*/
21982195
if (!test_bit(Faulty, &rdev->flags) &&
2199-
mddev->recovery_disabled != p->recovery_disabled &&
22002196
(!p->replacement || p->replacement == rdev) &&
22012197
number < conf->geo.raid_disks &&
22022198
enough(conf, -1)) {
@@ -2535,8 +2531,6 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
25352531
pr_notice("md/raid10:%s: recovery aborted due to read error\n",
25362532
mdname(mddev));
25372533

2538-
conf->mirrors[dw].recovery_disabled
2539-
= mddev->recovery_disabled;
25402534
set_bit(MD_RECOVERY_INTR,
25412535
&mddev->recovery);
25422536
break;
@@ -4075,8 +4069,6 @@ static int raid10_run(struct mddev *mddev)
40754069
disk->replacement->saved_raid_disk < 0) {
40764070
conf->fullsync = 1;
40774071
}
4078-
4079-
disk->recovery_disabled = mddev->recovery_disabled - 1;
40804072
}
40814073

40824074
if (mddev->resync_offset != MaxSector)

drivers/md/raid10.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,11 +18,6 @@
1818
struct raid10_info {
1919
struct md_rdev *rdev, *replacement;
2020
sector_t head_position;
21-
int recovery_disabled; /* matches
22-
* mddev->recovery_disabled
23-
* when we shouldn't try
24-
* recovering this device.
25-
*/
2621
};
2722

2823
struct r10conf {

drivers/md/raid5.c

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2922,7 +2922,6 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
29222922

29232923
if (has_failed(conf)) {
29242924
set_bit(MD_BROKEN, &conf->mddev->flags);
2925-
conf->recovery_disabled = mddev->recovery_disabled;
29262925

29272926
pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
29282927
mdname(mddev), mddev->degraded, conf->raid_disks);
@@ -3727,10 +3726,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
37273726
}
37283727
md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf));
37293728

3730-
if (abort) {
3731-
conf->recovery_disabled = conf->mddev->recovery_disabled;
3729+
if (abort)
37323730
md_sync_error(conf->mddev);
3733-
}
37343731
}
37353732

37363733
static int want_replace(struct stripe_head *sh, int disk_idx)
@@ -7548,8 +7545,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
75487545
}
75497546

75507547
conf->bypass_threshold = BYPASS_THRESHOLD;
7551-
conf->recovery_disabled = mddev->recovery_disabled - 1;
7552-
75537548
conf->raid_disks = mddev->raid_disks;
75547549
if (mddev->reshape_position == MaxSector)
75557550
conf->previous_raid_disks = mddev->raid_disks;
@@ -8249,7 +8244,6 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
82498244
* isn't possible.
82508245
*/
82518246
if (!test_bit(Faulty, &rdev->flags) &&
8252-
mddev->recovery_disabled != conf->recovery_disabled &&
82538247
!has_failed(conf) &&
82548248
(!p->replacement || p->replacement == rdev) &&
82558249
number < conf->raid_disks) {
@@ -8310,8 +8304,6 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
83108304

83118305
return 0;
83128306
}
8313-
if (mddev->recovery_disabled == conf->recovery_disabled)
8314-
return -EBUSY;
83158307

83168308
if (rdev->saved_raid_disk < 0 && has_failed(conf))
83178309
/* no point adding a device */

drivers/md/raid5.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -640,7 +640,6 @@ struct r5conf {
640640
* (fresh device added).
641641
* Cleared when a sync completes.
642642
*/
643-
int recovery_disabled;
644643
/* per cpu variables */
645644
struct raid5_percpu __percpu *percpu;
646645
int scribble_disks;

0 commit comments

Comments
 (0)