md-cluster: show array's status more accurately
author: Guoqing Jiang <gqjiang@suse.com>
Mon, 2 Jul 2018 08:26:25 +0000 (16:26 +0800)
committer: Shaohua Li <shli@fb.com>
Thu, 5 Jul 2018 18:17:01 +0000 (11:17 -0700)
When resync or recovery is happening on one node,
the other nodes currently don't show appropriate info.

For example, when an array is created on the master node
without "--assume-clean" and then assembled on the
slave nodes, "resync=PENDING" is shown when reading
/proc/mdstat on the slave nodes. However, this info
is confusing, since the "PENDING" status was introduced
for starting an array in read-only mode.

We introduce the MD_RESYNCING_REMOTE flag to indicate that
the resync thread is running on a remote node. The flag
is set when a node receives a RESYNCING msg, and we clear
the REMOTE flag in the following cases:

1. resync or recovery has finished on the master node,
   which means the slaves receive a msg with both lo
   and hi set to 0.
2. the node continues resync/recovery in recover_bitmaps.
3. when resync_finish is called.

Then we show accurate information in status_resync
by checking the REMOTE flag together with other conditions.

Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
drivers/md/md-cluster.c
drivers/md/md.c
drivers/md/md.h

index 1ac945f7a3c2cc922c5ea9c64f1d2493d1ea2fa9..5ed13c4fe72d52c772d35f6bb170174c074fc444 100644 (file)
@@ -338,8 +338,14 @@ static void recover_bitmaps(struct md_thread *thread)
                        /* wake up thread to continue resync in case resync
                         * is not finished */
                        if (mddev->recovery_cp != MaxSector) {
-                           set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-                           md_wakeup_thread(mddev->thread);
+                               /*
+                                * clear the REMOTE flag since we will launch
+                                * resync thread in current node.
+                                */
+                               clear_bit(MD_RESYNCING_REMOTE,
+                                         &mddev->recovery);
+                               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                               md_wakeup_thread(mddev->thread);
                        }
                }
 clear_bit:
@@ -458,6 +464,11 @@ static void process_suspend_info(struct mddev *mddev,
        struct suspend_info *s;
 
        if (!hi) {
+               /*
+                * clear the REMOTE flag since resync or recovery is finished
+                * in remote node.
+                */
+               clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
                remove_suspend_info(mddev, slot);
                set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                md_wakeup_thread(mddev->thread);
@@ -586,6 +597,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
                revalidate_disk(mddev->gendisk);
                break;
        case RESYNCING:
+               set_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
                process_suspend_info(mddev, le32_to_cpu(msg->slot),
                                     le64_to_cpu(msg->low),
                                     le64_to_cpu(msg->high));
@@ -1266,6 +1278,8 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
 static int resync_finish(struct mddev *mddev)
 {
        struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
        dlm_unlock_sync(cinfo->resync_lockres);
        return resync_info_update(mddev, 0, 0);
 }
index 994aed2f9dfff4135170102265523045e893ac0a..da83d8710579bef5a752f78518076d3b0ee9feab 100644 (file)
@@ -7680,6 +7680,23 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
                resync -= atomic_read(&mddev->recovery_active);
 
        if (resync == 0) {
+               if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
+                       struct md_rdev *rdev;
+
+                       rdev_for_each(rdev, mddev)
+                               if (rdev->raid_disk >= 0 &&
+                                   !test_bit(Faulty, &rdev->flags) &&
+                                   rdev->recovery_offset != MaxSector &&
+                                   rdev->recovery_offset) {
+                                       seq_printf(seq, "\trecover=REMOTE");
+                                       return 1;
+                               }
+                       if (mddev->reshape_position != MaxSector)
+                               seq_printf(seq, "\treshape=REMOTE");
+                       else
+                               seq_printf(seq, "\tresync=REMOTE");
+                       return 1;
+               }
                if (mddev->recovery_cp < MaxSector) {
                        seq_printf(seq, "\tresync=PENDING");
                        return 1;
index 2d148bdaba744ad18015929b61c441061328d693..8afd6bfdbfb9b5934097b3d0b3378dbd0d53f778 100644 (file)
@@ -496,6 +496,7 @@ enum recovery_flags {
        MD_RECOVERY_FROZEN,     /* User request to abort, and not restart, any action */
        MD_RECOVERY_ERROR,      /* sync-action interrupted because io-error */
        MD_RECOVERY_WAIT,       /* waiting for pers->start() to finish */
+       MD_RESYNCING_REMOTE,    /* remote node is running resync thread */
 };
 
 static inline int __must_check mddev_lock(struct mddev *mddev)