From: Jonathan Brassow <jbrassow@redhat.com>

This patch gives the mirror target the ability to handle write
failures during recovery.

When kcopyd finishes resynchronizing a mirror region, it calls
recovery_complete() with the results, which are currently
ignored.  This patch checks the bits in 'write_err' and calls
a new function, fail_mirror(), on each device whose bit is set.
fail_mirror() increments the error_count of the mirror device.
If that device is the primary and the mirror set is in-sync, it
also switches the mirror set's primary device pointer to a
device that has not failed.

To maintain backwards compatibility, fail_mirror does nothing
if the DM_FEATURES_HANDLE_ERRORS flag is not present.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>

---
 drivers/md/dm-raid1.c |   75 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 72 insertions(+), 3 deletions(-)

Index: linux/drivers/md/dm-raid1.c
===================================================================
--- linux.orig/drivers/md/dm-raid1.c	2007-07-12 17:04:07.000000000 +0100
+++ linux/drivers/md/dm-raid1.c	2007-07-12 17:04:14.000000000 +0100
@@ -114,6 +114,7 @@ struct region {
  *---------------------------------------------------------------*/
 struct mirror {
 	atomic_t error_count;
+	struct mirror_set *ms;	/* mirror set this device belongs to */
 	struct dm_dev *dev;
 	sector_t offset;
 };
@@ -644,6 +645,53 @@ static void bio_set_ms(struct bio *bio, 
 	bio->bi_next = (struct bio *) ms;
 }
 
+/*
+ * fail_mirror - record a failure on one mirror device
+ * @m: mirror device to fail
+ *
+ * No-op unless errors_handled() is true for the set.  Increments @m's
+ * error_count; on the first error only, if @m is the default (primary)
+ * mirror and the set is in-sync, the first mirror whose error_count
+ * is zero becomes the new default.
+ * This function must not block.
+ */
+static void fail_mirror(struct mirror *m)
+{
+	struct mirror_set *ms = m->ms;
+	struct mirror *new;
+
+	/* Are we handling or ignoring device failures? */
+	if (!errors_handled(ms))
+		return;
+
+	if (atomic_inc_return(&m->error_count) > 1)
+		return;	/* not the first error recorded on this device */
+
+	if (m != ms->default_mirror)
+		return;	/* the default mirror is unaffected */
+
+	/* The default mirror failed: replace it, but only when in-sync. */
+	if (!ms->in_sync) {
+		/*
+		 * Cannot switch primary.  Better to issue requests
+		 * to same failing device than to risk returning
+		 * corrupt data.
+		 */
+		DMERR("Primary mirror (%s) failed while out-of-sync: "
+		      "Reads may fail.", m->dev->name);
+		return;
+	}
+
+	for (new = ms->mirror; new < ms->mirror + ms->nr_mirrors; new++)
+		if (!atomic_read(&new->error_count)) {
+			ms->default_mirror = new;
+			break;
+		}
+
+	if (unlikely(new == ms->mirror + ms->nr_mirrors))
+		DMWARN("All sides of mirror have failed.");
+}
+
 /*-----------------------------------------------------------------
  * Recovery.
  *
@@ -655,15 +703,34 @@ static void recovery_complete(int read_e
 			      void *context)
 {
 	struct region *reg = (struct region *) context;
+	struct mirror_set *ms = reg->rh->ms;
+	unsigned long write_err_ulong = (unsigned long)write_err;
+	int m, bit = 0;
 
 	if (read_err)
 		/* Read error means the failure of default mirror. */
 		DMERR_LIMIT("Unable to read primary mirror during recovery");
 
-	if (write_err)
-		DMERR_LIMIT("Write error during recovery (error = 0x%x)",
-			    write_err);
+	if (!write_err)
+		goto out;
+
+	DMERR_LIMIT("Write error during recovery (error = 0x%x)",
+		    write_err);
+
+	/*
+	 * Bits correspond to devices (excluding default mirror).
+	 * The default mirror cannot change during recovery.
+	 */
+	for (m = 0; m < ms->nr_mirrors; m++) {
+		if (&ms->mirror[m] == ms->default_mirror)
+			continue;
+
+		if (test_bit(bit, &write_err_ulong))
+			fail_mirror(ms->mirror + m);
+		bit++;
+	}
 
+      out:
 	rh_recovery_end(reg, !(read_err || write_err));
 }
 
@@ -1017,6 +1084,8 @@ static int get_mirror(struct mirror_set 
 	}
 
 	ms->mirror[mirror].offset = offset;
+	atomic_set(&(ms->mirror[mirror].error_count), 0);
+	ms->mirror[mirror].ms = ms;
 
 	return 0;
 }