background path testing

Test every path periodically with a small read io so that path failures
are noticed even when no normal io is flowing.  Each path carries its own
test bio and page, serialised by a semaphore: the multipath daemon submits
the test io for every path (valid or invalid), and test_endio() fails a
path on error, moving it to the priority group's invalid_paths list and
informing the path selector.  Suspend takes every path's test semaphore so
no test io is outstanding while the target is suspended; resume releases
them again (the semaphore starts locked, so testing only begins on the
first resume).  Table events are signalled through the new trigger_event
counter.
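
In outline, each daemon wakeup now does the following per multipath device
(a condensed sketch of do_work() and the test helpers added below, not a
literal excerpt):

	dispatch_failed_ios(m);			/* resubmit failed-over ios */
	iterate_paths(m, test_path);		/* valid and invalid paths alike */

	/* test_path(): skip if the previous test io is still in flight */
	if (down_trylock(&p->test_lock))
		return;
	generic_make_request(p->test_bio);	/* single-sector FAILFAST read */

	/* test_endio(): fail the path on error, then allow the next test */
	if (error)
		__fail_path(path);
	up(&path->test_lock);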
--- diff/drivers/md/dm-mpath.c	2003-12-29 10:16:45.000000000 +0000
+++ source/drivers/md/dm-mpath.c	2003-12-29 10:16:52.000000000 +0000
@@ -25,18 +25,20 @@
 	struct priority_group *pg;
 
 	int has_failed;
-	jiffy_t fail_time;
 	atomic_t fail_count;
 	atomic_t fail_total;
 
+	struct semaphore test_lock;
 	sector_t test_sector;
+	struct bio *test_bio;
+	struct page *test_page;	/* FIXME: share this between all paths ? */
 };
 
 struct priority_group {
 	struct list_head list;
 
 	unsigned priority;
-	struct path_selector ps;
+	struct path_selector *ps;
 	struct list_head valid_paths;
 	struct list_head invalid_paths;
 };
@@ -56,8 +58,7 @@
 	struct bio *failed_ios;
 
 	unsigned test_interval;
-//	atomic_t suspended;		/* device suspension */
-//	int throw_event;
+	atomic_t trigger_event;
 };
 
 static struct path *alloc_path(void)
@@ -68,6 +69,20 @@
 		memset(path, 0, sizeof(*path));
 		atomic_set(&path->fail_count, 0);
 		atomic_set(&path->fail_total, 0);
+		init_MUTEX_LOCKED(&path->test_lock); /* resume will unlock */
+
+		path->test_bio = bio_alloc(GFP_KERNEL, 1);
+		if (!path->test_bio) {
+			kfree(path);
+			return NULL;
+		}
+
+		path->test_page = alloc_page(GFP_KERNEL);
+		if (!path->test_page) {
+			bio_put(path->test_bio);
+			kfree(path);
+			return NULL;
+		}
 	}
 
 	return path;
@@ -75,6 +90,9 @@
 
 static inline void free_path(struct path *p)
 {
+	ClearPageLocked(p->test_page);
+	__free_page(p->test_page);
+	bio_put(p->test_bio);
 	kfree(p);
 }
 
@@ -146,6 +164,47 @@
 }
 
 /*-----------------------------------------------------------------
+ * All paths should be tested periodically.
+ *---------------------------------------------------------------*/
+static void __fail_path(struct path *path)
+{
+	if (path->has_failed)
+		return;
+
+	/* FIXME: this is brain dead */
+	if (!atomic_dec_and_test(&path->fail_count))
+		return;
+
+	path->has_failed = 1;
+//	path->fail_time = jiffies;
+	atomic_inc(&path->fail_total);
+	list_del(&path->list);
+	list_add(&path->list, &path->pg->invalid_paths);
+	path->pg->ps->type->set_path_state(path->pg->ps, path, 0);
+}
+
+static int test_endio(struct bio *bio, unsigned int done, int error)
+{
+	struct path *path = (struct path *) bio->bi_private;
+
+	if (bio->bi_size)
+		return 1;
+
+	if (error)
+		__fail_path(path);
+	up(&path->test_lock);
+	return 0;
+}
+
+static void test_path(struct path *p)
+{
+	if (down_trylock(&p->test_lock))
+		return;		/* last test io still pending */
+
+	generic_make_request(p->test_bio);
+}
+
+/*-----------------------------------------------------------------
  * The multipath daemon is responsible for resubmitting failed ios.
  *---------------------------------------------------------------*/
 static struct dm_daemon _kmpathd;
@@ -171,26 +230,39 @@
 	}
 }
 
-/* Requeue error ios */
-static void do_ios(void)
+static void iterate_paths(struct multipath *m, void (*fn)(struct path *p))
 {
-	struct multipath *m;
+	struct priority_group *pg;
+	struct path *p;
 
-	spin_lock(&_mpath_lock);
-	list_for_each_entry (m, &_mpaths, list)
-		dispatch_failed_ios(m);
-	spin_unlock(&_mpath_lock);
+	down_read(&m->path_lock);
+	list_for_each_entry (pg, &m->priority_groups, list) {
+		list_for_each_entry (p, &pg->valid_paths, list)
+			fn(p);
 
-	blk_run_queues();
+		list_for_each_entry (p, &pg->invalid_paths, list)
+			fn(p);
+	}
+	up_read(&m->path_lock);
 }
 
 /* Multipathd does this every time it runs, returns a sleep duration hint */
 static jiffy_t do_work(void)
 {
-	do_ios();
-//	do_table_events();
-//	return do_scrubbing();
-	return 0;
+	struct multipath *m;
+
+	spin_lock(&_mpath_lock);
+	list_for_each_entry (m, &_mpaths, list) {
+		dispatch_failed_ios(m);
+		iterate_paths(m, test_path);
+
+		if (atomic_dec_and_test(&m->trigger_event))
+			dm_table_event(m->ti->table);
+	}
+	spin_unlock(&_mpath_lock);
+
+	blk_run_queues();
+	return (jiffy_t) 0;
 }
 
 /*-----------------------------------------------------------------
@@ -241,6 +313,18 @@
 	as->argv += n;
 }
 
+static void init_test_bio(struct path *p)
+{
+	struct bio *bio = p->test_bio;
+
+	bio->bi_sector = 0;
+	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
+	bio->bi_bdev = p->dev->bdev;
+	bio->bi_end_io = test_endio;
+	bio->bi_private = p;
+	bio_add_page(bio, p->test_page, bdev_hardsect_size(p->dev->bdev), 0);
+}
+
 static struct path *parse_path(struct arg_set *as, struct path_selector *ps,
 			       struct dm_target *ti)
 {
@@ -263,6 +347,7 @@
 		ti->error = ESTR("error getting device");
 		goto bad;
 	}
+	init_test_bio(p);
 
 	r = ps->type->add_path(ps, p, as->argc, as->argv, &ti->error);
 	if (r) {
@@ -556,23 +641,6 @@
 	return NULL;
 }
 
-static void __fail_path(struct path *path)
-{
-	if (path->has_failed)
-		return;
-
-	if (!atomic_dec_and_test(&path->fail_count))
-		return;
-
-	path->has_failed = 1;
-	path->fail_time = jiffies;
-	atomic_inc(&path->fail_total);
-//	path->test_sector = sector;
-	list_del(&path->list);
-	list_add(&path->list, &path->pg->invalid_paths);
-	path->pg->ps.type->set_path_state(&path->pg->ps, path, 0);
-}
-
 static int __resubmit_io(struct multipath *m, struct bio *bio)
 {
 	int r;
@@ -615,7 +683,21 @@
 	return r;
 }
 
-/* Multipath status */
+static void lock_path(struct path *p) {down(&p->test_lock);}
+static void unlock_path(struct path *p) {up(&p->test_lock);}
+
+static void multipath_suspend(struct dm_target *ti)
+{
+	struct multipath *m = (struct multipath *) ti->private;
+	iterate_paths(m, lock_path);
+}
+
+static void multipath_resume(struct dm_target *ti)
+{
+	struct multipath *m = (struct multipath *) ti->private;
+	iterate_paths(m, unlock_path);
+}
+
 static int multipath_status(struct dm_target *ti, status_type_t type,
 			    char *result, unsigned int maxlen)
 {
@@ -683,266 +765,6 @@
 MODULE_AUTHOR("Sistina software <dm@uk.sistina.com>");
 MODULE_LICENSE("GPL");
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-#ifdef SCRUB_STUFF
-/* Reset failure information on a path */
-static inline void reset_failures(struct path *path)
-{
-	struct path_selector *ps = &path->m->ps;
-
-	path->test_sector = 0;
-	atomic_set(&path->fail, path->fail_limit);
-	clear_bit(FAILED, &path->flags);
-	ps->type->set_path_state(path->ps_private, 0);
-}
-
-/* Reset a "failed" path
- * (IOW: set it to operational so that it can be selected for IO submission)
- */
-static void reset_failed(struct multipath_io *io)
-{
-	struct path *path = io->path;
-
-	if (is_failed(path)) {
-		reset_failures(path);
-		queue_table_event(io);
-	}
-}
-
-/* Scrub IO handling */
-static inline void reset_scrub_io(struct path *path)
-{
-	clear_bit(SCRUB_IO, &path->flags);
-}
-
-
-/* Scrub timeout calculation */
-static inline unsigned long get_reactivation_timeout(struct path *path)
-{
-	return path->reactivation_interval * HZ;
-}
-
-static inline unsigned long get_scrub_timeout(struct path *path)
-{
-	return path->m->scrub_interval * HZ;
-}
-
-/* Calculate scrubbing sleep timeout for deamon */
-static int scrub_timeout(struct path *path, long *timeout)
-{
-	int ret = 0;
-	jiffy_t j = get_fail_time(path);
-	jiffy_t t = is_failed(path) ? get_reactivation_timeout(path) :
-		get_scrub_timeout(path);
-
-	if (t) {
-		/* Jiffies wrap around check */
-		if (jiffies < j) {
-			*timeout = HZ;
-			set_fail_time(path);
-			return 1;
-		}
-
-		j += t;
-		if (jiffies < j)
-			j -= jiffies;
-		else {
-			j = t;
-			ret = 1;
-		}
-
-		if (*timeout > j)
-			*timeout = (long) j;
-	}
-
-	return ret;
-}
-
-/* Allocate a scrubing IO buffer_head and page */
-static  struct buffer_head *alloc_scrub_bh(void)
-{
-	struct buffer_head *bh = kmalloc(sizeof(*bh), GFP_NOIO);
-
-	if (bh) {
-		memset(bh, 0, sizeof(*bh));
-
-		/* Well, might be (a little) too large but it's easy */
-		bh->b_page = alloc_page(GFP_NOIO);
-		if (!bh->b_page) {
-			kfree(bh);
-			return NULL;
-		}
-
-		LockPage(bh->b_page);
-		set_bit(BH_Lock, &bh->b_state);
-		set_bit(BH_Mapped, &bh->b_state);
-		bh->b_data = page_address(bh->b_page);
-		bh->b_size = PAGE_SIZE;
-	}
-
-	return bh;
-}
-
-/* Free a scrubing IO page and buffer_head */
-static void free_scrub_bh(struct buffer_head *bh)
-{
-	UnlockPage(bh->b_page);
-	__free_page(bh->b_page);
-	kfree(bh);
-}
-
-/* Scrubbing end IO function */
-static void multipath_scrub_end_io(struct buffer_head *bh, int uptodate)
-{
-	struct multipath *m = (struct multipath *) io->m;
-
-	if (uptodate) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&m->lock, flags);
-		reset_failed(io);
-		spin_unlock_irqrestore(&m->lock, flags);
-
-		dm_daemon_wake(&_kmultipathd);
-	}
-
-	reset_scrub_io(io->path);
-	free_scrub_bh(io->bh);
-}
-
-/*
- * Queue a test read IO to a path (path scrubbing)
- *
- * Returns
- *
- *	0: scrub IO already in progress or error (retry later)
- *	1: scrub IO queued
- *
- */
-static int queue_scrub_io(struct path *path)
-{
-	struct multipath_io *io;
-	struct buffer_head *bh;
-
-	if (test_and_set_bit(SCRUB_IO, &path->flags))
-		goto out;
-
-	bh = alloc_scrub_bh();
-	if (!bh)
-		goto retry;	/* just retry later */
-
-	/*
-	 * No need to set b_dev, b_blocknr, b_count or initialize
-	 * the wait queue here.
-	 */
-	bh->b_rdev = path->dev->dev;
-	bh->b_rsector = path->test_sector;
-	bh->b_end_io = multipath_scrub_end_io;
-	bh->b_private = io;
-
-	make_request(io);
-	run_task_queue(&tq_disk);
-
-	return 1;
-
-retry:
-	reset_scrub_io(path);
-
-out:
-	return 0;
-}
-
-/*
- * Check if paths need to get a test io queued either for
- * automatic failure recovery or scrubbing of idle paths.
- */
-static long do_scrubbing(void)
-{
-	unsigned long flags;
-	long timeout = MAX_SCHEDULE_TIMEOUT;
-	struct multipath *m;
-
-	/* FIXME: optimize this in case no scrubbing is needed */
-	spin_lock_irqsave(&_mpath_lock, flags);
-	list_for_each_entry (m, &_mpaths, list) {
-		struct path *path;
-
-		/* Don't scrub suspended ms */
-		if (atomic_read(&m->suspended))
-			continue;
-
-		list_for_each_entry (path, &m->paths, list) {
-			if (scrub_timeout(path, &timeout))
-				queue_scrub_io(path);
-		}
-	}
-	spin_unlock_irqrestore(&_mpath_lock, flags);
-
-	return timeout;
-}
-
-static void wait_for_scrub_ios(struct multipath *m)
-{
-	struct path *path;
-
-	list_for_each_entry (path, &m->paths, list) {
-		while (test_bit(SCRUB_IO, &path->flags))
-			schedule_timeout(HZ / 2);
-	}
-}
-
-
-#endif
-
-
-
-
-#ifdef EVENT_STUFF
-/* "Queue" an event on a table in order to process
-   dm_table_event() calls in task context */
-static inline void queue_table_event(struct multipath_io *io)
-{
-	struct multipath *m = (struct multipath *) io->m;
-
-	atomic_inc(&m->events);
-}
-
-/* Work all table events thrown */
-static void do_table_events(void)
-{
-	unsigned long flags;
-	struct multipath *m;
-
-	/* FIXME: optimize this in case no events need to be thrown
-	   (which is most of the time) */
-	spin_lock_irqsave(&_mpath_lock, flags);
-	list_for_each_entry (m, &_mpaths, list) {
-
-		/* Throw all events queued */
-		while (atomic_read(&m->events)) {
-			dm_table_event(m->ti->table);
-			atomic_dec(&m->events);
-		}
-	}
-	spin_unlock_irqrestore(&_mpath_lock, flags);
-}
-
-
-#endif
-
-
 #ifdef STATUS_FOO
 
 	int sz = 0;
@@ -1002,26 +824,3 @@
 
 	return 0;
 #endif
-
-
-
-#if 0
-/* Suspend */
-static void multipath_suspend(struct dm_target *ti)
-{
-	struct multipath *m = (struct multipath *) ti->private;
-
-	//atomic_set(&m->suspended, 1);
-	//wait_for_scrub_ios(m);
-}
-
-/* Resume */
-static void multipath_resume(struct dm_target *ti)
-{
-	struct multipath *m = (struct multipath *) ti->private;
-
-	//atomic_set(&m->suspended, 0);
-	dm_daemon_wake(&_kmpathd);
-}
-
-#endif