mpath porting
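This ports the device-mapper multipath target to the 2.6 bio-based
interface.  In summary:

 - new DM_MULTIPATH config option; dm-multipath.o is built from
   dm-path-selector.o, dm-null-ps.o, dm-latency-ps.o and dm-mpath.o
 - dm_daemon worker functions now return a jiffy_t sleep hint instead
   of void (dm-raid1.c updated to match)
 - the path selector interface passes struct path_selector and struct
   path explicitly, add_path() returns an int result instead of an
   opaque pointer, and union map_info replaces struct path_info
 - failed bios are re-aimed at another path and queued on the
   multipath; a daemon resubmits them in process context
 - the path testing/scrubbing and table event code is parked under
   #ifdef until it has been converted

The target line is <nr paths> <nr path parms> <path_test_interval>
<path_selector_name> <num_ps_parms> followed by one block per path,
each block being <device> <failback interval> <io failures> plus the
selector's own per-path args.  A hypothetical table for two paths on
the latency selector (device names, sizes and numbers are illustrative
only, untested):

	echo "0 2097152 multipath 2 2 30 latency 2 \
	      /dev/sda 10 5 1 1000 \
	      /dev/sdb 10 5 2 1000" | dmsetup create mp0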
--- diff/drivers/md/Kconfig	2003-12-29 10:12:47.000000000 +0000
+++ source/drivers/md/Kconfig	2003-12-29 10:16:08.000000000 +0000
@@ -156,5 +156,11 @@
 	  Allow volume managers to mirror logical volumes, also
           needed for live data migration tools such as 'pvmove'.
 
+config DM_MULTIPATH
+	tristate "Multipath target (EXPERIMENTAL)"
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	---help---
+	  Allow volume managers to support multipath hardware.
+
 endmenu
 
--- diff/drivers/md/Makefile	2003-12-29 10:15:04.000000000 +0000
+++ source/drivers/md/Makefile	2003-12-29 10:16:08.000000000 +0000
@@ -9,6 +9,9 @@
 
 dm-mirror-objs	:= dm-log.o dm-raid1.o
 
+dm-multipath-objs := dm-path-selector.o dm-null-ps.o	\
+		     dm-latency-ps.o dm-mpath.o
+
 # Note: link order is important.  All raid personalities
 # and xor.o must come before md.o, as they each initialise 
 # themselves, and md.o may use the personalities when it 
@@ -23,3 +26,4 @@
 obj-$(CONFIG_BLK_DEV_DM)	+= dm-mod.o
 obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
+obj-$(CONFIG_DM_MULTIPATH)		+= dm-multipath.o
--- diff/drivers/md/dm-daemon.c	2003-12-29 10:12:47.000000000 +0000
+++ source/drivers/md/dm-daemon.c	2003-12-29 10:16:08.000000000 +0000
@@ -59,7 +59,7 @@
 	return 0;
 }
 
-int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void))
+int dm_daemon_start(struct dm_daemon *dd, const char *name, jiffy_t (*fn)(void))
 {
 	pid_t pid = 0;
 
--- diff/drivers/md/dm-daemon.h	2003-12-29 10:15:50.000000000 +0000
+++ source/drivers/md/dm-daemon.h	2003-12-29 10:16:08.000000000 +0000
@@ -25,7 +25,7 @@
 	wait_queue_head_t job_queue;
 };
 
-int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void));
+int dm_daemon_start(struct dm_daemon *dd, const char *name, jiffy_t (*fn)(void));
 void dm_daemon_stop(struct dm_daemon *dd);
 void dm_daemon_wake(struct dm_daemon *dd);
 int dm_daemon_running(struct dm_daemon *dd);
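dm_daemon worker callbacks now return a jiffy_t so a worker can hint
how long the daemon may sleep before it needs to run again;
do_scrubbing() (currently under #ifdef in dm-mpath.c) computes such a
timeout, while the converted do_work() functions just return 0 for
now.  The daemon loop itself lives in dm-daemon.c and is not part of
this diff, but presumably consumes the hint along these lines (a
sketch only; the fn member and the zero-means-no-hint convention are
assumptions):

	while (dm_daemon_running(dd)) {
		jiffy_t timeout = dd->fn();	/* run the worker */

		if (timeout)
			/* sleep, but wake early for new work */
			interruptible_sleep_on_timeout(&dd->job_queue,
						       timeout);
		else
			interruptible_sleep_on(&dd->job_queue);
	}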
--- diff/drivers/md/dm-latency-ps.c	2003-12-29 10:16:02.000000000 +0000
+++ source/drivers/md/dm-latency-ps.c	2003-12-29 10:16:08.000000000 +0000
@@ -18,35 +18,94 @@
 #include "dm-path-selector.h"
 
 #include <linux/slab.h>
+#include <linux/mempool.h>
 
-/* Path selector context */
-struct latency_c {
-	struct list_head paths;		/* List of operational paths */
-	struct list_head failed_paths;	/* List of failed paths */
+/* Specific info about a path needed by this selector */
+struct path_info {
+	struct list_head list;
+	struct path *path;
 
 	spinlock_t lock;
 
-	int prio_group;			/* Actual priority group to select
-					   path from */
+	int valid;
+	unsigned priority;
+	sector_t io_min;
+
+	sector_t io_count;
+	unsigned long long latency;
+
+	/* used to calculate the average */
+	unsigned latency_count;
+	jiffy_t latency_tot;
 };
 
-/* Path info */
-struct path_c {
-	struct list_head list;		/* Linked list to latency_c */
+/*
+ * We need to record the io start time and path for every io :(
+ */
+struct io {
+	jiffy_t start;
+	struct path_info *pi;
+};
 
+/* Path selector context */
+struct latency_c {
 	spinlock_t lock;
 
-	struct path *path;		/* Opaque pointer to caller path info */
-	struct latency_c *lc;		/* Back pointer to latency context */
+	struct path_info *last_path;
+	struct list_head valid_paths;
+	struct list_head invalid_paths;
+
+	/*
+	 * FIXME: these aren't wired up yet, though select_path
+	 * already uses io_pool.
+	 */
+	kmem_cache_t *io_cache;
+	mempool_t *io_pool;
+};
 
-	/* Set by add_path() arguments */
-	int priority;
-	int queue_min;
+/*-----------------------------------------------------------------
+ * The path lists are maintained in priority order (low to high).
+ *---------------------------------------------------------------*/
+static void path_ordered(struct list_head *head, struct path_info *pi)
+{
+	struct path_info *cursor;
+	unsigned last = 0;
+	int seen = 0;
 
-	/* Internal use */
-	int io_count;
-	unsigned long long latency;
-};
+	list_for_each_entry (cursor, head, list) {
+		BUG_ON (cursor->priority < last);
+
+		last = cursor->priority;
+		if (cursor == pi)
+			seen = 1;
+	}
+
+	BUG_ON(!seen);
+}
+
+static void path_insert(struct list_head *head, struct path_info *pi)
+{
+	struct path_info *cursor;
+
+	list_for_each_entry (cursor, head, list)
+		if (cursor->priority >= pi->priority)
+			break;
+
+	list_add_tail(&pi->list, &cursor->list);
+
+	/* FIXME: remove debug later */
+	path_ordered(head, pi);
+}
+
+static struct path_info *path_lookup(struct list_head *head, struct path *p)
+{
+	struct path_info *pi;
+
+	list_for_each_entry (pi, head, list)
+		if (pi->path == p)
+			return pi;
+
+	return NULL;
+}
 
 /* Allocate latency context */
 static struct latency_c *alloc_latency_c(void)
@@ -54,76 +113,58 @@
 	struct latency_c *lc = kmalloc(sizeof(*lc), GFP_KERNEL);
 
 	if (lc) {
-		INIT_LIST_HEAD(&lc->paths);
-		INIT_LIST_HEAD(&lc->failed_paths);
 		lc->lock = SPIN_LOCK_UNLOCKED;
-		lc->prio_group = -1;
+		lc->last_path = NULL;
+		INIT_LIST_HEAD(&lc->valid_paths);
+		INIT_LIST_HEAD(&lc->invalid_paths);
 	}
 
 	return lc;
 }
 
 /* Allocate path context */
-static struct path_c *alloc_path_c(void)
+/* FIXME: remove this ? */
+static struct path_info *alloc_path_info(void)
 {
-	struct path_c *pc = kmalloc(sizeof(*pc), GFP_KERNEL);
+	struct path_info *pi = kmalloc(sizeof(*pi), GFP_KERNEL);
 
-	if (pc) {
-		memset(pc, 0, sizeof(*pc));
-		pc->lock = SPIN_LOCK_UNLOCKED;
-	}
+	if (pi) {
+		memset(pi, 0, sizeof(*pi));
+		pi->lock = SPIN_LOCK_UNLOCKED;
+	}
 
-	return pc;
+	return pi;
 }
 
 /* Path selector constructor */
-static int latency_ctr(struct path_selector *ps,
-		       int argc, char **argv, char **error)
+static int latency_ctr(struct path_selector *ps)
 {
 	struct latency_c *lc;
 
-	if (argc) {
-		*error = "latency path selector: Invalid number "
-			 "of arguments";
-		return -EINVAL;
-	}
-
 	lc = alloc_latency_c();
-	if (!lc) {
-		*error = "latency path selector: Error allocating context";
+	if (!lc)
 		return -ENOMEM;
-	}
 
-	ps->context = (void *) lc;
+	ps->context = lc;
 	return 0;
 }
 
+static void free_paths(struct list_head *paths)
+{
+	struct path_info *pi, *next;
+
+	list_for_each_entry_safe(pi, next, paths, list) {
+		list_del(&pi->list);
+		kfree(pi);
+	}
+}
+
 /* Path selector destructor */
 static void latency_dtr(struct path_selector *ps)
 {
 	struct latency_c *lc = (struct latency_c *) ps->context;
-	struct list_head *lists[] = {
-		&lc->paths,
-		&lc->failed_paths,
-	};
-	int i = ARRAY_SIZE(lists);
-
-	spin_lock(&lc->lock);
-	while (i--) {
-		struct list_head *elem, *tmp;
-
-		list_for_each_safe(elem, tmp, lists[i]) {
-			struct path_c *pc =
-				list_entry(elem, struct path_c, list);
-
-			list_del(elem);
-			kfree(pc);
-		}
-	}
-	spin_unlock(&lc->lock);
 
+	free_paths(&lc->valid_paths);
+	free_paths(&lc->invalid_paths);
 	kfree(lc);
-	ps->context = NULL;
 }
 
 /* Path add */
@@ -134,129 +175,139 @@
             	    tmp < c ## _MIN || \
 		    tmp > c ## _MAX) { \
 			*error = "latency path selector: Invalid " s; \
-			return NULL; \
+			return -EINVAL; \
 		} \
 		v = tmp; \
         }
 
 #define	PRIORITY_MIN	0
-#define	PRIORITY_MAX	1024*1024
-#define	QUEUE_MIN	0
-#define	QUEUE_MAX	1024*1024
-static void *latency_add_path(struct path_selector *ps, struct path *path,
-			      int argc, char **argv, char **error)
+#define	PRIORITY_MAX	(1024 * 1024)
+#define	IO_MIN	0
+#define	IO_MAX	(1024 * 1024)
+static int latency_add_path(struct path_selector *ps, struct path *path,
+			    int argc, char **argv, char **error)
 {
 	struct latency_c *lc = (struct latency_c *) ps->context;
-	struct path_c *pc;
+	struct path_info *pi;
 
 	if (argc != 2) {
 		*error = "latency path selector: Invalid number of arguments";
-		return NULL;
+		return -EINVAL;
 	}
 
-	pc = alloc_path_c();
-	if (!pc) {
+	pi = alloc_path_info();
+	if (!pi) {
 		*error = "latency path selector: Error allocating path context";
-		return NULL;
+		return -ENOMEM;
 	}
 
-	pc->path = path;
-	pc->lc = lc;
-	xx(0, "priority", PRIORITY, pc->priority);
-	xx(1, "queue min", QUEUE, pc->queue_min);
-	pc->io_count = pc->queue_min;
+	pi->path = path;
+	pi->valid = 1;
+	xx(0, "priority", PRIORITY, pi->priority);
+	xx(1, "io min", IO, pi->io_min);
+	pi->io_count = pi->io_min;
+
 	spin_lock(&lc->lock);
-	list_add_tail(&pc->list, &lc->paths);
+	list_add_tail(&pi->list, &lc->valid_paths);
 	spin_unlock(&lc->lock);
 
-	return (void *) pc;
+	return 0;
 }
 #undef xx
 
 /* Path set state */
-static void latency_set_path_state(void *ps_private, unsigned long state)
+static void latency_set_path_state(struct path_selector *ps,
+				   struct path *p, int valid)
 {
 	unsigned long flags;
-	struct path_c *path = (struct path_c *) ps_private;
-	struct latency_c *lc = path->lc;
+	struct latency_c *lc = (struct latency_c *) ps->context;
+	struct path_info *pi;
 
 	spin_lock_irqsave(&lc->lock, flags);
-	/* Fail path */
-	if (state)
-		list_move_tail(&path->list, &lc->failed_paths);
+
+	pi = path_lookup(&lc->valid_paths, p);
+	if (!pi)
+		pi = path_lookup(&lc->invalid_paths, p);
+
+	if (!pi)
+		DMWARN("unknown path");
+
 	else {
-		list_move_tail(&path->list, &lc->paths);
-		list_for_each_entry(path, &lc->paths, list)
-			path->latency = 0;
+		pi->valid = valid;
+		list_del(&pi->list);
+		if (valid) {
+			path_insert(&lc->valid_paths, pi);
+			pi->latency = 0;
+		} else
+			list_add(&pi->list, &lc->invalid_paths);
 	}
+
 	spin_unlock_irqrestore(&lc->lock, flags);
 }
 
 /* Helper function path selector */
-static struct path_c *_path(struct latency_c *lc)
+static struct path_info *__select_path(struct latency_c *lc)
 {
-	struct path_c *path, *high_path = NULL, *ret = NULL;
-	int high_prio = INT_MAX;
+	struct path_info *pi, *best = NULL, *last = lc->last_path;
 	unsigned long long latency = ~0ULL;
+	unsigned priority = ~0;
 
-	/* Any operational paths ? */
-	list_for_each_entry(path, &lc->paths, list) {
-		/* Find path with highest riority */
-		if (high_prio > path->priority) {
-			high_prio = path->priority;
-			high_path = path;
-		}
-
-		/* Skip paths which aren't members of this priority group */
-		if (path->priority != lc->prio_group)
-			continue;
-
-		/* Ensure minimum IO queue */
-		if (path->io_count) {
-			path->io_count--;
-			ret = path;
+	/*
+	 * We only change paths if enough io has gone through.
+	 */
+	if (last) {
+		/* FIXME: add locking around path->valid ? */
+		if (last->valid && last->io_count < last->io_min)
+			return last;
+
+		/* recalculate the latency for last, guarding against
+		 * a divide by zero if no io has completed yet */
+		if (last->latency_count) {
+			last->latency = last->latency_tot /
+				last->latency_count;
+			last->latency_count = 0;
+			last->latency_tot = 0;
+		}
+	}
+
+	/*
+	 * Choose a new path.  The highest priority group will be
+	 * at the start of the valid list.
+	 */
+	list_for_each_entry (pi, &lc->valid_paths, list) {
+		if (pi->priority > priority)
 			break;
-		}
 
-		/* Select path with less/equal latency */
-		if (path->latency <= latency) {
-			latency = path->latency;
-			ret = path;
-			break;
+		priority = pi->priority;
+		if (pi->latency < latency) {
+			best = pi;
+			latency = pi->latency;
 		}
 	}
 
-	/* There's still at least one with this group priority */
-	if (ret) {
-		if (!path->io_count)
-			path->io_count = path->queue_min;
-	/* None with this group priority available,
-	   try another priority group */
-	} else if (high_path) {
-		lc->prio_group = high_prio;
-		ret = high_path;
-	}
+	if (best)
+		best->io_count = 0;
 
-	return ret;
+	lc->last_path = best;
+	return best;
 }
 
 /* Path selector */
 static struct path *latency_select_path(struct path_selector *ps,
-					struct buffer_head *bh, int rw,
-					struct path_info *path_context)
+					struct bio *bio,
+					union map_info *info)
 {
-	unsigned long flags;
 	struct latency_c *lc = (struct latency_c *) ps->context;
-	struct path_c *path;
+	unsigned long flags;
+	struct path_info *pi;
+	struct io *io;
 
 	spin_lock_irqsave(&lc->lock, flags);
-	path = _path(lc);
+	pi = __select_path(lc);
 	spin_unlock_irqrestore(&lc->lock, flags);
 
-	if (path) {
-		path_context->ll = jiffies;
-		path_context->ptr = (void *) path;
-		return path->path; /* Return opaque caller path */
+	if (pi) {
+		pi->io_count += to_sector(bio->bi_size);
+
+		io = mempool_alloc(lc->io_pool, GFP_NOIO);
+		io->start = jiffies;
+		io->pi = pi;
+		info->ptr = io;
+		return pi->path;
 	}
 
 	return NULL;
@@ -264,42 +315,56 @@
 
 /* Path end IO */
 static void latency_endio(struct path_selector *ps,
-			  struct buffer_head *bh,
-			  int rw, int error,
-			  struct path_info *path_context)
+			  struct bio *bio, int error,
+			  union map_info *info)
 {
+	struct latency_c *lc = (struct latency_c *) ps->context;
 	unsigned long flags;
 	jiffy_t j;
-	struct path_c *path = path_context->ptr;
+	struct io *io = (struct io *) info->ptr;
+	struct path_info *pi = io->pi;
 
-	BUG_ON(!path);
+	j = jiffies - io->start;
 
-	j = jiffies - path_context->ll;
+	spin_lock_irqsave(&pi->lock, flags);
+	pi->latency_tot += j;
+	pi->latency_count++;
+	spin_unlock_irqrestore(&pi->lock, flags);
 
-	/* Put heavy weight on long latencies */
-	j *= j;
-	j *= j;
-
-	spin_lock_irqsave(&path->lock, flags);
-	path->latency += j;
-	spin_unlock_irqrestore(&path->lock, flags);
+	mempool_free(io, lc->io_pool);
 }
 
 /* Path status */
-static int latency_status(void *context, status_type_t type,
-				  char *result, unsigned int maxlen)
+static int latency_status(struct path_selector *ps,
+			  struct path *path,
+			  status_type_t type,
+			  char *result, unsigned int maxlen)
 {
-	struct path_c *path =
-		(struct path_c *) context;
+	struct latency_c *lc = (struct latency_c *) ps->context;
+	unsigned long flags;
+	struct path_info *pi;
+
+	spin_lock_irqsave(&lc->lock, flags);
+
+	pi = path_lookup(&lc->valid_paths, path);
+	if (!pi)
+		pi = path_lookup(&lc->invalid_paths, path);
+
+	spin_unlock_irqrestore(&lc->lock, flags);
+
+	if (!pi) {
+		DMWARN("unknown path");
+		return -EINVAL;
+	}
 
 	switch(type) {
 	case STATUSTYPE_INFO:
-		snprintf(result, maxlen, "%llu ", path->latency);
+		snprintf(result, maxlen, "%llu ", pi->latency);
 		break;
 
 	case STATUSTYPE_TABLE:
-		snprintf(result, maxlen, "%d %d ",
-			 path->priority, path->queue_min);
+		snprintf(result, maxlen, "%u " SECTOR_FORMAT " ",
+			 pi->priority, pi->io_min);
 		break;
 	}
 
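The selector above sticks with the last path until io_min sectors have
passed through it, then folds latency_tot/latency_count into
pi->latency and rescans.  Because path_insert() keeps the valid list
sorted by ascending priority, __select_path() only has to walk the
leading priority group and take its lowest-latency member.  A minimal
userspace sketch of that selection rule over a priority-sorted array
(illustrative only, not part of the patch):

	struct pi {
		unsigned priority;
		unsigned long long latency;	/* running average */
	};

	/* lowest-latency entry within the leading priority group */
	static struct pi *select_pi(struct pi *p, unsigned n)
	{
		struct pi *best = NULL;
		unsigned i;

		for (i = 0; i < n; i++) {
			if (best && p[i].priority != best->priority)
				break;	/* left the leading group */
			if (!best || p[i].latency < best->latency)
				best = &p[i];
		}

		return best;
	}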
--- diff/drivers/md/dm-mpath.c	2003-12-29 10:16:02.000000000 +0000
+++ source/drivers/md/dm-mpath.c	2003-12-29 10:16:08.000000000 +0000
@@ -11,6 +11,7 @@
 
 #include "dm.h"
 #include "dm-daemon.h"
+#include "dm-path-selector.h"
 
 #include <linux/ctype.h>
 #include <linux/init.h>
@@ -20,355 +21,574 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <asm/atomic.h>
-#include "dm-path-selector.h"
 
-/* Multipath context */
-struct multipath_c {
+/* Path properties */
+struct path {
 	struct list_head list;
 
-	struct list_head paths;		/* List of paths */
-	struct list_head io_jobs;	/* IO jobs */
-
-	struct path_selector ps;
-	struct dm_target *ti;
+	struct dm_dev *dev;
+	unsigned fail_limit;
 
-	spinlock_t lock;		/* Lock access to this context */
-	atomic_t io_count;		/* IOs in flight for this context */
+	int has_failed;
+	jiffy_t fail_time;
+	atomic_t fail_count;
+	atomic_t fail_total;
 
-	unsigned int scrub_interval;	/* Set in constructor */
+	unsigned failback_interval;
 
-	atomic_t events;		/* # if table events to throw */
-	atomic_t suspended;		/* device suspension */
+	sector_t test_sector;
 };
 
-/* Multipath io job */
-struct path;
-struct multipath_io {
+/* Multipath context */
+struct multipath {
 	struct list_head list;
+	struct dm_target *ti;
+	struct path_selector ps;
 
-	struct multipath_c *mc;
-	struct path *path;
-
-	struct buffer_head *bh;
-	int rw;
-
-	/* Path selector context between ps->type->select_path()
-	  and ps->type->endio() */
-	struct path_info path_context;
-};
+	struct list_head paths;
 
+	spinlock_t failed_lock;
+	struct bio *failed_ios;
 
-/* Path flags */
-enum {
-	FAILED,
-	SCRUB_IO,
+	unsigned test_interval;
+//	atomic_t suspended;		/* device suspension */
+//	int throw_event;
 };
 
-/* Path properties */
-struct path {
-	struct list_head list;
-
-	struct dm_dev *dev;
-	struct multipath_c *mc;		/* Back pointer to multipath context */
-
-	unsigned long flags;		/* See path flags above */
-
-	/* set in target ctr */
-	int reactivation_interval;	/* Automatic reactivation interval */
-	int fail_max;			/* Maximum failures allowed */
-
-	jiffy_t io_jiffies;		/* Jiffies of last IO queued */
-	atomic_t fail;			/* actual failure count vs. fail_max */
-	atomic_t fail_total;		/* Total failures on this path */
-
-	void *ps_private;	   	/* Opaque pointer to path selector object */
-	unsigned long test_sector; 	/* Path scrubbing sector */
-};
+static struct path *alloc_path(void)
+{
+	struct path *path = kmalloc(sizeof(*path), GFP_KERNEL);
 
-/*
- * Various functions to set a single/all path(s) (in)operational,
- * check if path(s) is/are operational and (un)fail a path, allocate
- * and deallocate io job memory...
- */
+	if (path) {
+		memset(path, 0, sizeof(*path));
+		atomic_set(&path->fail_count, 0);
+		atomic_set(&path->fail_total, 0);
+	}
 
-/* Set/Rretrieve jiffies of last IO on this path */
-static inline void set_io_jiffies(struct path *path)
-{
-	path->io_jiffies = jiffies;
+	return path;
 }
 
-static inline jiffy_t get_io_jiffies(struct path *path)
+static inline void free_path(struct path *p)
 {
-	return path->io_jiffies;
+	kfree(p);
 }
 
-/* "Queue" an event on a table in order to process
-   dm_table_event() calls in task context */
-static inline void queue_table_event(struct multipath_io *io)
+static struct multipath *alloc_multipath(void)
 {
-	struct multipath_c *mc = (struct multipath_c *) io->mc;
+	struct multipath *m;
 
-	atomic_inc(&mc->events);
-}
+	m = kmalloc(sizeof(*m), GFP_KERNEL);
+	if (m) {
+		memset(m, 0, sizeof(*m));
+		INIT_LIST_HEAD(&m->paths);
+	}
 
-/* Check path failed */
-static inline int is_failed(struct path *path)
-{
-	return test_bit(FAILED, &path->flags);
+	return m;
 }
 
-/* Set a path to "failed" */
-static inline void set_failed(struct multipath_io *io)
+static void free_multipath(struct multipath *m)
 {
-	struct path *path = io->path;
-	struct path_selector *ps = &path->mc->ps;
+	struct path_selector *ps;
+	struct path *path, *tmp;
 
-	if (is_failed(path))
+	if (!m)
 		return;
 
-	atomic_inc(&path->fail_total);
-	io->path->test_sector = io->bh->b_rsector;
-	ps->type->set_path_state(path->ps_private, 1);
-	queue_table_event(io);
+	ps = &m->ps;
+
+	if (ps) {
+		ps->type->dtr(ps);
+		dm_put_path_selector(ps->type);
+	}
+
+	list_for_each_entry_safe (path, tmp, &m->paths, list) {
+		list_del(&path->list);
+		dm_put_device(m->ti, path->dev);
+		free_path(path);
+	}
+
+	kfree(m);
 }
 
-/* Reset failure information on a path */
-static inline void reset_failures(struct path *path)
+/*-----------------------------------------------------------------
+ * The multipath daemon is responsible for resubmitting failed ios.
+ *---------------------------------------------------------------*/
+static struct dm_daemon _kmpathd;
+
+static LIST_HEAD(_mpaths);
+static spinlock_t _mpath_lock = SPIN_LOCK_UNLOCKED;
+
+static void dispatch_failed_ios(struct multipath *m)
 {
-	struct path_selector *ps = &path->mc->ps;
+	unsigned long flags;
+	struct bio *bio, *next_bio;
 
-	path->test_sector = 0;
-	atomic_set(&path->fail, path->fail_max);
-	clear_bit(FAILED, &path->flags);
-	ps->type->set_path_state(path->ps_private, 0);
+	spin_lock_irqsave(&m->failed_lock, flags);
+	bio = m->failed_ios;
+	m->failed_ios = NULL;
+	spin_unlock_irqrestore(&m->failed_lock, flags);
+
+	while (bio) {
+		next_bio = bio->bi_next;
+		bio->bi_next = NULL;
+		generic_make_request(bio);
+		bio = next_bio;
+	}
 }
 
-/* Reset a "failed" path
- * (IOW: set it to operational so that it can be selected for IO submission)
- */
-static inline void reset_failed(struct multipath_io *io)
+/* Requeue error ios */
+static void do_ios(void)
 {
-	struct path *path = io->path;
+	struct multipath *m;
 
-	if (_is_failed(path)) {
-		reset_failures(path);
-		queue_table_event(io);
-	}
+	spin_lock(&_mpath_lock);
+	list_for_each_entry (m, &_mpaths, list)
+		dispatch_failed_ios(m);
+	spin_unlock(&_mpath_lock);
+
+	blk_run_queues();
 }
 
-/* Scrub IO handling */
-static inline void reset_scrub_io(struct path *path)
+/* Multipathd does this every time it runs, returns a sleep duration hint */
+static jiffy_t do_work(void)
 {
-	clear_bit(SCRUB_IO, &path->flags);
+	do_ios();
+//	do_table_events();
+//	return do_scrubbing();
+	return 0;
 }
 
+/*-----------------------------------------------------------------
+ * Constructor/argument parsing
+ *	<nr paths> <nr path parms> <path_test_interval>
+ *	<path_selector_name> <num_ps_parms>
+ *	[<device_path> <failback interval> <io failures>
+ *	 <ps path args>{num_ps_parms}
+ *	]{nr paths}
+ *---------------------------------------------------------------*/
+struct param {
+	unsigned min;
+	unsigned max;
+	char *error;
+};
 
-/* Scrub timeout calculation */
-static inline unsigned long get_reactivation_timeout(struct path *path)
+#define ESTR(s) ("dm-multipath: " s)
+
+static int read_param(struct param *param, char *str, unsigned *v, char **error)
 {
-	return path->reactivation_interval * HZ;
+	if ((sscanf(str, "%u", v) != 1) ||
+	    (*v < param->min) ||
+	    (*v > param->max)) {
+		*error = param->error;
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
-static inline unsigned long get_scrub_timeout(struct path *path)
+static int parse_path(struct multipath *m, int argc, char **argv, struct dm_target *ti)
 {
-	return path->mc->scrub_interval * HZ;
+	/* path parameters */
+	static struct param _params[] = {
+		{0, 1024, ESTR("invalid path failback interval")},
+		{0, 1024, ESTR("invalid io failures")}
+	};
+
+	int r;
+	struct path *p;
+
+	p = alloc_path();
+	if (!p)
+		return -ENOMEM;
+
+	r = dm_get_device(ti, argv[0], ti->begin, ti->len,
+			  dm_table_get_mode(ti->table), &p->dev);
+	if (r) {
+		ti->error = "dm-multipath: error getting device";
+		goto bad;
+	}
+
+	r = read_param(_params, argv[1], &p->failback_interval, &ti->error);
+	if (r)
+		goto bad;
+
+	r = read_param(_params + 1, argv[2], &p->fail_limit, &ti->error);
+	if (r)
+		goto bad;
+
+	/* arm the failure counter for this path */
+	atomic_set(&p->fail_count, p->fail_limit);
+
+	r = m->ps.type->add_path(&m->ps, p, argc - 3, argv + 3, &ti->error);
+	if (r)
+		goto bad;
+
+	list_add_tail(&p->list, &m->paths);
+	return 0;
+
+ bad:
+	free_path(p);
+	return r;
 }
 
-/* Calculate scrubbing sleep timeout for deamon */
-static inline int scrub_timeout(struct path *path, long *timeout)
+#define	MIN_PARMS	5
+static int multipath_ctr(struct dm_target *ti, unsigned int argc,
+			 char **argv)
 {
-	int ret = 0;
-	jiffy_t j = get_io_jiffies(path);
-	jiffy_t t = is_failed(path) ? get_reactivation_timeout(path) :
-		get_scrub_timeout(path);
+	/* target parameters */
+	static struct param _params[] = {
+		{2, 1024, ESTR("invalid number of paths")},
+		{2, 32, ESTR("invalid number of path parameters")},
+		{1, 24*60*60, ESTR("invalid path test interval")},
+		{0, 1024, ESTR("invalid path selector parameters")}
+	};
 
-	if (t) {
-		/* Jiffies wrap around check */
-		if (jiffies < j) {
-			*timeout = HZ;
-			set_io_jiffies(path);
-			return 1;
-		}
+	int r, i;
+	struct multipath *m;
+	struct path_selector_type *pst;
+	unsigned nr_paths, nr_params, nr_ps_params;
 
-		j += t;
-		if (jiffies < j)
-			j -= jiffies;
-		else {
-			j = t;
-			ret = 1;
+	/* Check minimum argument count */
+	if (argc < MIN_PARMS) {
+		ti->error = ESTR("not enough arguments");
+		return -EINVAL;
+	}
+
+	m = alloc_multipath();
+	if (!m) {
+		ti->error = ESTR("can't allocate multipath context");
+		return -ENOMEM;
+	}
+
+	r = read_param(_params, argv[0], &nr_paths, &ti->error);
+	if (r)
+		goto bad;
+
+	/* there must be at least 2 paths */
+	if (nr_paths < 2) {
+		ti->error = ESTR("not enough paths");
+		goto bad;
+	}
+
+	r = read_param(_params + 1, argv[1], &nr_params, &ti->error);
+	if (r)
+		goto bad;
+
+	if (nr_params != 2) {
+		ti->error = ESTR("invalid number of path args");
+		goto bad;
+	}
+
+	r = read_param(_params + 2, argv[2], &m->test_interval, &ti->error);
+	if (r)
+		goto bad;
+
+	pst = dm_get_path_selector(argv[3]);
+	if (!pst) {
+		ti->error = ESTR("unknown path selector type");
+		goto bad;
+	}
+
+	r = pst->ctr(&m->ps);
+	if (r) {
+		/* FIXME: put the pst ? */
+		goto bad;
+	}
+
+	r = read_param(_params + 3, argv[4], &nr_ps_params, &ti->error);
+	if (r)
+		goto bad;
+
+	/* Loop through all paths parsing their parameters */
+	argc -= 5; argv += 5;
+	nr_params += nr_ps_params + 1;
+	for (i = 0; i < nr_paths; i++) {
+
+		if (argc < nr_params) {
+			ti->error = ESTR("insufficient arguments");
+			goto bad;
 		}
 
-		if (*timeout > j)
-			*timeout = (long) j;
+		r = parse_path(m, nr_params, argv, ti);
+		if (r)
+			goto bad;
+
+		argc -= nr_params; argv += nr_params;
 	}
 
-	return ret;
+	ti->private = m;
+	m->ti = ti;
+
+	spin_lock(&_mpath_lock);
+	list_add(&m->list, &_mpaths);
+	spin_unlock(&_mpath_lock);
+
+	return 0;
+
+ bad:
+	free_multipath(m);
+	return -EINVAL;
 }
 
-/* Push a job onto the tail of a job queue */
-static inline void push(struct list_head *joblist,
-			struct list_head *job,
-			spinlock_t *lock)
+/* Destruct a multipath mapping */
+static void multipath_dtr(struct dm_target *ti)
 {
-	unsigned long flags;
+	struct multipath *m = (struct multipath *) ti->private;
 
-	spin_lock_irqsave(lock, flags);
-	list_add_tail(job, joblist);
-	spin_unlock_irqrestore(lock, flags);
+//	wait_for_scrub_ios(m);
+	spin_lock(&_mpath_lock);
+	list_del(&m->list);
+	spin_unlock(&_mpath_lock);
+
+	free_multipath(m);
 }
 
-/* Pop an IO job off a job queue */
-static inline struct multipath_io *pop(struct list_head *jobs,
-				       spinlock_t *lock)
+/* Set a path to "failed" */
+static inline void set_failed(struct path_selector *ps, struct path *path, sector_t sector)
 {
-	unsigned long flags;
-	struct multipath_io *io;
+	if (path->has_failed)
+		return;
 
-	spin_lock_irqsave(lock, flags);
-	if (list_empty(jobs))
-		io = NULL;
-	else {
-		io = list_entry(jobs->next, struct multipath_io, list);
-		list_del(jobs->next);
-	}
-	spin_unlock_irqrestore(lock, flags);
+	/* FIXME: need locking ? */
+	path->has_failed = 1;
+	path->fail_time = jiffies;
+	atomic_inc(&path->fail_total);
+	path->test_sector = sector;
+	ps->type->set_path_state(ps, path, 0);
+//	queue_table_event(io);
+}
 
-	return io;
+/*
+ * Only called on the slow, error path.
+ */
+static struct path *find_path(struct multipath *m, struct block_device *bdev)
+{
+	struct path *p;
+
+	list_for_each_entry(p, &m->paths, list)
+		if (p->dev->bdev == bdev)
+			return p;
+
+	return NULL;
 }
 
+static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+			    int error, union map_info *map_context)
+{
+	struct multipath *m = (struct multipath *) ti->private;
+	struct path_selector *ps = &m->ps;
+	struct path_selector_type *pst = ps->type;
+	ps_endio_fn endio = pst->endio;
+	unsigned long flags;
+
+	if (error) {
+		struct path *path = find_path(m, bio->bi_bdev);
 
-/*-----------------------------------------------------------------
- * IO job allocation/deallocation
- *---------------------------------------------------------------*/
+		if (atomic_dec_and_test(&path->fail_count))
+			set_failed(ps, path, bio->bi_sector);
 
-/* Slab for the io jobs */
-static kmem_cache_t *_multipath_cache;
-mempool_t *_multipath_pool;
+		/* choose a new path */
+		path = pst->select_path(ps, bio, map_context);
+		if (path) {
+			bio->bi_bdev = path->dev->bdev;
+			spin_lock_irqsave(&m->failed_lock, flags);
+			bio->bi_next = m->failed_ios;
+			m->failed_ios = bio;
+			spin_unlock_irqrestore(&m->failed_lock, flags);
 
-static int ios = 0;
-#define	DEFAULT_IOS	256
-#define	MIN_IOS		16
-#define	MAX_IOS		32768	/* maximum on 32 bit hw with mempool_create */
+			dm_daemon_wake(&_kmpathd);
+			return 1;	/* io not complete */
+		}
+	}
+
+	if (endio)
+		endio(ps, bio, error, map_context);
+
+	return 0;		/* io complete */
+}
 
-static inline struct multipath_io *alloc_io(void)
+/* Suspend */
+static void multipath_suspend(struct dm_target *ti)
 {
-	return mempool_alloc(_multipath_pool, GFP_NOIO);
+	struct multipath *m = (struct multipath *) ti->private;
+
+	//atomic_set(&m->suspended, 1);
+	//wait_for_scrub_ios(m);
 }
 
-static inline void free_io(struct multipath_io *io)
+/* Resume */
+static void multipath_resume(struct dm_target *ti)
 {
-	mempool_free(io, _multipath_pool);
+	struct multipath *m = (struct multipath *) ti->private;
+
+	//atomic_set(&m->suspended, 0);
+	dm_daemon_wake(&_kmpathd);
 }
 
-/* Multipath context allocation */
-static inline struct multipath_c *alloc_context(void)
+/* Multipath mapping */
+static int multipath_map(struct dm_target *ti, struct bio *bio,
+			 union map_info *map_context)
 {
-	struct multipath_c *mc = kmalloc(sizeof(*mc), GFP_KERNEL);
+	struct multipath *m = (struct multipath *) ti->private;
+	struct path *path;
 
-	if (mc) {
-		memset(mc, 0, sizeof(*mc));
-		INIT_LIST_HEAD(&mc->io_jobs);
-		INIT_LIST_HEAD(&mc->paths);
-		mc->lock = SPIN_LOCK_UNLOCKED;
-		atomic_set(&mc->io_count, 0);
-		atomic_set(&mc->events, 0);
-		atomic_set(&mc->suspended, 0);
-	}
+	/* Ask path selector for a path */
+	path = m->ps.type->select_path(&m->ps, bio, map_context);
+	if (!path)
+		return -1;	/* No valid path found */
 
-	return mc;
+	bio->bi_bdev = path->dev->bdev;
+	return 1;
 }
 
-/* Path context allocation */
-static inline struct path *alloc_path(void)
+/* Multipath status */
+static int multipath_status(struct dm_target *ti, status_type_t type,
+			    char *result, unsigned int maxlen)
 {
-	struct path *path = kmalloc(sizeof(*path), GFP_KERNEL);
+	return 0;
+}
 
-	if (path) {
-		memset(path, 0, sizeof(*path));
-		atomic_set(&path->fail_total, 0);
+/*-----------------------------------------------------------------
+ * Module setup
+ *---------------------------------------------------------------*/
+static struct target_type multipath_target = {
+	.name = "multipath",
+	.module = THIS_MODULE,
+	.ctr = multipath_ctr,
+	.dtr = multipath_dtr,
+	.map = multipath_map,
+	.end_io = multipath_end_io,
+	.suspend = multipath_suspend,
+	.resume = multipath_resume,
+	.status = multipath_status,
+};
+
+int __init dm_multipath_init(void)
+{
+	int r;
+
+	r = dm_register_target(&multipath_target);
+	if (r < 0) {
+		DMERR("%s: register failed %d", multipath_target.name, r);
+		return -EINVAL;
 	}
 
-	return path;
+	r = dm_register_path_selectors();
+	if (r && r != -EEXIST) {
+		dm_unregister_target(&multipath_target);
+		return r;
+	}
+
+	r = dm_daemon_start(&_kmpathd, "kpathd", do_work);
+	if (r) {
+		dm_unregister_path_selectors();
+		dm_unregister_target(&multipath_target);
+	} else
+		DMINFO("dm_multipath v0.2.0");
+
+	return r;
 }
 
-static void free_context(struct multipath_c *mc)
+void __exit dm_multipath_exit(void)
 {
-	struct list_head *elem, *tmp;
-	struct path_selector *ps = &mc->ps;
+	int r;
 
-	if (!mc)
-		return;
+	dm_daemon_stop(&_kmpathd);
+	dm_unregister_path_selectors();
+	r = dm_unregister_target(&multipath_target);
+	if (r < 0)
+		DMERR("%s: target unregister failed %d",
+		      multipath_target.name, r);
+}
+
+/* Module hooks */
+module_init(dm_multipath_init);
+module_exit(dm_multipath_exit);
+
+MODULE_DESCRIPTION(DM_NAME " multipath target");
+MODULE_AUTHOR("Heinz Mauelshagen <mge@sistina.com>");
+MODULE_LICENSE("GPL");
 
-	ps->type->dtr(ps);
-	dm_put_path_selector(ps->type);
 
-	list_for_each_safe(elem, tmp, &mc->paths) {
-		struct path *path = list_entry(elem, struct path, list);
 
-		list_del(elem);
-		if (path->dev)
-			dm_put_device(mc->ti, path->dev);
 
-		kfree(path);
-	}
 
-	kfree(mc);
-}
 
-/*-----------------------------------------------------------------
- * The multipath daemon is responsible for periodically
- * retestings failed paths and resubmitting failed ios.
- *---------------------------------------------------------------*/
-static struct dm_daemon _kmultipathd;
 
-static LIST_HEAD(_mc_jobs);
-static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED;
 
-/* Submit an IO and store the IO timestamp */
-static inline void make_request(struct multipath_io *io)
+#ifdef SCRUB_STUFF
+/* Reset failure information on a path */
+static inline void reset_failures(struct path *path)
 {
-	set_io_jiffies(io->path);
-	generic_make_request(io->rw, io->bh);
+	struct path_selector *ps = &path->m->ps;
+
+	path->test_sector = 0;
+	atomic_set(&path->fail, path->fail_limit);
+	clear_bit(FAILED, &path->flags);
+	ps->type->set_path_state(path->ps_private, 0);
 }
 
-/* Requeue error ios */
-static inline void do_ios(void)
+/* Reset a "failed" path
+ * (IOW: set it to operational so that it can be selected for IO submission)
+ */
+static void reset_failed(struct multipath_io *io)
 {
-	unsigned long flags;
-	struct multipath_c *mc;
-	struct multipath_io *io;
+	struct path *path = io->path;
 
-	spin_lock_irqsave(&_job_lock, flags);
-	list_for_each_entry(mc, &_mc_jobs, list) {
-		while ((io = pop(&mc->io_jobs, &mc->lock)))
-			make_request(io);
+	if (is_failed(path)) {
+		reset_failures(path);
+		queue_table_event(io);
 	}
-	spin_unlock_irqrestore(&_job_lock, flags);
+}
 
-	run_task_queue(&tq_disk);
+/* Scrub IO handling */
+static inline void reset_scrub_io(struct path *path)
+{
+	clear_bit(SCRUB_IO, &path->flags);
 }
 
-/* Work all table events thrown */
-static inline void do_table_events(void)
+
+/* Scrub timeout calculation */
+static inline unsigned long get_reactivation_timeout(struct path *path)
 {
-	unsigned long flags;
-	struct multipath_c *mc;
+	return path->reactivation_interval * HZ;
+}
 
-	/* FIXME: optimize this in case no events need to be thrown
-	   (which is most of the time) */
-	spin_lock_irqsave(&_job_lock, flags);
-	list_for_each_entry(mc, &_mc_jobs, list) {
-		/* Throw all events queued */
-		while (atomic_read(&mc->events)) {
-			dm_table_event(mc->ti->table);
-			atomic_dec(&mc->events);
+static inline unsigned long get_scrub_timeout(struct path *path)
+{
+	return path->m->scrub_interval * HZ;
+}
+
+/* Calculate scrubbing sleep timeout for the daemon */
+static int scrub_timeout(struct path *path, long *timeout)
+{
+	int ret = 0;
+	jiffy_t j = get_fail_time(path);
+	jiffy_t t = is_failed(path) ? get_reactivation_timeout(path) :
+		get_scrub_timeout(path);
+
+	if (t) {
+		/* Jiffies wrap around check */
+		if (jiffies < j) {
+			*timeout = HZ;
+			set_fail_time(path);
+			return 1;
+		}
+
+		j += t;
+		if (jiffies < j)
+			j -= jiffies;
+		else {
+			j = t;
+			ret = 1;
 		}
+
+		if (*timeout > j)
+			*timeout = (long) j;
 	}
-	spin_unlock_irqrestore(&_job_lock, flags);
+
+	return ret;
 }
 
 /* Allocate a scrubing IO buffer_head and page */
-static inline struct buffer_head *alloc_scrub_bh(void)
+static struct buffer_head *alloc_scrub_bh(void)
 {
 	struct buffer_head *bh = kmalloc(sizeof(*bh), GFP_NOIO);
 
@@ -393,7 +613,7 @@
 }
 
 /* Free a scrubing IO page and buffer_head */
-static inline void free_scrub_bh(struct buffer_head *bh)
+static void free_scrub_bh(struct buffer_head *bh)
 {
 	UnlockPage(bh->b_page);
 	__free_page(bh->b_page);
@@ -403,22 +623,20 @@
 /* Scrubbing end IO function */
 static void multipath_scrub_end_io(struct buffer_head *bh, int uptodate)
 {
-	struct multipath_io *io = (struct multipath_io *) bh->b_private;
-	struct multipath_c *mc = (struct multipath_c *) io->mc;
+	struct multipath *m = (struct multipath *) io->m;
 
 	if (uptodate) {
 		unsigned long flags;
 
-		spin_lock_irqsave(&mc->lock, flags);
+		spin_lock_irqsave(&m->lock, flags);
 		reset_failed(io);
-		spin_unlock_irqrestore(&mc->lock, flags);
+		spin_unlock_irqrestore(&m->lock, flags);
 
 		dm_daemon_wake(&_kmultipathd);
 	}
 
 	reset_scrub_io(io->path);
 	free_scrub_bh(io->bh);
-	free_io(io);
 }
 
 /*
@@ -430,7 +648,7 @@
  *	1: scrub IO queued
  *
  */
-static inline int queue_scrub_io(struct path *path)
+static int queue_scrub_io(struct path *path)
 {
 	struct multipath_io *io;
 	struct buffer_head *bh;
@@ -442,16 +660,10 @@
 	if (!bh)
 		goto retry;	/* just retry later */
 
-	/* Setup io */
-	io = alloc_io();
-
-	io->mc = path->mc;
-	io->path = path;
-	io->bh = bh;
-	io->rw = READ;
-
-	/* no need to set b_dev, b_blocknr, b_count
-	   or initialize the wait queue here */
+	/*
+	 * No need to set b_dev, b_blocknr, b_count or initialize
+	 * the wait queue here.
+	 */
 	bh->b_rdev = path->dev->dev;
 	bh->b_rsector = path->test_sector;
 	bh->b_end_io = multipath_scrub_end_io;
@@ -473,341 +685,105 @@
  * Check if paths need to get a test io queued either for
  * automatic failure recovery or scrubbing of idle paths.
  */
-static inline long do_scrubbing(void)
+static long do_scrubbing(void)
 {
 	unsigned long flags;
 	long timeout = MAX_SCHEDULE_TIMEOUT;
-	struct multipath_c *mc;
+	struct multipath *m;
 
 	/* FIXME: optimize this in case no scrubbing is needed */
-	spin_lock_irqsave(&_job_lock, flags);
-	list_for_each_entry(mc, &_mc_jobs, list) {
+	spin_lock_irqsave(&_mpath_lock, flags);
+	list_for_each_entry (m, &_mpaths, list) {
 		struct path *path;
 
-		/* Don't scrub suspended mcs */
-		if (atomic_read(&mc->suspended))
+		/* Don't scrub suspended multipaths */
+		if (atomic_read(&m->suspended))
 			continue;
 
-		list_for_each_entry(path, &mc->paths, list) {
+		list_for_each_entry (path, &m->paths, list) {
 			if (scrub_timeout(path, &timeout))
 				queue_scrub_io(path);
 		}
 	}
-	spin_unlock_irqrestore(&_job_lock, flags);
+	spin_unlock_irqrestore(&_mpath_lock, flags);
 
 	return timeout;
 }
 
-/* Multipathd does this every time it runs, returns a sleep duration hint */
-static inline jiffy_t do_work(void)
+static void wait_for_scrub_ios(struct multipath *m)
 {
-	do_ios();
-	do_table_events();
-	return do_scrubbing();
-}
-
-
-/*-----------------------------------------------------------------
- * Constructor/argument parsing
- *---------------------------------------------------------------*/
-
-#define ARG_FORMAT	"%d"
-
-/* range checks for target definition in _get_path() */
-#define	PARM_MIN	0		/* mininum parameters */
-#define	PARM_MAX	1024		/* maximum	" */
-
-#define	PATH_PARM_MIN	2	/* min path parameters */
-#define	PATH_PARM_MAX	2	/* max		" */
-
-#define	SCRUB_MIN	1		/* min scrubbing interval in seconds */
-#define	SCRUB_MAX	24*60*60	/* max		" */
-
-/* Path flags */
-#define	PATHS_MIN	2	/* min number of paths */
-#define	PATHS_MAX	1024	/* max		" */
-
-#define xx(av, a, s, c, v) \
-        if (sscanf(av[a], ARG_FORMAT, &tmp) != 1 || \
-            tmp < c ## _MIN || \
-            tmp > c ## _MAX) { \
-		_free_context(mc); \
-                ti->error = "dm-multipath: Invalid " s; \
-                return -EINVAL; \
-        } \
-	v = tmp;
-
-/*
- * Parse a
- *
- *	<num_paths> <num_path_parms> <path_scrubbing_interval>
- *	<path_selector_name> <num_ps_parms>
- *	[<device_path> <reactivation_interval> <max_failures>
- *       <priority_group> <queue_len>
- *	{2,num_paths}]
- *
- * parameter set and construct a multipath context
- *
- */
-#define	MIN_PARMS	5
-static int multipath_ctr(struct dm_target *ti, unsigned int argc,
-			 char **argv)
-{
-	int a, parms, paths, path_parms, scrub_interval, ps_parms, tmp;
-	char **av;
-	struct multipath_c *mc = NULL;
-	struct path_selector_type *pst;
 	struct path *path;
 
-	if (argc < MIN_PARMS)	/* Check minimum argument count */
-		goto bad_parms;
-
-	xx(argv, 0, "number of paths", PATHS, paths);
-	if (paths < 2)
-		goto bad_paths;
-	xx(argv, 1, "number of path parameters", PATH_PARM, path_parms);
-	xx(argv, 2, "path scrubbing interval", SCRUB, scrub_interval);
-	xx(argv, 4, "path selector parameters", PARM, ps_parms);
-
-	parms = path_parms + ps_parms;
-	if (MIN_PARMS + paths * parms != argc)
-		goto bad_parms;
-
-	mc = alloc_context();
-	if (!mc)
-		goto bad_context;
-
-	pst = dm_get_path_selector(argv[3]);
-	if (!pst)
-		goto bad_ps;
-
-	if (pst->ctr(&mc->ps, 0, NULL, &ti->error))
-		goto bad_ps_ctr;
-
-	mc->scrub_interval = scrub_interval;
-
-	/* Loop through all paths parsing their parameters */
-	av = &argv[MIN_PARMS];
-	for (a = MIN_PARMS; a < argc; a += parms, av += parms) {
-		void *path_c;
-
-		path = alloc_path();
-		if (!path)
-			goto bad_alloc_path;
-
-		/* Add path to the list first, so that _free_context()
-		   is able to free it on error */
-		list_add_tail(&path->list, &mc->paths);
-
-		xx(av, 1, "path reactivation interval", PARM,
-		   path->reactivation_interval);
-		xx(av, 2, "maximum path failures", PARM, path->fail_max);
-
-		if (dm_get_device(ti, av[0], ti->begin, ti->len,
-				  dm_table_get_mode(ti->table), &path->dev))
-			goto bad_dm_get_device;
-
-		path_c = mc->ps.type->add_path(&mc->ps, path,
-					       ps_parms, &av[3], &ti->error);
-		if (!path_c)
-			goto bad_ps_add;
-
-		path->ps_private = path_c;
-		path->mc = mc;
-		reset_failures(path);
-	}
-
-	ti->private = mc;
-        ti->error = NULL;
-	mc->ti = ti;
-	push(&_mc_jobs, &mc->list, &_job_lock);
-
-	return 0;
-
-bad_parms:
-	ti->error = "dm-multipath: not enough arguments";
-	return -EINVAL;
-
-bad_paths:
-	ti->error = "dm-multipath: not enough paths";
-	return -EINVAL;
-
-bad_context:
-	ti->error = "dm-multipath: can't allocate multipath context";
-	return -ENOMEM;
-
-bad_ps:
-	free_context(mc);
-	ti->error = "dm-multipath: invalid path selector";
-	return -EINVAL;
-
-bad_ps_ctr:
-	free_context(mc);
-	ti->error = "dm-multipath: error path selector constructor";
-       	return -ENXIO;
-
-bad_alloc_path:
-	free_context(mc);
-	ti->error = "dm-multipath: can't allocate path context";
-	return -ENOMEM;
-
-bad_dm_get_device:
-	free_context(mc);
-	ti->error = "dm-multipath: error getting device";
-       	return -ENXIO;
-
-bad_ps_add:
-	free_context(mc);
-	ti->error = "dm-multipath: error add path";
-       	return -ENXIO;
-}
-#undef xx
-
-static void wait_for_scrub_ios(struct multipath_c *mc)
-{
-	struct path *path;
-
-	list_for_each_entry(path, &mc->paths, list) {
+	list_for_each_entry (path, &m->paths, list) {
 		while (test_bit(SCRUB_IO, &path->flags))
 			schedule_timeout(HZ / 2);
 	}
 }
 
-static inline void remove_mc_job(struct multipath_c *mc)
-{
-	unsigned long flags;
-	struct multipath_c *mc_tmp;
 
-	spin_lock_irqsave(&_job_lock, flags);
-	list_for_each_entry(mc_tmp, &_mc_jobs, list) {
-		if (mc == mc_tmp) {
-			list_del(&mc->list);
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&_job_lock, flags);
-}
+#endif
 
-/* Destruct a multipath mapping */
-static void multipath_dtr(struct dm_target *ti)
-{
-	struct multipath_c *mc = (struct multipath_c *) ti->private;
 
-	wait_for_scrub_ios(mc);
-	remove_mc_job(mc);
-	free_context(mc);
-}
 
-static inline void map(struct multipath_io *io, struct path *path)
-{
-	io->path = path;
-	io->bh->b_rdev = path->dev->dev;
-	set_io_jiffies(path);
-}
 
-static int multipath_end_io(struct dm_target *ti, struct buffer_head *bh,
-			    int rw, int error, union map_info *map_context)
+#ifdef EVENT_STUFF
+/* "Queue" an event on a table in order to process
+   dm_table_event() calls in task context */
+static inline void queue_table_event(struct multipath_io *io)
 {
-	int r = 0;
-	struct multipath_io *io = (struct multipath_io *) map_context->ptr;
-	struct multipath_c *mc = (struct multipath_c *) io->mc;
-	struct path_selector *ps = &mc->ps;
-	struct path *path = io->path;
-	struct path_selector_type *pst = ps->type;
-	ps_endio_fn ps_endio = pst->endio;
-
-	if (error) {
-		if (atomic_dec_and_test(&path->fail))
-			set_failed(io);
-
-		path = pst->select_path(ps, io->bh, io->rw, &io->path_context);
-		if (path) {
-			/* Map the IO to this new path */
-			map(io, path);
-			push(&mc->io_jobs, &io->list, &mc->lock);
-			dm_daemon_wake(&_kmultipathd);
+	struct multipath *m = (struct multipath *) io->m;
 
-			return 1;	/* Handle later */
-		}
-	}
-
-	/* Call path selector end IO method if registered */
-	if (ps_endio)
-		ps_endio(ps, io->bh, io->rw, error, &io->path_context);
-
-	free_io(io);
-
-	return r;
+	atomic_inc(&m->events);
 }
 
-/* Suspend */
-static void multipath_suspend(struct dm_target *ti)
+/* Work all table events thrown */
+static void do_table_events(void)
 {
-	struct multipath_c *mc = (struct multipath_c *) ti->private;
-
-	atomic_set(&mc->suspended, 1);
-	wait_for_scrub_ios(mc);
-}
+	unsigned long flags;
+	struct multipath *m;
 
-/* Resume */
-static void multipath_resume(struct dm_target *ti)
-{
-	struct multipath_c *mc = (struct multipath_c *) ti->private;
+	/* FIXME: optimize this in case no events need to be thrown
+	   (which is most of the time) */
+	spin_lock_irqsave(&_mpath_lock, flags);
+	list_for_each_entry (m, &_mpaths, list) {
 
-	atomic_set(&mc->suspended, 0);
-	dm_daemon_wake(&_kmultipathd);
+		/* Throw all events queued */
+		while (atomic_read(&m->events)) {
+			dm_table_event(m->ti->table);
+			atomic_dec(&m->events);
+		}
+	}
+	spin_unlock_irqrestore(&_mpath_lock, flags);
 }
 
-/* Multipath mapping */
-static int multipath_map(struct dm_target *ti, struct buffer_head *bh,
-			 int rw, union map_info *map_context)
-{
-	struct multipath_c *mc = (struct multipath_c *) ti->private;
-	struct path_selector *ps = &mc->ps;
-	struct multipath_io *io = alloc_io();
-	struct path *path;
-
-	/* Ask path selector for a path */
-	path = ps->type->select_path(ps, bh, rw, &io->path_context);
-	if (!path) { /* No valid path found */
-		free_io(io);
-		return -1;	/* Error */
-	}
 
-	io->mc = mc;
-	io->bh = bh;
-	io->rw = rw;
+#endif
 
-	map(io, path);			/* Map the IO to this path */
-	map_context->ptr = (void *) io;	/* Save for multipath_end_io() */
 
-	return 1;	/* Normal map */
-}
+#ifdef STATUS_FOO
 
-/* Multipath status */
-static int multipath_status(struct dm_target *ti, status_type_t type,
-			    char *result, unsigned int maxlen)
-{
 	int sz = 0;
-	struct multipath_c *mc = (struct multipath_c *) ti->private;
+	struct multipath *m = (struct multipath *) ti->private;
 	struct path *path;
-	ps_status_fn ps_status = mc->ps.type->status;
+	ps_status_fn ps_status = m->ps.type->status;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
-		list_for_each_entry(path, &mc->paths, list) {
+		list_for_each_entry (path, &m->paths, list) {
 			sz += snprintf(result + sz, maxlen - sz, "%s ",
 				       dm_kdevname(to_kdev_t(path->dev->dev)));
-			if (_is_failed(path)) {
+			if (is_failed(path)) {
 				struct timespec fail;
 
-				jiffies_to_timespec(jiffies - path->io_jiffies, &fail);
-				sz += snprintf(result + sz, maxlen - sz, "I(%lu/" ARG_FORMAT ")", (unsigned long) fail.tv_sec, path->reactivation_interval);
+				jiffies_to_timespec(jiffies - path->fail_time, &fail);
+				sz += snprintf(result + sz, maxlen - sz, "I(%lu/" ARG_FORMAT ")",
+					       (unsigned long) fail.tv_sec, path->reactivation_interval);
 			} else {
 				sz += snprintf(result + sz, maxlen - sz, "O");
 
 				if (atomic_read(&path->fail_total))
-					sz += snprintf(result + sz, maxlen - sz, "[" ARG_FORMAT "]", atomic_read(&path->fail_total));
+					sz += snprintf(result + sz, maxlen - sz, "[" ARG_FORMAT "]",
+						       atomic_read(&path->fail_total));
 
 				sz += snprintf(result + sz, maxlen - sz, " ");
 			}
@@ -821,12 +797,12 @@
 		break;
 
 	case STATUSTYPE_TABLE:
-		list_for_each_entry(path, &mc->paths, list) {
+		list_for_each_entry (path, &m->paths, list) {
 			sz += snprintf(result + sz, maxlen - sz,
 				       "%s " ARG_FORMAT ARG_FORMAT " ",
 				       dm_kdevname(to_kdev_t(path->dev->dev)),
 				       path->reactivation_interval,
-				       path->fail_max);
+				       path->fail_limit);
 
 			if (ps_status) {
 				ps_status(path->ps_private, type,
@@ -842,96 +818,4 @@
 	}
 
 	return 0;
-}
-
-static struct target_type multipath_target = {
-	.name = "multipath",
-	.module = THIS_MODULE,
-	.ctr = multipath_ctr,
-	.dtr = multipath_dtr,
-	.map = multipath_map,
-	.end_io = multipath_end_io,
-	.suspend = multipath_suspend,
-	.resume = multipath_resume,
-	.status = multipath_status,
-};
-
-int __init dm_multipath_init(void)
-{
-	int r = -EINVAL;
-
-	if (!ios)
-		ios = DEFAULT_IOS;
-	else if (ios < MIN_IOS || ios > MAX_IOS)
-		goto bad;
-
-	r = -ENOMEM;
-
-	/* Create multipath io slab */
-	_multipath_cache = kmem_cache_create("dm multipath io",
-					     sizeof(struct multipath_io),
-					     0, 0, NULL, NULL);
-	if (!_multipath_cache)
-		goto bad;
-
-	/* Create multipath io mempool */
-	_multipath_pool = mempool_create(ios, mempool_alloc_slab,
-					 mempool_free_slab,
-					 _multipath_cache);
-	if (!_multipath_pool)
-		goto bad_pool;
-
-	r = dm_register_target(&multipath_target);
-	if (r < 0) {
-		DMERR("%s: register failed %d", multipath_target.name, r);
-		goto bad_target;
-	}
-
-	r = dm_register_path_selectors();
-	if (r && r != -EEXIST)
-		goto bad_ps;
-
-	r = dm_daemon_start(&_kmultipathd, "kmultipathd", do_work);
-	if (!r) {
-		DMINFO("dm_multipath v0.2.0 (%d io contexts preallocated)",
-		       ios);
-		return 0;
-	}
-
-bad_ps:
-	dm_unregister_target(&multipath_target);
-
-bad_target:
-	mempool_destroy(_multipath_pool);
-
-bad_pool:
-	kmem_cache_destroy(_multipath_cache);
-
-bad:
-	return r;
-}
-
-void __exit dm_multipath_exit(void)
-{
-	int r;
-
-	dm_daemon_stop(&_kmultipathd);
-	dm_unregister_path_selectors();
-	r = dm_unregister_target(&multipath_target);
-	if (r < 0)
-		DMERR("%s: target unregister failed %d",
-		      multipath_target.name, r);
-
-	mempool_destroy(_multipath_pool);
-	kmem_cache_destroy(_multipath_cache);
-}
-
-/* Module hooks */
-module_init(dm_multipath_init);
-module_exit(dm_multipath_exit);
-
-MODULE_DESCRIPTION(DM_NAME " multipath target");
-MODULE_AUTHOR("Heinz Mauelshagen <mge@sistina.com>");
-MODULE_LICENSE("GPL");
-MODULE_PARM(ios, "i");
-MODULE_PARM_DESC(ios, "number of preallocated io contexts");
+#endif
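To summarise the error path above: multipath_end_io() never completes
a failed bio while an alternative path remains.  A rough trace of one
failed io (illustrative; locking elided):

	multipath_end_io(ti, bio, error, info)
	    path = find_path(m, bio->bi_bdev)	/* which path failed */
	    atomic_dec_and_test(&path->fail_count)
	        set_failed()			/* mark it, tell the selector */
	    path = pst->select_path(ps, bio, info)
	    bio->bi_bdev = path->dev->bdev	/* re-aim the bio */
	    chain bio onto m->failed_ios, dm_daemon_wake(&_kmpathd)
	    return 1				/* bio stays in flight */

	... later, in daemon context ...
	do_work() -> do_ios() -> dispatch_failed_ios(m)
	    generic_make_request(bio)		/* retry on the new path */
	blk_run_queues()

If select_path() finds nothing, the selector's endio hook sees the
error and multipath_end_io() returns 0, completing the bio with the
error.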
--- diff/drivers/md/dm-null-ps.c	2003-12-29 10:15:43.000000000 +0000
+++ source/drivers/md/dm-null-ps.c	2003-12-29 10:16:08.000000000 +0000
@@ -21,16 +21,14 @@
 struct null_c {
 	spinlock_t lock;
 
-	struct list_head paths;		/* List of operational paths */
-	struct list_head failed_paths;	/* List of failed paths */
+	struct list_head valid_paths;
+	struct list_head invalid_paths;
 };
 
-/* Path info */
-struct path_c {
-	struct list_head list;		/* Linked list to null_c */
-
-	struct path *path;		/* Opaque pointer to caller path info */
-	struct null_c *nc;		/* Back pointer path selector context */
+/* We keep the paths on linked lists */
+struct path_list {
+	struct list_head list;
+	struct path *path;
 };
 
 /* Allocate null context */
@@ -39,57 +37,34 @@
 	struct null_c *nc = kmalloc(sizeof(*nc), GFP_KERNEL);
 
 	if (nc) {
-		INIT_LIST_HEAD(&nc->paths);
-		INIT_LIST_HEAD(&nc->failed_paths);
+		INIT_LIST_HEAD(&nc->valid_paths);
+		INIT_LIST_HEAD(&nc->invalid_paths);
 		nc->lock = SPIN_LOCK_UNLOCKED;
 	}
 
 	return nc;
 }
 
-/* Allocate path context */
-static struct path_c *alloc_path_c(void)
-{
-	struct path_c *pc = kmalloc(sizeof(*pc), GFP_KERNEL);
-
-	if (pc)
-		memset(pc, 0, sizeof(*pc));
-
-	return pc;
-}
-
-
 /* Path selector constructor */
-static int null_ctr(struct path_selector *ps,
-		    int argc, char **argv, char **error)
+static int null_ctr(struct path_selector *ps)
 {
 	struct null_c *nc;
 
-	if (argc) {
-		*error = "null path selector: No arguments allowed";
-		return -EINVAL;
-	}
-
 	nc = alloc_null_c();
-	if (!nc) {
-		*error = "null path selector: Error allocating context";
+	if (!nc)
 		return -ENOMEM;
-	}
 
-	ps->context = (void *) nc;
+	ps->context = nc;
 	return 0;
 }
 
 static void free_paths(struct list_head *paths)
 {
-	struct list_head *elem, *tmp;
+	struct path_list *pl, *next;
 
-	list_for_each_safe(elem, tmp, paths) {
-		struct path_c *path =
-			list_entry(elem, struct path_c, list);
-
-		list_del(elem);
-		kfree(path);
+	list_for_each_entry_safe (pl, next, paths, list) {
+		list_del(&pl->list);
+		kfree(pl);
 	}
 }
 
@@ -97,68 +72,94 @@
 static void null_dtr(struct path_selector *ps)
 {
 	struct null_c *nc = (struct null_c *) ps->context;
-	free_paths(&nc->paths);
-	free_paths(&nc->failed_paths);
+	free_paths(&nc->valid_paths);
+	free_paths(&nc->invalid_paths);
 	kfree(nc);
 }
 
 /* Path add context */
-static void *null_add_path(struct path_selector *ps, struct path *path,
-			   int argc, char **argv, char **error)
+static int null_add_path(struct path_selector *ps, struct path *path,
+			 int argc, char **argv, char **error)
 {
 	struct null_c *nc = (struct null_c *) ps->context;
-	struct path_c *pc;
+	struct path_list *pl;
 
 	if (argc) {
-		*error = "null path selector: No path arguments allowd";
-		return NULL;
+		*error = "null path selector: No path arguments allowed";
+		return -EINVAL;
 	}
 
-	pc = alloc_path_c();
-	if (!pc) {
+	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
+	if (!pl) {
 		*error = "null path selector: Error allocating path context";
-		return NULL;
+		return -ENOMEM;
 	}
 
-	pc->path = path;
-	pc->nc = nc;
+	pl->path = path;
 
 	spin_lock(&nc->lock);
-	list_add_tail(&pc->list, &nc->paths);
+	list_add_tail(&pl->list, &nc->valid_paths);
 	spin_unlock(&nc->lock);
 
-	return (void *) pc;
+	return 0;
 }
 
-/* Path set state (state = 0 : operational; state != 0 : failed */
-static void null_set_path_state(void *ps_private, unsigned long state)
+/*
+ * Search a list for a particular path.
+ */
+static struct path_list *__find_path(struct list_head *head, struct path *p)
+{
+	struct path_list *pl;
+
+	list_for_each_entry (pl, head, list)
+		if (pl->path == p)
+			return pl;
+
+	return NULL;
+}
+
+static void null_set_path_state(struct path_selector *ps,
+				struct path *p, int valid)
 {
 	unsigned long flags;
-	struct path_c *path = (struct path_c *) ps_private;
-	struct null_c *nc = path->nc;
+	struct null_c *nc = (struct null_c *) ps->context;
+	struct path_list *pl;
 
+	/*
+	 * This function will be called infrequently so we don't
+	 * mind the expense of these searches.
+	 */
 	spin_lock_irqsave(&nc->lock, flags);
-	list_move_tail(&path->list, state ? &nc->failed_paths : &nc->paths);
+	pl = __find_path(&nc->valid_paths, p);
+	if (!pl)
+		pl = __find_path(&nc->invalid_paths, p);
+
+	if (!pl)
+		DMWARN("asked to change the state of an unknown path");
+
+	else
+		list_move_tail(&pl->list, valid ?
+			       &nc->valid_paths : &nc->invalid_paths);
+
 	spin_unlock_irqrestore(&nc->lock, flags);
 }
 
 /* Path selector */
 static struct path *null_select_path(struct path_selector *ps,
 				     struct bio *bio,
-				     struct path_info *path_context)
+				     union map_info *map_context)
 {
 	unsigned long flags;
 	struct null_c *nc = (struct null_c *) ps->context;
-	struct list_head *list = &nc->paths;
-	struct path_c *path = NULL;
+	struct list_head *list = &nc->valid_paths;
+	struct path_list *pl = NULL;
 
 	spin_lock_irqsave(&nc->lock, flags);
 	if (!list_empty(list))
-		path = list_entry(list->next, struct path_c, list);
+		pl = list_entry(list->next, struct path_list, list);
 	spin_unlock_irqrestore(&nc->lock, flags);
 
-	/* Return opaque pointer to caller path object or NULL */
-	return path ? path->path : NULL;
+	return pl ? pl->path : NULL;
 }
 
 static struct path_selector_type null_ps = {
--- diff/drivers/md/dm-path-selector.h	2003-12-29 10:15:43.000000000 +0000
+++ source/drivers/md/dm-path-selector.h	2003-12-29 10:16:08.000000000 +0000
@@ -12,12 +12,14 @@
 #ifndef	DM_PATH_SELECTOR_H
 #define	DM_PATH_SELECTOR_H
 
+#include <linux/device-mapper.h>
+
 struct path;
-struct path_info {
-	void *ptr;
-	unsigned long long ll;
-};
 
+/*
+ * We provide an abstraction for the code that chooses which path
+ * to send some io down.
+ */
 struct path_selector_type;
 struct path_selector {
 	struct path_selector_type *type;
@@ -27,55 +29,53 @@
 /*
  * Constructs a path selector object, takes custom arguments
  */
-typedef int		(*ps_ctr_fn) (struct path_selector *ps,
-				      int argc, char **argv,
-				      char **error);
-typedef void		(*ps_dtr_fn) (struct path_selector *ps);
+typedef int (*ps_ctr_fn) (struct path_selector *ps);
+typedef void (*ps_dtr_fn) (struct path_selector *ps);
 
 /*
  * Add an opaque path object, along with some selector specific
  * path args (eg, path priority).
  */
-/*
- * FIXME: what is this returning ? */
-typedef	void *		(*ps_add_path_fn) (struct path_selector *ps,
-					   struct path *path,
-					   int argc, char **argv, char **error);
+typedef	int (*ps_add_path_fn) (struct path_selector *ps,
+			       struct path *path,
+			       int argc, char **argv, char **error);
 
 /*
  * Chooses a path for this io, if no paths are available then
- * NULL will be returned. Can take path_info over to ps_endio_fn below.
+ * NULL will be returned. The selector may set the map_info
+ * object if it wishes, this will be fed back into the endio fn.
  *
  * Must ensure that _any_ dynamically allocated selection context is
  * reused or reallocated because an endio call (which needs to free it)
  * might happen after a couple of select calls.
  */
-typedef	struct path *	(*ps_select_path_fn) (struct path_selector *ps,
-					      struct bio *bio,
-					      struct path_info *path_context);
+typedef	struct path *(*ps_select_path_fn) (struct path_selector *ps,
+					   struct bio *bio,
+					   union map_info *map_context);
 
 /*
  * Hook the end of the io, path throughput/failure can be
- * detected through this. Must ensure, that any dynamically allocted
+ * detected through this. Must ensure that any dynamically allocated
  * IO context gets freed.
  */
-typedef	void		(*ps_endio_fn) (struct path_selector *ps,
-					struct bio *bio, int error,
-					struct path_info *path_context);
+typedef	void (*ps_endio_fn) (struct path_selector *ps,
+			     struct bio *bio, int error,
+			     union map_info *map_context);
 
 /*
- * Set path state (eg, failed/operational)
+ * Notify the selector of a path's state change (valid/invalid).
  */
-typedef	void		(*ps_set_path_state_fn) (void *context,
-						 unsigned long state);
+typedef	void (*ps_set_path_state_fn) (struct path_selector *ps,
+				      struct path *p, int valid);
 
 /*
  * Table content based on parameters added in ps_add_path_fn
  * or path selector status
  */
-typedef	int		(*ps_status_fn) (void *context,
-					 status_type_t type,
-					 char *result, unsigned int maxlen);
+typedef	int (*ps_status_fn) (struct path_selector *ps,
+			     struct path *path,
+			     status_type_t type,
+			     char *result, unsigned int maxlen);
 
 /* Information about a path selector type */
 struct path_selector_type {
@@ -92,6 +92,10 @@
 	ps_status_fn status;
 };
 
+/*
+ * FIXME: Factor out registration code.
+ */
+
 /* Register a path selector */
 int dm_register_path_selector(struct path_selector_type *type);
 
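For reference, a selector against the reworked interface now looks
roughly like this (a skeleton only: "roundrobin" is a made-up example,
and the path_selector_type field names are assumed to mirror the
typedef names above):

	static int rr_ctr(struct path_selector *ps)
	{
		ps->context = NULL;	/* a real selector allocates here */
		return 0;
	}

	static void rr_dtr(struct path_selector *ps)
	{
	}

	/* add_path now reports errors directly rather than returning
	 * an opaque per-path pointer */
	static int rr_add_path(struct path_selector *ps, struct path *path,
			       int argc, char **argv, char **error)
	{
		if (argc) {
			*error = "roundrobin: no arguments allowed";
			return -EINVAL;
		}

		return 0;	/* a real selector would record the path */
	}

	static struct path *rr_select_path(struct path_selector *ps,
					   struct bio *bio,
					   union map_info *info)
	{
		return NULL;	/* a real selector picks a valid path */
	}

	static struct path_selector_type rr_ps = {
		.name = "roundrobin",
		.ctr = rr_ctr,
		.dtr = rr_dtr,
		.add_path = rr_add_path,
		.select_path = rr_select_path,
	};

	/* registered at module init with dm_register_path_selector(&rr_ps) */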
--- diff/drivers/md/dm-raid1.c	2003-12-29 10:15:20.000000000 +0000
+++ source/drivers/md/dm-raid1.c	2003-12-29 10:16:09.000000000 +0000
@@ -893,7 +893,7 @@
 	blk_run_queues();
 }
 
-static void do_work(void)
+static jiffy_t do_work(void)
 {
 	struct mirror_set *ms;
 
@@ -901,6 +901,8 @@
 	list_for_each_entry (ms, &_mirror_sets, list)
 		do_mirror(ms);
 	up_read(&_mirror_sets_lock);
+
+	return 0;
 }
 
 /*-----------------------------------------------------------------