]> git.neil.brown.name Git - history.git/commitdiff
[PATCH] Device Mapper, with updates
authorAlan Cox <alan@lxorguk.ukuu.org.uk>
Mon, 28 Oct 2002 06:48:06 +0000 (22:48 -0800)
committerLinus Torvalds <torvalds@home.transmeta.com>
Mon, 28 Oct 2002 06:48:06 +0000 (22:48 -0800)
This is the device mapper with Joe's updates applied and in -ac for a bit

12 files changed:
drivers/md/Config.help
drivers/md/Config.in
drivers/md/Makefile
drivers/md/dm-ioctl.c [new file with mode: 0644]
drivers/md/dm-linear.c [new file with mode: 0644]
drivers/md/dm-stripe.c [new file with mode: 0644]
drivers/md/dm-table.c [new file with mode: 0644]
drivers/md/dm-target.c [new file with mode: 0644]
drivers/md/dm.c [new file with mode: 0644]
drivers/md/dm.h [new file with mode: 0644]
include/linux/device-mapper.h [new file with mode: 0644]
include/linux/dm-ioctl.h [new file with mode: 0644]

index f892b7fae2f2a1c75eb1119cad94ad05eed61824..543ac06a2b51adffd944aa2112915e10ea8e67fc 100644 (file)
@@ -122,3 +122,15 @@ CONFIG_MD_MULTIPATH
 
   If unsure, say N.
 
+CONFIG_BLK_DEV_DM
+  Device-mapper is a low level volume manager.  It works by allowing
+  people to specify mappings for ranges of logical sectors.  Various
+  mapping types are available, in addition people may write their own
+  modules containing custom mappings if they wish.
+
+  Higher level volume managers such as LVM2 use this driver.
+
+  If you want to compile this as a module, say M here and read
+  <file:Documentation/modules.txt>.  The module will be called dm-mod.o.
+
+  If unsure, say N.
index 8b442ff26eae1aba8c0cc5df9a5ef726415372f4..a4440af56d3d9522b16ccedfca86afb8108ad002 100644 (file)
@@ -14,5 +14,6 @@ dep_tristate '  RAID-4/RAID-5 mode' CONFIG_MD_RAID5 $CONFIG_BLK_DEV_MD
 dep_tristate '  Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
 
 dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD
+dep_tristate ' Device mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD
 
 endmenu
index 3016b63b2c2e2ea9634b6638ea6b646a22425ae3..90821678ac5555494c5a10a2c55340619357a70e 100644 (file)
@@ -2,8 +2,10 @@
 # Makefile for the kernel software RAID and LVM drivers.
 #
 
-export-objs    := md.o xor.o
+export-objs    := md.o xor.o dm-table.o dm-target.o
 lvm-mod-objs   := lvm.o lvm-snap.o lvm-fs.o
+dm-mod-objs    := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
+                  dm-ioctl.o
 
 # Note: link order is important.  All raid personalities
 # and xor.o must come before md.o, as they each initialise 
@@ -17,6 +19,7 @@ obj-$(CONFIG_MD_RAID5)                += raid5.o xor.o
 obj-$(CONFIG_MD_MULTIPATH)     += multipath.o
 obj-$(CONFIG_BLK_DEV_MD)       += md.o
 obj-$(CONFIG_BLK_DEV_LVM)      += lvm-mod.o
+obj-$(CONFIG_BLK_DEV_DM)       += dm-mod.o
 
 include $(TOPDIR)/Rules.make
 
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
new file mode 100644 (file)
index 0000000..8684626
--- /dev/null
@@ -0,0 +1,1129 @@
+/*
+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/compatmac.h>
+#include <linux/miscdevice.h>
+#include <linux/dm-ioctl.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/blk.h>
+#include <linux/slab.h>
+
+#define DM_DRIVER_EMAIL "dm@uk.sistina.com"
+
+/*-----------------------------------------------------------------
+ * The ioctl interface needs to be able to look up devices by
+ * name or uuid.
+ *---------------------------------------------------------------*/
+/*
+ * One entry per registered mapped_device.  A cell always sits on one
+ * _name_buckets chain; it also sits on a _uuid_buckets chain when a
+ * uuid was supplied at creation time.
+ */
+struct hash_cell {
+       struct list_head name_list;
+       struct list_head uuid_list;
+
+       char *name;
+       char *uuid;             /* NULL when no uuid was supplied */
+       struct mapped_device *md;
+
+       /* I hate devfs */
+       devfs_handle_t devfs_entry;
+};
+
+#define NUM_BUCKETS 64
+#define MASK_BUCKETS (NUM_BUCKETS - 1)
+static struct list_head _name_buckets[NUM_BUCKETS];
+static struct list_head _uuid_buckets[NUM_BUCKETS];
+
+static devfs_handle_t _dev_dir;
+void dm_hash_remove_all(void);
+
+/*
+ * Guards access to all three tables.
+ */
+static DECLARE_RWSEM(_hash_lock);
+
+/* Make each of the NUM_BUCKETS chains an empty list head. */
+static void init_buckets(struct list_head *buckets)
+{
+       unsigned int i;
+
+       for (i = 0; i < NUM_BUCKETS; i++)
+               INIT_LIST_HEAD(buckets + i);
+}
+
+/*
+ * Prepare both hash tables and create the devfs directory (DM_DIR)
+ * under which per-device nodes will be registered.  Always returns 0.
+ */
+int dm_hash_init(void)
+{
+       init_buckets(_name_buckets);
+       init_buckets(_uuid_buckets);
+       _dev_dir = devfs_mk_dir(0, DM_DIR, NULL);
+       return 0;
+}
+
+/*
+ * Drop every registered device, then remove the devfs directory
+ * created by dm_hash_init().
+ */
+void dm_hash_exit(void)
+{
+       dm_hash_remove_all();
+       devfs_unregister(_dev_dir);
+}
+
+/*-----------------------------------------------------------------
+ * Hash function:
+ * We're not really concerned with the str hash function being
+ * fast since it's only used by the ioctl interface.
+ *---------------------------------------------------------------*/
+/*
+ * Multiplicative string hash reduced to a bucket index; the mask
+ * works because NUM_BUCKETS is a power of two.
+ */
+static unsigned int hash_str(const char *str)
+{
+       const unsigned int hash_mult = 2654435387U;
+       unsigned int h = 0;
+
+       while (*str)
+               h = (h + (unsigned int) *str++) * hash_mult;
+
+       return h & MASK_BUCKETS;
+}
+
+/*-----------------------------------------------------------------
+ * Code for looking up a device by name
+ *---------------------------------------------------------------*/
+/* Walk the name bucket for 'str'; caller must hold _hash_lock. */
+static struct hash_cell *__get_name_cell(const char *str)
+{
+       struct list_head *bucket = _name_buckets + hash_str(str);
+       struct list_head *pos;
+
+       list_for_each (pos, bucket) {
+               struct hash_cell *hc =
+                       list_entry(pos, struct hash_cell, name_list);
+
+               if (strcmp(hc->name, str) == 0)
+                       return hc;
+       }
+
+       return NULL;
+}
+
+/* Walk the uuid bucket for 'str'; caller must hold _hash_lock. */
+static struct hash_cell *__get_uuid_cell(const char *str)
+{
+       struct list_head *bucket = _uuid_buckets + hash_str(str);
+       struct list_head *pos;
+
+       list_for_each (pos, bucket) {
+               struct hash_cell *hc =
+                       list_entry(pos, struct hash_cell, uuid_list);
+
+               if (strcmp(hc->uuid, str) == 0)
+                       return hc;
+       }
+
+       return NULL;
+}
+
+/*-----------------------------------------------------------------
+ * Inserting, removing and renaming a device.
+ *---------------------------------------------------------------*/
+/* Duplicate a NUL-terminated string with kmalloc; NULL on OOM. */
+static inline char *kstrdup(const char *str)
+{
+       size_t len = strlen(str) + 1;
+       char *copy = kmalloc(len, GFP_KERNEL);
+
+       if (copy)
+               memcpy(copy, str, len);
+       return copy;
+}
+
+/*
+ * Allocate a hash_cell holding copies of 'name' and (optionally)
+ * 'uuid' for 'md'.  Returns NULL on any allocation failure, in which
+ * case nothing is leaked.  Does not take a reference on md.
+ */
+static struct hash_cell *alloc_cell(const char *name, const char *uuid,
+                                   struct mapped_device *md)
+{
+       struct hash_cell *hc;
+
+       hc = kmalloc(sizeof(*hc), GFP_KERNEL);
+       if (!hc)
+               return NULL;
+
+       hc->name = kstrdup(name);
+       if (!hc->name) {
+               kfree(hc);
+               return NULL;
+       }
+
+       if (!uuid)
+               hc->uuid = NULL;
+
+       else {
+               hc->uuid = kstrdup(uuid);
+               if (!hc->uuid) {
+                       kfree(hc->name);
+                       kfree(hc);
+                       return NULL;
+               }
+       }
+
+       /* Self-point the list heads so list_del() is always safe. */
+       INIT_LIST_HEAD(&hc->name_list);
+       INIT_LIST_HEAD(&hc->uuid_list);
+       hc->md = md;
+       return hc;
+}
+
+/* Free a cell and its duplicated strings; tolerates NULL. */
+static void free_cell(struct hash_cell *hc)
+{
+       if (!hc)
+               return;
+
+       kfree(hc->name);
+       kfree(hc->uuid);
+       kfree(hc);
+}
+
+/*
+ * devfs stuff.
+ */
+/*
+ * Create the devfs node DM_DIR/<name> for this cell's device.
+ * NOTE(review): devfs_register() failure is not detected - the
+ * handle is stored unchecked and 0 is always returned.
+ */
+static int register_with_devfs(struct hash_cell *hc)
+{
+       kdev_t dev = dm_kdev(hc->md);
+       hc->devfs_entry =
+           devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER,
+                          major(dev), minor(dev),
+                          S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
+                          &dm_blk_dops, NULL);
+
+       return 0;
+}
+
+/* Remove the devfs node created by register_with_devfs(); always 0. */
+static int unregister_with_devfs(struct hash_cell *hc)
+{
+       devfs_unregister(hc->devfs_entry);
+       return 0;
+}
+
+/*
+ * The kdev_t and uuid of a device can never change once it is
+ * initially inserted.
+ */
+/*
+ * Register 'md' under 'name' (and 'uuid' if non-NULL).  Takes a
+ * reference on md via dm_get() on success.  Returns -ENOMEM on
+ * allocation failure or -EBUSY if either key is already taken.
+ */
+int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
+{
+       struct hash_cell *cell;
+
+       /*
+        * Allocate the new cells.
+        */
+       cell = alloc_cell(name, uuid, md);
+       if (!cell)
+               return -ENOMEM;
+
+       /*
+        * Insert the cell into all three hash tables.
+        */
+       down_write(&_hash_lock);
+       if (__get_name_cell(name))
+               goto bad;
+
+       list_add(&cell->name_list, _name_buckets + hash_str(name));
+
+       if (uuid) {
+               if (__get_uuid_cell(uuid)) {
+                       /* back out the name insertion before failing */
+                       list_del(&cell->name_list);
+                       goto bad;
+               }
+               list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
+       }
+       register_with_devfs(cell);
+       dm_get(md);
+       up_write(&_hash_lock);
+
+       return 0;
+
+ bad:
+       up_write(&_hash_lock);
+       free_cell(cell);
+       return -EBUSY;
+}
+
+/*
+ * Unlink a cell from both hash chains, drop its devfs node and the
+ * reference it holds on the mapped_device.  Caller must hold
+ * _hash_lock for write.  The cell must not be used afterwards.
+ */
+void __hash_remove(struct hash_cell *hc)
+{
+       /* remove from the dev hash */
+       list_del(&hc->uuid_list);
+       list_del(&hc->name_list);
+       unregister_with_devfs(hc);
+       dm_put(hc->md);
+
+       /*
+        * Fix: the cell (and its duplicated name/uuid strings) was
+        * previously never freed here - free_cell() was only reached on
+        * the insert failure path, so every remove leaked the cell.
+        */
+       free_cell(hc);
+}
+
+/*
+ * Remove every registered device.  Iterates the name table only,
+ * which is sufficient since every cell is on a name chain; the safe
+ * iterator allows removal during the walk.
+ */
+void dm_hash_remove_all(void)
+{
+       int i;
+       struct hash_cell *hc;
+       struct list_head *tmp, *n;
+
+       down_write(&_hash_lock);
+       for (i = 0; i < NUM_BUCKETS; i++) {
+               list_for_each_safe (tmp, n, _name_buckets + i) {
+                       hc = list_entry(tmp, struct hash_cell, name_list);
+                       __hash_remove(hc);
+               }
+       }
+       up_write(&_hash_lock);
+}
+
+/*
+ * Rename a device from 'old' to 'new'.  The uuid chain is untouched;
+ * only the name and its hash position (plus the devfs node) change.
+ * Returns -ENOMEM, -EBUSY if 'new' is taken, or -ENXIO if 'old'
+ * does not exist.
+ */
+int dm_hash_rename(const char *old, const char *new)
+{
+       char *new_name, *old_name;
+       struct hash_cell *hc;
+
+       /*
+        * duplicate new.
+        */
+       new_name = kstrdup(new);
+       if (!new_name)
+               return -ENOMEM;
+
+       down_write(&_hash_lock);
+
+       /*
+        * Is new free ?
+        */
+       hc = __get_name_cell(new);
+       if (hc) {
+               DMWARN("asked to rename to an already existing name %s -> %s",
+                      old, new);
+               up_write(&_hash_lock);
+               kfree(new_name);        /* fix: was leaked on this path */
+               return -EBUSY;
+       }
+
+       /*
+        * Is there such a device as 'old' ?
+        */
+       hc = __get_name_cell(old);
+       if (!hc) {
+               DMWARN("asked to rename a non existent device %s -> %s",
+                      old, new);
+               up_write(&_hash_lock);
+               kfree(new_name);        /* fix: was leaked on this path */
+               return -ENXIO;
+       }
+
+       /*
+        * rename and move the name cell.
+        */
+       list_del(&hc->name_list);
+       old_name = hc->name;
+       hc->name = new_name;
+       list_add(&hc->name_list, _name_buckets + hash_str(new_name));
+
+       /* rename the device node in devfs */
+       unregister_with_devfs(hc);
+       register_with_devfs(hc);
+
+       up_write(&_hash_lock);
+       kfree(old_name);
+       return 0;
+}
+
+
+/*-----------------------------------------------------------------
+ * Implementation of the ioctl commands
+ *---------------------------------------------------------------*/
+
+/*
+ * All the ioctl commands get dispatched to functions with this
+ * prototype.
+ */
+typedef int (*ioctl_fn)(struct dm_ioctl *param, struct dm_ioctl *user);
+
+/*
+ * Check a string doesn't overrun the chunk of
+ * memory we copied from userland.
+ */
+/* Returns 0 if a NUL is found inside [begin, end), -EINVAL otherwise. */
+static int valid_str(char *str, void *begin, void *end)
+{
+       void *pos;
+
+       for (pos = (void *) str; pos >= begin && pos < end; pos++)
+               if (*(char *) pos == '\0')
+                       return 0;
+
+       return -EINVAL;
+}
+
+/*
+ * Step from one dm_target_spec to the next using the 'next' byte
+ * offset, setting *spec and *params (the string area following the
+ * spec).  Rejects a spec that goes backwards or past 'end', and a
+ * params string that is not NUL-terminated inside [begin, end).
+ * NOTE(review): a huge 'next' could overflow the pointer addition
+ * before the range check - confirm the callers' data is bounded.
+ */
+static int next_target(struct dm_target_spec *last, uint32_t next,
+                      void *begin, void *end,
+                      struct dm_target_spec **spec, char **params)
+{
+       *spec = (struct dm_target_spec *)
+           ((unsigned char *) last + next);
+       *params = (char *) (*spec + 1);
+
+       if (*spec < (last + 1) || ((void *) *spec > end))
+               return -EINVAL;
+
+       return valid_str(*params, begin, end);
+}
+
+/*
+ * Walk the target specs embedded in the ioctl payload and add each
+ * one to 'table', then mark the table complete.  The first spec is
+ * located via args->data_start, each later one via the previous
+ * spec's 'next' offset.  Returns -EINVAL on any malformed spec or
+ * target-add failure.
+ */
+static int populate_table(struct dm_table *table, struct dm_ioctl *args)
+{
+       int i = 0, r, first = 1;
+       struct dm_target_spec *spec;
+       char *params;
+       void *begin, *end;
+
+       if (!args->target_count) {
+               DMWARN("populate_table: no targets specified");
+               return -EINVAL;
+       }
+
+       /* the payload lives inside the copied-in dm_ioctl buffer */
+       begin = (void *) args;
+       end = begin + args->data_size;
+
+       for (i = 0; i < args->target_count; i++) {
+
+               if (first)
+                       r = next_target((struct dm_target_spec *) args,
+                                       args->data_start,
+                                       begin, end, &spec, &params);
+               else
+                       r = next_target(spec, spec->next, begin, end,
+                                       &spec, &params);
+
+               if (r) {
+                       DMWARN("unable to find target");
+                       return -EINVAL;
+               }
+
+               r = dm_table_add_target(table, spec->target_type,
+                                       spec->sector_start, spec->length,
+                                       params);
+               if (r) {
+                       DMWARN("internal error adding target to table");
+                       return -EINVAL;
+               }
+
+               first = 0;
+       }
+
+       return dm_table_complete(table);
+}
+
+/*
+ * Round up the ptr to the next 'align' boundary.  Obviously
+ * 'align' must be a power of 2.
+ */
+static inline void *align_ptr(void *ptr, unsigned int align)
+{
+       /*
+        * Fix: the mask must be unsigned long.  The old code computed
+        * ~align on an unsigned int; when promoted to unsigned long for
+        * the AND it zero-extended, wiping the top 32 bits of the
+        * pointer on 64-bit machines.
+        */
+       unsigned long mask = (unsigned long) align - 1;
+
+       return (void *) (((unsigned long) ptr + mask) & ~mask);
+}
+
+/*
+ * Copies a dm_ioctl and an optional additional payload to
+ * userland.
+ */
+/*
+ * The version triple at the start of the struct was already written
+ * by check_version(), so only the fields from data_size onward are
+ * copied.  NOTE(review): this relies on data_size being the first
+ * field after version[] in struct dm_ioctl - confirm against
+ * include/linux/dm-ioctl.h before reordering that struct.
+ */
+static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param,
+                          void *data, uint32_t len)
+{
+       int r;
+       void *ptr = NULL;
+
+       if (data) {
+               /* payload lands word-aligned just after the header */
+               ptr = align_ptr(user + 1, sizeof(unsigned long));
+               param->data_start = ptr - (void *) user;
+       }
+
+       /*
+        * The version number has already been filled in, so we
+        * just copy later fields.
+        */
+       r = copy_to_user(&user->data_size, &param->data_size,
+                        sizeof(*param) - sizeof(param->version));
+       if (r)
+               return -EFAULT;
+
+       if (data) {
+               if (param->data_start + len > param->data_size)
+                       return -ENOSPC;
+
+               if (copy_to_user(ptr, data, len))
+                       r = -EFAULT;
+       }
+
+       return r;
+}
+
+/*
+ * Fills in a dm_ioctl structure, ready for sending back to
+ * userland.
+ */
+/*
+ * Sets DM_EXISTS_FLAG plus suspend/read-only state, the device
+ * number, the open count (via a transient bdget/bdput) and the
+ * number of targets in the current table.  Returns -ENXIO only if
+ * the block device cannot be obtained.
+ */
+static int __info(struct mapped_device *md, struct dm_ioctl *param)
+{
+       struct dm_table *table;
+       struct block_device *bdev;
+
+       param->flags = DM_EXISTS_FLAG;
+       if (dm_suspended(md))
+               param->flags |= DM_SUSPEND_FLAG;
+
+       param->dev = kdev_t_to_nr(dm_kdev(md));
+       bdev = bdget(param->dev);
+       if (!bdev)
+               return -ENXIO;
+
+       if (bdev_read_only(bdev))
+               param->flags |= DM_READONLY_FLAG;
+
+       param->open_count = bdev->bd_openers;
+       bdput(bdev);
+
+       table = dm_get_table(md);
+       param->target_count = dm_table_get_num_targets(table);
+       dm_table_put(table);
+
+       return 0;
+}
+
+/*
+ * Always use UUID for lookups if it's present, otherwise use name.
+ */
+/*
+ * On success returns the mapped_device with a reference taken
+ * (caller must dm_put()), and writes both name and uuid back into
+ * 'param'.  NOTE(review): the name copy uses strncpy with the full
+ * field size and no forced terminator - safe only because stored
+ * names originate from params that validate_params() NUL-terminated.
+ */
+static inline struct mapped_device *find_device(struct dm_ioctl *param)
+{
+       struct hash_cell *hc;
+       struct mapped_device *md = NULL;
+
+       down_read(&_hash_lock);
+       hc = *param->uuid ? __get_uuid_cell(param->uuid) :
+               __get_name_cell(param->name);
+       if (hc) {
+               md = hc->md;
+
+               /*
+                * Sneakily write in both the name and the uuid
+                * while we have the cell.
+                */
+               strncpy(param->name, hc->name, sizeof(param->name));
+               if (hc->uuid)
+                       strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1);
+               else
+                       param->uuid[0] = '\0';
+
+               dm_get(md);
+       }
+       up_read(&_hash_lock);
+
+       return md;
+}
+
+#define ALIGNMENT sizeof(int)
+/*
+ * Round 'ptr' up to a multiple of 'a' (a power of two).  Pure: the
+ * caller must use the return value - passing a pointer in does not
+ * modify it.
+ */
+static void *_align(void *ptr, unsigned int a)
+{
+       register unsigned long align = --a;
+
+       return (void *) (((unsigned long) ptr + align) & ~align);
+}
+
+/*
+ * Copies device info back to user space, used by
+ * the create and info ioctls.
+ */
+static int info(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       struct mapped_device *md;
+
+       /* Start pessimistic: no flags means "device does not exist". */
+       param->flags = 0;
+
+       md = find_device(param);
+       if (md) {
+               __info(md, param);
+               dm_put(md);
+       }
+
+       /* A cleared DM_EXISTS_FLAG tells userland the lookup failed. */
+       return results_to_user(user, param, NULL, 0);
+}
+
+/* Translate the ioctl's read-only flag into an fmode for the table. */
+static inline int get_mode(struct dm_ioctl *param)
+{
+       if (param->flags & DM_READONLY_FLAG)
+               return FMODE_READ;
+
+       return FMODE_READ | FMODE_WRITE;
+}
+
+/* Device names become devfs entries, so a '/' cannot be allowed. */
+static int check_name(const char *name)
+{
+       if (strchr(name, '/') != NULL) {
+               DMWARN("invalid device name");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * DM_DEV_CREATE: build a table from the ioctl payload, create the
+ * mapped_device (with a fixed minor if DM_PERSISTENT_DEV_FLAG is
+ * set), register it in the hash and copy the device info back.
+ */
+static int create(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       int r;
+       struct dm_table *t;
+       struct mapped_device *md;
+       int minor;
+
+       r = check_name(param->name);
+       if (r)
+               return r;
+
+       r = dm_table_create(&t, get_mode(param));
+       if (r)
+               return r;
+
+       r = populate_table(t, param);
+       if (r) {
+               dm_table_put(t);
+               return r;
+       }
+
+       /*
+        * NOTE(review): the local 'minor' shadows the kernel's minor()
+        * macro; this only works because minor() is function-like and
+        * the bare identifier is not expanded.  A rename would be safer.
+        */
+       minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ?
+               minor(to_kdev_t(param->dev)) : -1;
+
+       r = dm_create(minor, t, &md);
+       if (r) {
+               dm_table_put(t);
+               return r;
+       }
+       dm_table_put(t);        /* md will have grabbed its own reference */
+
+       set_device_ro(dm_kdev(md), (param->flags & DM_READONLY_FLAG));
+       r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
+       dm_put(md);
+
+       return r ? r : info(param, user);
+}
+
+/*
+ * Build up the status struct for each target
+ */
+/*
+ * Serialise one dm_target_spec plus its status/table string per
+ * target into 'outbuf' (at most param->data_size bytes), setting
+ * *len to the bytes used.  Returns -ENOMEM when the buffer is too
+ * small.
+ */
+static int __status(struct mapped_device *md, struct dm_ioctl *param,
+                   char *outbuf, int *len)
+{
+       int i, num_targets;
+       struct dm_target_spec *spec;
+       char *outptr;
+       void *aligned;
+       status_type_t type;
+       struct dm_table *table = dm_get_table(md);
+
+       if (param->flags & DM_STATUS_TABLE_FLAG)
+               type = STATUSTYPE_TABLE;
+       else
+               type = STATUSTYPE_INFO;
+
+       outptr = outbuf;
+
+       /* Get all the target info */
+       num_targets = dm_table_get_num_targets(table);
+       for (i = 0; i < num_targets; i++) {
+               struct dm_target *ti = dm_table_get_target(table, i);
+
+               if (outptr - outbuf +
+                   sizeof(struct dm_target_spec) > param->data_size) {
+                       dm_table_put(table);
+                       return -ENOMEM;
+               }
+
+               spec = (struct dm_target_spec *) outptr;
+
+               spec->status = 0;
+               spec->sector_start = ti->begin;
+               spec->length = ti->len;
+               /* NOTE(review): not forcibly NUL-terminated if the type
+                  name exactly fills the field */
+               strncpy(spec->target_type, ti->type->name,
+                       sizeof(spec->target_type));
+
+               outptr += sizeof(struct dm_target_spec);
+
+               /* Get the status/table string from the target driver */
+               if (ti->type->status)
+                       ti->type->status(ti, type, outptr,
+                                        outbuf + param->data_size - outptr);
+               else
+                       outptr[0] = '\0';
+
+               /*
+                * Fix: the result of _align() was previously discarded,
+                * leaving outptr (and therefore the next embedded
+                * dm_target_spec) potentially misaligned.  Advance to
+                * the boundary, zeroing the padding so uninitialised
+                * kmalloc memory is never copied out, and bail if the
+                * padding would run past the buffer.
+                */
+               outptr += strlen(outptr) + 1;
+               aligned = _align(outptr, ALIGNMENT);
+               if ((char *) aligned > outbuf + param->data_size) {
+                       dm_table_put(table);
+                       return -ENOMEM;
+               }
+               while (outptr < (char *) aligned)
+                       *outptr++ = '\0';
+               spec->next = outptr - outbuf;
+       }
+
+       param->target_count = num_targets;
+       *len = outptr - outbuf;
+       dm_table_put(table);
+
+       return 0;
+}
+
+/*
+ * Return the status of a device as a text string for each
+ * target.
+ */
+static int get_status(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       struct mapped_device *md;
+       int len = 0;
+       int ret;
+       char *outbuf = NULL;
+
+       md = find_device(param);
+       if (!md)
+               /*
+                * Device not found - returns cleared exists flag.
+                */
+               goto out;
+
+       /* We haven't a clue how long the resultant data will be so
+          just allocate as much as userland has allowed us and make sure
+          we don't overrun it */
+       outbuf = kmalloc(param->data_size, GFP_KERNEL);
+       if (!outbuf)
+               goto out;
+
+       /*
+        * Get the status of all targets.  Fix: the return value was
+        * previously ignored, so a too-small buffer produced a bogus
+        * "success" with empty data instead of an error.
+        */
+       ret = __status(md, param, outbuf, &len);
+       if (ret) {
+               dm_put(md);
+               kfree(outbuf);
+               return ret;
+       }
+
+       /*
+        * Setup the basic dm_ioctl structure.
+        */
+       __info(md, param);
+
+      out:
+       if (md)
+               dm_put(md);
+
+       ret = results_to_user(user, param, outbuf, len);
+
+       if (outbuf)
+               kfree(outbuf);
+
+       return ret;
+}
+
+/*
+ * Wait for a device to report an event
+ */
+/*
+ * DM_TARGET_WAIT: fill in the device info, park the task on the
+ * table's event wait queue, and yield until woken.
+ * NOTE(review): 'wq' lives on this stack frame and is added to the
+ * table's queue but never removed with remove_wait_queue() here -
+ * unless the table's event code unlinks entries on wakeup, the queue
+ * is left pointing at a dead stack frame.  Confirm against dm-table.c.
+ */
+static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       struct mapped_device *md;
+       struct dm_table *table;
+       DECLARE_WAITQUEUE(wq, current);
+
+       md = find_device(param);
+       if (!md)
+               /*
+                * Device not found - returns cleared exists flag.
+                */
+               goto out;
+
+       /*
+        * Setup the basic dm_ioctl structure.
+        */
+       __info(md, param);
+
+       /*
+        * Wait for a notification event
+        */
+       set_current_state(TASK_INTERRUPTIBLE);
+       table = dm_get_table(md);
+       dm_table_add_wait_queue(table, &wq);
+       dm_table_put(table);
+       dm_put(md);
+
+       yield();
+       set_current_state(TASK_RUNNING);
+
+      out:
+       return results_to_user(user, param, NULL, 0);
+}
+
+/*
+ * Retrieves a list of devices used by a particular dm device.
+ */
+/*
+ * DM_DEV_DEPS: count the underlying devices of the current table,
+ * build a dm_target_deps array of their device numbers and copy it
+ * to userland after the dm_ioctl header.
+ */
+static int dep(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       int count, r;
+       struct mapped_device *md;
+       struct list_head *tmp;
+       size_t len = 0;
+       struct dm_target_deps *deps = NULL;
+       struct dm_table *table;
+
+       md = find_device(param);
+       if (!md)
+               goto out;
+       table = dm_get_table(md);
+
+       /*
+        * Setup the basic dm_ioctl structure.
+        */
+       __info(md, param);
+
+       /*
+        * Count the devices.
+        */
+       count = 0;
+       list_for_each(tmp, dm_table_get_devices(table))
+           count++;
+
+       /*
+        * Allocate a kernel space version of the dm_target_status
+        * struct.
+        */
+       /* reject a count whose array size would overflow the allocator */
+       if (array_too_big(sizeof(*deps), sizeof(*deps->dev), count)) {
+               dm_table_put(table);
+               dm_put(md);
+               return -ENOMEM;
+       }
+
+       len = sizeof(*deps) + (sizeof(*deps->dev) * count);
+       deps = kmalloc(len, GFP_KERNEL);
+       if (!deps) {
+               dm_table_put(table);
+               dm_put(md);
+               return -ENOMEM;
+       }
+
+       /*
+        * Fill in the devices.
+        */
+       deps->count = count;
+       count = 0;
+       list_for_each(tmp, dm_table_get_devices(table)) {
+               struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+               deps->dev[count++] = dd->bdev->bd_dev;
+       }
+       dm_table_put(table);
+       dm_put(md);
+
+      out:
+       /* deps == NULL / len == 0 when the device was not found */
+       r = results_to_user(user, param, deps, len);
+
+       kfree(deps);
+       return r;
+}
+
+/* DM_DEV_REMOVE: drop the device's hash entry (uuid preferred). */
+static int remove(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       struct hash_cell *hc;
+       int r = 0;
+
+       down_write(&_hash_lock);
+       if (*param->uuid)
+               hc = __get_uuid_cell(param->uuid);
+       else
+               hc = __get_name_cell(param->name);
+
+       if (hc)
+               __hash_remove(hc);
+       else {
+               DMWARN("device doesn't appear to be in the dev hash table.");
+               r = -EINVAL;
+       }
+       up_write(&_hash_lock);
+       return r;
+}
+
+/* DM_REMOVE_ALL: flush every device from the hash tables. */
+static int remove_all(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       dm_hash_remove_all();
+       return 0;
+}
+
+/*
+ * DM_DEV_SUSPEND: suspend or resume a device according to
+ * DM_SUSPEND_FLAG in the request.
+ */
+static int suspend(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       struct mapped_device *md = find_device(param);
+       int r;
+
+       if (!md)
+               return -ENXIO;
+
+       r = (param->flags & DM_SUSPEND_FLAG) ? dm_suspend(md) : dm_resume(md);
+
+       dm_put(md);
+       return r;
+}
+
+/*
+ * DM_DEV_RELOAD: build a new table from the payload and swap it in
+ * for the device's current table, then refresh the read-only state
+ * and return the updated device info.
+ */
+static int reload(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       int r;
+       struct mapped_device *md;
+       struct dm_table *t;
+
+       r = dm_table_create(&t, get_mode(param));
+       if (r)
+               return r;
+
+       r = populate_table(t, param);
+       if (r) {
+               dm_table_put(t);
+               return r;
+       }
+
+       md = find_device(param);
+       if (!md) {
+               dm_table_put(t);
+               return -ENXIO;
+       }
+
+       r = dm_swap_table(md, t);
+       if (r) {
+               dm_put(md);
+               dm_table_put(t);
+               return r;
+       }
+
+       /* md holds its own reference to the table now */
+       set_device_ro(dm_kdev(md), (param->flags & DM_READONLY_FLAG));
+       dm_put(md);
+
+       r = info(param, user);
+       return r;
+}
+
+/*
+ * DM_DEV_RENAME: the new name sits in the payload area at
+ * data_start; validate that it is NUL-terminated inside the copied
+ * buffer and contains no '/', then rename in the hash.
+ */
+static int rename(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       int r;
+       char *new_name = (char *) param + param->data_start;
+
+       if (valid_str(new_name, (void *) param,
+                     (void *) param + param->data_size)) {
+               DMWARN("Invalid new logical volume name supplied.");
+               return -EINVAL;
+       }
+
+       r = check_name(new_name);
+       if (r)
+               return r;
+
+       return dm_hash_rename(param->name, new_name);
+}
+
+
+/*-----------------------------------------------------------------
+ * Implementation of open/close/ioctl on the special char
+ * device.
+ *---------------------------------------------------------------*/
+/*
+ * Map an ioctl number to its handler.  Indexes the table directly
+ * with 'cmd' and ignores the stored .cmd field, so the array order
+ * must exactly match the DM_*_CMD numbering in dm-ioctl.h -
+ * NOTE(review): confirm the enum there is dense and in this order.
+ */
+static ioctl_fn lookup_ioctl(unsigned int cmd)
+{
+       static struct {
+               int cmd;
+               ioctl_fn fn;
+       } _ioctls[] = {
+               {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */
+               {DM_REMOVE_ALL_CMD, remove_all},
+               {DM_DEV_CREATE_CMD, create},
+               {DM_DEV_REMOVE_CMD, remove},
+               {DM_DEV_RELOAD_CMD, reload},
+               {DM_DEV_RENAME_CMD, rename},
+               {DM_DEV_SUSPEND_CMD, suspend},
+               {DM_DEV_DEPS_CMD, dep},
+               {DM_DEV_STATUS_CMD, info},
+               {DM_TARGET_STATUS_CMD, get_status},
+               {DM_TARGET_WAIT_CMD, wait_device_event},
+       };
+
+       return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
+}
+
+/*
+ * As well as checking the version compatibility this always
+ * copies the kernel interface version out.
+ */
+/*
+ * The major numbers must match exactly; the kernel's minor must be
+ * at least the user's.  The kernel version triple is written back
+ * even on mismatch so userland can report both sides.
+ */
+static int check_version(int cmd, struct dm_ioctl *user)
+{
+       uint32_t version[3];
+       int r = 0;
+
+       if (copy_from_user(version, user->version, sizeof(version)))
+               return -EFAULT;
+
+       if ((DM_VERSION_MAJOR != version[0]) ||
+           (DM_VERSION_MINOR < version[1])) {
+               DMWARN("ioctl interface mismatch: "
+                      "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
+                      DM_VERSION_MAJOR, DM_VERSION_MINOR,
+                      DM_VERSION_PATCHLEVEL,
+                      version[0], version[1], version[2], cmd);
+               r = -EINVAL;
+       }
+
+       /*
+        * Fill in the kernel version.
+        */
+       version[0] = DM_VERSION_MAJOR;
+       version[1] = DM_VERSION_MINOR;
+       version[2] = DM_VERSION_PATCHLEVEL;
+       if (copy_to_user(user->version, version, sizeof(version)))
+               return -EFAULT;
+
+       return r;
+}
+
+/* Release the buffer allocated by copy_params(). */
+static void free_params(struct dm_ioctl *param)
+{
+       vfree(param);
+}
+
+/*
+ * Copy the whole variable-sized dm_ioctl (header plus payload, as
+ * declared by its data_size field) from userland into a vmalloc'd
+ * buffer.  NOTE(review): data_size comes straight from userland and
+ * is used unchecked, so a huge value pins an arbitrarily large
+ * allocation; the caller is CAP_SYS_ADMIN-only, but an upper bound
+ * would still be prudent - confirm whether one is wanted.
+ */
+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param)
+{
+       struct dm_ioctl tmp, *dmi;
+
+       if (copy_from_user(&tmp, user, sizeof(tmp)))
+               return -EFAULT;
+
+       if (tmp.data_size < sizeof(tmp))
+               return -EINVAL;
+
+       dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
+       if (!dmi)
+               return -ENOMEM;
+
+       if (copy_from_user(dmi, user, tmp.data_size)) {
+               vfree(dmi);
+               return -EFAULT;
+       }
+
+       *param = dmi;
+       return 0;
+}
+
+/*
+ * Sanity-check the copied-in parameters before dispatching and make
+ * the name/uuid fields safe to use as C strings.
+ */
+static int validate_params(uint cmd, struct dm_ioctl *param)
+{
+       /* Unless creating, exactly one of name or uuid - not both */
+       if (cmd != DM_DEV_CREATE_CMD) {
+               if ((!*param->uuid && !*param->name) ||
+                   (*param->uuid && *param->name)) {
+                       DMWARN("one of name or uuid must be supplied");
+                       return -EINVAL;
+               }
+       }
+
+       /* Ensure strings are terminated */
+       param->name[DM_NAME_LEN - 1] = '\0';
+       param->uuid[DM_UUID_LEN - 1] = '\0';
+
+       return 0;
+}
+
+/*
+ * Entry point for all ioctls on the control device: check
+ * privileges and interface version, copy the parameters in,
+ * validate them and dispatch to the per-command handler (which is
+ * responsible for copying any results back to 'user').
+ */
+static int ctl_ioctl(struct inode *inode, struct file *file,
+                    uint command, ulong u)
+{
+       int r = 0, cmd;
+       struct dm_ioctl *param;
+       struct dm_ioctl *user = (struct dm_ioctl *) u;
+       ioctl_fn fn = NULL;
+
+       /* only root can play with this */
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       if (_IOC_TYPE(command) != DM_IOCTL)
+               return -ENOTTY;
+
+       cmd = _IOC_NR(command);
+
+       /*
+        * Check the interface version passed in.  This also
+        * writes out the kernels interface version.
+        */
+       r = check_version(cmd, user);
+       if (r)
+               return r;
+
+       /*
+        * Nothing more to do for the version command.
+        */
+       if (cmd == DM_VERSION_CMD)
+               return 0;
+
+       fn = lookup_ioctl(cmd);
+       if (!fn) {
+               DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
+               return -ENOTTY;
+       }
+
+       /*
+        * Copy the parameters into kernel space.
+        */
+       r = copy_params(user, &param);
+       if (r)
+               return r;
+
+       r = validate_params(cmd, param);
+       if (r) {
+               free_params(param);
+               return r;
+       }
+
+       r = fn(param, user);
+       free_params(param);
+       return r;
+}
+
+/* File operations for the dm control node - ioctl only. */
+static struct file_operations _ctl_fops = {
+       .ioctl   = ctl_ioctl,
+       .owner   = THIS_MODULE,
+};
+
+/* devfs handle for the DM_DIR/control symlink. */
+static devfs_handle_t _ctl_handle;
+
+/* Misc character device carrying the control ioctl interface. */
+static struct miscdevice _dm_misc = {
+       .minor = MISC_DYNAMIC_MINOR,
+       .name  = DM_NAME,
+       .fops  = &_ctl_fops
+};
+
+/*
+ * Create misc character device and link to DM_DIR/control.
+ */
+int __init dm_interface_init(void)
+{
+       int r;
+       char rname[64];
+
+       r = dm_hash_init();
+       if (r)
+               return r;
+
+       r = misc_register(&_dm_misc);
+       if (r) {
+               DMERR("misc_register failed for control device");
+               dm_hash_exit();
+               return r;
+       }
+
+       /*
+        * Three bytes are reserved at the front of rname so "../" can
+        * later be spliced in ahead of the generated path.
+        * NOTE(review): devfs_generate_path() is assumed to fill from
+        * the end of the buffer and return the start offset - confirm
+        * against the devfs API before touching this.
+        */
+       r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
+                               sizeof rname - 3);
+       if (r == -ENOSYS)
+               return 0;       /* devfs not present */
+
+       if (r < 0) {
+               DMERR("devfs_generate_path failed for control device");
+               goto failed;
+       }
+
+       /* prepend "../" (no terminator needed - path follows it) */
+       strncpy(rname + r, "../", 3);
+       r = devfs_mk_symlink(NULL, DM_DIR "/control",
+                            DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL);
+       if (r) {
+               DMERR("devfs_mk_symlink failed for control device");
+               goto failed;
+       }
+       devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
+
+       DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
+              DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
+              DM_DRIVER_EMAIL);
+       return 0;
+
+      failed:
+       dm_hash_exit();
+       misc_deregister(&_dm_misc);
+       return r;
+}
+
+/* Tear down the hash tables and the control misc device. */
+void dm_interface_exit(void)
+{
+       dm_hash_exit();
+
+       if (misc_deregister(&_dm_misc) < 0)
+               DMERR("misc_deregister failed for control device");
+}
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
new file mode 100644 (file)
index 0000000..6f5f263
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+
+/*
+ * Linear: maps a linear range of a device.
+ */
+struct linear_c {
+       struct dm_dev *dev;
+       sector_t start;
+};
+
+/*
+ * Construct a linear mapping: <dev_path> <offset>
+ */
+static int linear_ctr(struct dm_target *ti, int argc, char **argv)
+{
+       struct linear_c *lc;
+
+       /*
+        * Exactly two arguments are accepted; the old message
+        * ("Not enough arguments") was wrong when argc > 2.
+        */
+       if (argc != 2) {
+               ti->error = "dm-linear: Invalid argument count";
+               return -EINVAL;
+       }
+
+       lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+       if (lc == NULL) {
+               ti->error = "dm-linear: Cannot allocate linear context";
+               return -ENOMEM;
+       }
+
+       if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) {
+               ti->error = "dm-linear: Invalid device sector";
+               goto bad;
+       }
+
+       /* takes a reference on the backing device; dropped in linear_dtr */
+       if (dm_get_device(ti, argv[0], ti->begin, ti->len,
+                         dm_table_get_mode(ti->table), &lc->dev)) {
+               ti->error = "dm-linear: Device lookup failed";
+               goto bad;
+       }
+
+       ti->private = lc;
+       return 0;
+
+      bad:
+       kfree(lc);
+       return -EINVAL;
+}
+
+static void linear_dtr(struct dm_target *ti)
+{
+       struct linear_c *lc = (struct linear_c *) ti->private;
+
+       /* release the device reference taken in linear_ctr */
+       dm_put_device(ti, lc->dev);
+       kfree(lc);
+}
+
+static int linear_map(struct dm_target *ti, struct bio *bio)
+{
+       struct linear_c *lc = (struct linear_c *) ti->private;
+
+       /* shift the bio onto the backing device at the configured offset */
+       bio->bi_bdev = lc->dev->bdev;
+       bio->bi_sector = lc->start + (bio->bi_sector - ti->begin);
+
+       /* 1 tells the core the bio was remapped and should be resubmitted */
+       return 1;
+}
+
+static int linear_status(struct dm_target *ti, status_type_t type,
+                        char *result, int maxlen)
+{
+       struct linear_c *lc = (struct linear_c *) ti->private;
+
+       switch (type) {
+       case STATUSTYPE_INFO:
+               result[0] = '\0';
+               break;
+
+       case STATUSTYPE_TABLE:
+               snprintf(result, maxlen, "%s " SECTOR_FORMAT,
+                        kdevname(to_kdev_t(lc->dev->bdev->bd_dev)), lc->start);
+               break;
+       }
+       return 0;
+}
+
+static struct target_type linear_target = {
+       .name   = "linear",
+       .module = THIS_MODULE,
+       .ctr    = linear_ctr,
+       .dtr    = linear_dtr,
+       .map    = linear_map,
+       .status = linear_status,
+};
+
+int __init dm_linear_init(void)
+{
+       int r = dm_register_target(&linear_target);
+
+       if (r < 0)
+               DMERR("linear: register failed %d", r);
+
+       return r;
+}
+
+void dm_linear_exit(void)
+{
+       int r = dm_unregister_target(&linear_target);
+
+       if (r < 0)
+               DMERR("linear: unregister failed %d", r);
+}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
new file mode 100644 (file)
index 0000000..abd2733
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+
+struct stripe {
+       struct dm_dev *dev;
+       sector_t physical_start;
+};
+
+struct stripe_c {
+       uint32_t stripes;
+
+       /* The size of this target / num. stripes */
+       uint32_t stripe_width;
+
+       /* stripe chunk size */
+       uint32_t chunk_shift;
+       sector_t chunk_mask;
+
+       struct stripe stripe[0];
+};
+
+static inline struct stripe_c *alloc_context(int stripes)
+{
+       size_t len;
+
+       /* guard the trailing-array size calculation against overflow */
+       if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
+                         stripes))
+               return NULL;
+
+       len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
+
+       return kmalloc(len, GFP_KERNEL);
+}
+
+/*
+ * Parse a single <dev> <sector> pair
+ */
+static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
+                     int stripe, char **argv)
+{
+       sector_t start;
+
+       if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1)
+               return -EINVAL;
+
+       /* each stripe needs stripe_width sectors of its device */
+       if (dm_get_device(ti, argv[0], start, sc->stripe_width,
+                         dm_table_get_mode(ti->table),
+                         &sc->stripe[stripe].dev))
+               return -ENXIO;
+
+       sc->stripe[stripe].physical_start = start;
+       return 0;
+}
+
+/*
+ * FIXME: Nasty function, only present because we can't link
+ * against __moddi3 and __divdi3.
+ *
+ * returns a == b * n
+ */
+static int multiple(sector_t a, sector_t b, sector_t *n)
+{
+       sector_t acc, prev, i;
+
+       *n = 0;
+       /* NOTE(review): b == 0 never terminates — callers must reject it */
+       while (a >= b) {
+               /* find the largest b * 2^k that still fits in a */
+               for (acc = b, prev = 0, i = 1;
+                    acc <= a;
+                    prev = acc, acc <<= 1, i <<= 1)
+                       ;
+
+               /* subtract it, accumulate 2^(k-1) into the quotient */
+               a -= prev;
+               *n += i >> 1;
+       }
+
+       /* exact multiple only if nothing remains */
+       return a == 0;
+}
+
+/*
+ * Construct a striped mapping.
+ * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
+ */
+static int stripe_ctr(struct dm_target *ti, int argc, char **argv)
+{
+       struct stripe_c *sc;
+       sector_t width;
+       uint32_t stripes;
+       uint32_t chunk_size;
+       char *end;
+       int r, i;
+
+       if (argc < 2) {
+               ti->error = "dm-stripe: Not enough arguments";
+               return -EINVAL;
+       }
+
+       /* a stripe count of zero would loop forever in multiple() below */
+       stripes = simple_strtoul(argv[0], &end, 10);
+       if (!stripes || *end) {
+               ti->error = "dm-stripe: Invalid stripe count";
+               return -EINVAL;
+       }
+
+       chunk_size = simple_strtoul(argv[1], &end, 10);
+       if (*end) {
+               ti->error = "dm-stripe: Invalid chunk_size";
+               return -EINVAL;
+       }
+
+       if (!multiple(ti->len, stripes, &width)) {
+               ti->error = "dm-stripe: Target length not divisable by "
+                   "number of stripes";
+               return -EINVAL;
+       }
+
+       sc = alloc_context(stripes);
+       if (!sc) {
+               ti->error = "dm-stripe: Memory allocation for striped context "
+                   "failed";
+               return -ENOMEM;
+       }
+
+       sc->stripes = stripes;
+       sc->stripe_width = width;
+       ti->split_io = chunk_size;
+
+       /*
+        * chunk_size is a power of two
+        */
+       if (!chunk_size || (chunk_size & (chunk_size - 1))) {
+               ti->error = "dm-stripe: Invalid chunk size";
+               kfree(sc);
+               return -EINVAL;
+       }
+
+       sc->chunk_mask = ((sector_t) chunk_size) - 1;
+       for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
+               chunk_size >>= 1;
+       sc->chunk_shift--;
+
+       /*
+        * Get the stripe destinations.  Stripe i consumes
+        * argv[2 * (i + 1)] and argv[2 * (i + 1) + 1], so argc must be
+        * at least 2 * (i + 2) before parsing it.  (The old check of
+        * "argc < 2" never failed after the first iteration because
+        * argc was never decremented, allowing reads past argv.)
+        */
+       for (i = 0; i < stripes; i++) {
+               if (argc < (2 * (i + 2))) {
+                       ti->error = "dm-stripe: Not enough destinations "
+                               "specified";
+                       kfree(sc);
+                       return -EINVAL;
+               }
+
+               argv += 2;
+
+               r = get_stripe(ti, sc, i, argv);
+               if (r < 0) {
+                       ti->error = "dm-stripe: Couldn't parse stripe "
+                               "destination";
+                       while (i--)
+                               dm_put_device(ti, sc->stripe[i].dev);
+                       kfree(sc);
+                       return r;
+               }
+       }
+
+       ti->private = sc;
+       return 0;
+}
+
+static void stripe_dtr(struct dm_target *ti)
+{
+       unsigned int i;
+       struct stripe_c *sc = (struct stripe_c *) ti->private;
+
+       for (i = 0; i < sc->stripes; i++)
+               dm_put_device(ti, sc->stripe[i].dev);
+
+       kfree(sc);
+}
+
+static int stripe_map(struct dm_target *ti, struct bio *bio)
+{
+       struct stripe_c *sc = (struct stripe_c *) ti->private;
+
+       /* offset within this target, split into chunk index + stripe */
+       sector_t offset = bio->bi_sector - ti->begin;
+       uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift);
+       uint32_t stripe = chunk % sc->stripes;  /* 32bit modulus */
+       chunk = chunk / sc->stripes;
+
+       /* remap: per-stripe chunk back to sectors plus intra-chunk offset */
+       bio->bi_bdev = sc->stripe[stripe].dev->bdev;
+       bio->bi_sector = sc->stripe[stripe].physical_start +
+           (chunk << sc->chunk_shift) + (offset & sc->chunk_mask);
+       return 1;
+}
+
+static int stripe_status(struct dm_target *ti,
+                        status_type_t type, char *result, int maxlen)
+{
+       struct stripe_c *sc = (struct stripe_c *) ti->private;
+       int offset;
+       int i;
+
+       switch (type) {
+       case STATUSTYPE_INFO:
+               result[0] = '\0';
+               break;
+
+       case STATUSTYPE_TABLE:
+               offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT,
+                                 sc->stripes, sc->chunk_mask + 1);
+               for (i = 0; i < sc->stripes; i++) {
+                       offset +=
+                           snprintf(result + offset, maxlen - offset,
+                                    " %s " SECTOR_FORMAT,
+                      kdevname(to_kdev_t(sc->stripe[i].dev->bdev->bd_dev)),
+                                    sc->stripe[i].physical_start);
+               }
+               break;
+       }
+       return 0;
+}
+
+static struct target_type stripe_target = {
+       .name   = "striped",
+       .module = THIS_MODULE,
+       .ctr    = stripe_ctr,
+       .dtr    = stripe_dtr,
+       .map    = stripe_map,
+       .status = stripe_status,
+};
+
+int __init dm_stripe_init(void)
+{
+       int r;
+
+       r = dm_register_target(&stripe_target);
+       if (r < 0)
+               DMWARN("striped target registration failed");
+
+       return r;
+}
+
+void dm_stripe_exit(void)
+{
+       if (dm_unregister_target(&stripe_target))
+               DMWARN("striped target unregistration failed");
+
+       return;
+}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
new file mode 100644 (file)
index 0000000..4178ac4
--- /dev/null
@@ -0,0 +1,736 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/blkdev.h>
+#include <linux/namei.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <asm/atomic.h>
+
+#define MAX_DEPTH 16
+#define NODE_SIZE L1_CACHE_BYTES
+#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
+#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
+
+struct dm_table {
+       atomic_t holders;
+
+       /* btree table */
+       int depth;
+       int counts[MAX_DEPTH];  /* in nodes */
+       sector_t *index[MAX_DEPTH];
+
+       int num_targets;
+       int num_allocated;
+       sector_t *highs;
+       struct dm_target *targets;
+
+       /*
+        * Indicates the rw permissions for the new logical
+        * device.  This should be a combination of FMODE_READ
+        * and FMODE_WRITE.
+        */
+       int mode;
+
+       /* a list of devices used by this table */
+       struct list_head devices;
+
+       /*
+        * These are optimistic limits taken from all the
+        * targets, some targets will need smaller limits.
+        */
+       struct io_restrictions limits;
+
+       /*
+        * A waitqueue for processes waiting for something
+        * interesting to happen to this table.
+        */
+       wait_queue_head_t eventq;
+};
+
+/*
+ * Ceiling(n / size)
+ */
+static inline unsigned long div_up(unsigned long n, unsigned long size)
+{
+       return dm_round_up(n, size) / size;
+}
+
+/*
+ * Similar to ceiling(log_size(n))
+ */
+static unsigned int int_log(unsigned long n, unsigned long base)
+{
+       int result = 0;
+
+       while (n > 1) {
+               n = div_up(n, base);
+               result++;
+       }
+
+       return result;
+}
+
+#define __HIGH(l, r) if (*(l) < (r)) *(l) = (r)
+/*
+ * __LOW keeps the smaller of the two limits; the !*(l) test lets a
+ * zero-initialised limit adopt the first real value it sees.  (It was
+ * previously defined identically to __HIGH, so every "take the lower
+ * limit" site actually took the maximum.)
+ */
+#define __LOW(l, r) if (!*(l) || *(l) > (r)) *(l) = (r)
+
+/*
+ * Combine two io_restrictions, always taking the lower value.
+ */
+
+static void combine_restrictions_low(struct io_restrictions *lhs,
+                                    struct io_restrictions *rhs)
+{
+       /* most limits combine by taking the smaller value... */
+       __LOW(&lhs->max_sectors, rhs->max_sectors);
+       __LOW(&lhs->max_phys_segments, rhs->max_phys_segments);
+       __LOW(&lhs->max_hw_segments, rhs->max_hw_segments);
+       /* ...but the common hardware sector size is the largest one */
+       __HIGH(&lhs->hardsect_size, rhs->hardsect_size);
+       __LOW(&lhs->max_segment_size, rhs->max_segment_size);
+       __LOW(&lhs->seg_boundary_mask, rhs->seg_boundary_mask);
+}
+
+/*
+ * Calculate the index of the child node of the n'th node k'th key.
+ */
+static inline int get_child(int n, int k)
+{
+       return (n * CHILDREN_PER_NODE) + k;
+}
+
+/*
+ * Return the n'th node of level l from table t.
+ */
+static inline sector_t *get_node(struct dm_table *t, int l, int n)
+{
+       return t->index[l] + (n * KEYS_PER_NODE);
+}
+
+/*
+ * Return the highest key that you could lookup from the n'th
+ * node on level l of the btree.
+ */
+static sector_t high(struct dm_table *t, int l, int n)
+{
+       /* walk the right-most path down to the leaf level */
+       for (; l < t->depth - 1; l++)
+               n = get_child(n, CHILDREN_PER_NODE - 1);
+
+       /* off the end of a partially filled level: report "infinity" */
+       if (n >= t->counts[l])
+               return (sector_t) - 1;
+
+       return get_node(t, l, n)[KEYS_PER_NODE - 1];
+}
+
+/*
+ * Fills in a level of the btree based on the highs of the level
+ * below it.
+ */
+static int setup_btree_index(int l, struct dm_table *t)
+{
+       int n, k;
+       sector_t *node;
+
+       for (n = 0; n < t->counts[l]; n++) {
+               node = get_node(t, l, n);
+
+               for (k = 0; k < KEYS_PER_NODE; k++)
+                       node[k] = high(t, l + 1, get_child(n, k));
+       }
+
+       return 0;
+}
+
+/*
+ * highs, and targets are managed as dynamic arrays during a
+ * table load.
+ */
+static int alloc_targets(struct dm_table *t, int num)
+{
+       sector_t *n_highs;
+       struct dm_target *n_targets;
+       int n = t->num_targets;
+
+       /*
+        * Allocate both the target array and offset array at once.
+        */
+       n_highs = (sector_t *) vcalloc(sizeof(struct dm_target) +
+                                      sizeof(sector_t),
+                                      num);
+       if (!n_highs)
+               return -ENOMEM;
+
+       /* the targets live directly after the num highs */
+       n_targets = (struct dm_target *) (n_highs + num);
+
+       /* copy across any existing entries when growing */
+       if (n) {
+               memcpy(n_highs, t->highs, sizeof(*n_highs) * n);
+               memcpy(n_targets, t->targets, sizeof(*n_targets) * n);
+       }
+
+       /* unused highs become all-ones, i.e. the maximum sector_t */
+       memset(n_highs + n, -1, sizeof(*n_highs) * (num - n));
+       vfree(t->highs);
+
+       t->num_allocated = num;
+       t->highs = n_highs;
+       t->targets = n_targets;
+
+       return 0;
+}
+
+int dm_table_create(struct dm_table **result, int mode)
+{
+       struct dm_table *t = kmalloc(sizeof(*t), GFP_NOIO);
+
+       if (!t)
+               return -ENOMEM;
+
+       memset(t, 0, sizeof(*t));
+       INIT_LIST_HEAD(&t->devices);
+       atomic_set(&t->holders, 1);
+
+       /* allocate a single nodes worth of targets to begin with */
+       if (alloc_targets(t, KEYS_PER_NODE)) {
+               kfree(t);
+               t = NULL;
+               return -ENOMEM;
+       }
+
+       init_waitqueue_head(&t->eventq);
+       t->mode = mode;
+       *result = t;
+       return 0;
+}
+
+static void free_devices(struct list_head *devices)
+{
+       struct list_head *tmp, *next;
+
+       for (tmp = devices->next; tmp != devices; tmp = next) {
+               struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+               next = tmp->next;
+               kfree(dd);
+       }
+}
+
+void table_destroy(struct dm_table *t)
+{
+       int i;
+
+       DMWARN("destroying table");
+
+       /* destroying the table counts as an event */
+       dm_table_event(t);
+
+       /* free the indexes (see dm_table_complete) */
+       if (t->depth >= 2)
+               vfree(t->index[t->depth - 2]);
+
+       /*
+        * Free the targets.  Run the target's destructor BEFORE
+        * dropping the reference on its target_type: putting the type
+        * first can release the owning module's last use count while
+        * its dtr is still about to be called.
+        */
+       for (i = 0; i < t->num_targets; i++) {
+               struct dm_target *tgt = &t->targets[i];
+
+               if (tgt->type->dtr)
+                       tgt->type->dtr(tgt);
+
+               dm_put_target_type(tgt->type);
+       }
+
+       vfree(t->highs);
+
+       /* free the device list */
+       if (t->devices.next != &t->devices) {
+               DMWARN("devices still present during destroy: "
+                      "dm_table_remove_device calls missing");
+
+               free_devices(&t->devices);
+       }
+
+       kfree(t);
+}
+
+void dm_table_get(struct dm_table *t)
+{
+       atomic_inc(&t->holders);
+}
+
+void dm_table_put(struct dm_table *t)
+{
+       if (atomic_dec_and_test(&t->holders))
+               table_destroy(t);
+}
+
+/*
+ * Checks to see if we need to extend highs or targets.
+ */
+static inline int check_space(struct dm_table *t)
+{
+       if (t->num_targets >= t->num_allocated)
+               return alloc_targets(t, t->num_allocated * 2);
+
+       return 0;
+}
+
+/*
+ * Convert a device path to a dev_t.
+ */
+static int lookup_device(const char *path, dev_t *dev)
+{
+       int r;
+       struct nameidata nd;
+       struct inode *inode;
+
+       if ((r = path_lookup(path, LOOKUP_FOLLOW, &nd)))
+               return r;
+
+       inode = nd.dentry->d_inode;
+       if (!inode) {
+               r = -ENOENT;
+               goto out;
+       }
+
+       if (!S_ISBLK(inode->i_mode)) {
+               r = -EINVAL;
+               goto out;
+       }
+
+       *dev = kdev_t_to_nr(inode->i_rdev);
+
+ out:
+       path_release(&nd);
+       return r;
+}
+
+/*
+ * See if we've already got a device in the list.
+ */
+static struct dm_dev *find_device(struct list_head *l, dev_t dev)
+{
+       struct list_head *tmp;
+
+       list_for_each(tmp, l) {
+               struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+               if (dd->bdev->bd_dev == dev)
+                       return dd;
+       }
+
+       return NULL;
+}
+
+/*
+ * Open a device so we can use it as a map destination.
+ */
+static int open_dev(struct dm_dev *d, dev_t dev)
+{
+       static char *_claim_ptr = "I belong to device-mapper";
+
+       int r;
+
+       if (d->bdev)
+               BUG();
+
+       d->bdev = bdget(dev);
+       if (!d->bdev)
+               return -ENOMEM;
+
+       /*
+        * Bail out if the open fails.  The old test was inverted
+        * ("if (!r) return r;"): it returned success without claiming
+        * the device and tried to claim it after a FAILED open.
+        * NOTE(review): assumes blkdev_get drops the bdget reference
+        * on failure — confirm against fs/block_dev.c.
+        */
+       r = blkdev_get(d->bdev, d->mode, 0, BDEV_RAW);
+       if (r)
+               return r;
+
+       r = bd_claim(d->bdev, _claim_ptr);
+       if (r) {
+               blkdev_put(d->bdev, BDEV_RAW);
+               d->bdev = NULL;
+       }
+
+       return r;
+}
+
+/*
+ * Close a device that we've been using.
+ */
+static void close_dev(struct dm_dev *d)
+{
+       if (!d->bdev)
+               return;
+
+       bd_release(d->bdev);
+       blkdev_put(d->bdev, BDEV_RAW);
+       d->bdev = NULL;
+}
+
+/*
+ * If possible (ie. blk_size[major] is set), this checks an area
+ * of a destination device is valid.
+ */
+static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len)
+{
+       sector_t dev_size;
+
+       /*
+        * i_size of the block device inode is in BYTES, whereas start
+        * and len (like everything else in dm) are in 512-byte sectors;
+        * without the shift the check was a factor of 512 too lenient.
+        */
+       dev_size = dd->bdev->bd_inode->i_size >> 9;
+       return ((start < dev_size) && (len <= (dev_size - start)));
+}
+
+/*
+ * This upgrades the mode on an already open dm_dev.  Being
+ * careful to leave things as they were if we fail to reopen the
+ * device.
+ */
+static int upgrade_mode(struct dm_dev *dd, int new_mode)
+{
+       int r;
+       struct dm_dev dd_copy;
+       dev_t dev = dd->bdev->bd_dev;
+
+       /* keep the old open alive until the wider one has succeeded */
+       memcpy(&dd_copy, dd, sizeof(dd_copy));
+
+       dd->mode |= new_mode;
+       dd->bdev = NULL;
+       r = open_dev(dd, dev);
+       if (!r)
+               close_dev(&dd_copy);
+       else
+               /* reopen failed: restore the original open device */
+               memcpy(dd, &dd_copy, sizeof(dd_copy));
+
+       return r;
+}
+
+/*
+ * Add a device to the list, or just increment the usage count if
+ * it's already present.
+ */
+static int __table_get_device(struct dm_table *t, struct dm_target *ti,
+                             const char *path, sector_t start, sector_t len,
+                             int mode, struct dm_dev **result)
+{
+       int r;
+       dev_t dev;
+       struct dm_dev *dd;
+       int major, minor;
+
+       if (!t)
+               BUG();
+
+       if (sscanf(path, "%x:%x", &major, &minor) == 2) {
+               /* Extract the major/minor numbers */
+               dev = MKDEV(major, minor);
+       } else {
+               /* convert the path to a device */
+               if ((r = lookup_device(path, &dev)))
+                       return r;
+       }
+
+       dd = find_device(&t->devices, dev);
+       if (!dd) {
+               /* first user of this device within the table: open it */
+               dd = kmalloc(sizeof(*dd), GFP_KERNEL);
+               if (!dd)
+                       return -ENOMEM;
+
+               dd->mode = mode;
+               dd->bdev = NULL;
+
+               if ((r = open_dev(dd, dev))) {
+                       kfree(dd);
+                       return r;
+               }
+
+               atomic_set(&dd->count, 0);
+               list_add(&dd->list, &t->devices);
+
+       } else if (dd->mode != (mode | dd->mode)) {
+               /* already open, but without the access mode now needed */
+               r = upgrade_mode(dd, mode);
+               if (r)
+                       return r;
+       }
+       atomic_inc(&dd->count);
+
+       if (!check_device_area(dd, start, len)) {
+               DMWARN("device %s too small for target", path);
+               /* drops the reference taken just above */
+               dm_put_device(ti, dd);
+               return -EINVAL;
+       }
+
+       *result = dd;
+
+       return 0;
+}
+
+
+int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
+                 sector_t len, int mode, struct dm_dev **result)
+{
+       int r = __table_get_device(ti->table, ti, path,
+                                  start, len, mode, result);
+       if (!r) {
+               request_queue_t *q = bdev_get_queue((*result)->bdev);
+               struct io_restrictions *rs = &ti->limits;
+
+               /* combine the device limits low */
+               __LOW(&rs->max_sectors, q->max_sectors);
+               __LOW(&rs->max_phys_segments, q->max_phys_segments);
+               __LOW(&rs->max_hw_segments, q->max_hw_segments);
+               __HIGH(&rs->hardsect_size, q->hardsect_size);
+               __LOW(&rs->max_segment_size, q->max_segment_size);
+               __LOW(&rs->seg_boundary_mask, q->seg_boundary_mask);
+       }
+
+       return r;
+}
+
+/*
+ * Decrement a devices use count and remove it if neccessary.
+ */
+void dm_put_device(struct dm_target *ti, struct dm_dev *dd)
+{
+       if (atomic_dec_and_test(&dd->count)) {
+               close_dev(dd);
+               list_del(&dd->list);
+               kfree(dd);
+       }
+}
+
+/*
+ * Checks to see if the target joins onto the end of the table.
+ */
+static int adjoin(struct dm_table *table, struct dm_target *ti)
+{
+       struct dm_target *prev;
+
+       if (!table->num_targets)
+               return !ti->begin;
+
+       prev = &table->targets[table->num_targets - 1];
+       return (ti->begin == (prev->begin + prev->len));
+}
+
+/*
+ * Destructively splits up the argument list to pass to ctr.
+ */
+static int split_args(int max, int *argc, char **argv, char *input)
+{
+       char *start, *end = input, *out;
+       *argc = 0;
+
+       while (1) {
+               start = end;
+
+               /* Skip whitespace */
+               while (*start && isspace(*start))
+                       start++;
+
+               if (!*start)
+                       break;  /* success, we hit the end */
+
+               /* 'out' compacts the token in place, dropping the
+                * backslash from any '\x' escape */
+               end = out = start;
+               while (*end) {
+                       /* Everything apart from '\0' can be quoted */
+                       if (*end == '\\' && *(end + 1)) {
+                               *out++ = *(end + 1);
+                               end += 2;
+                               continue;
+                       }
+
+                       if (isspace(*end))
+                               break;  /* end of token */
+
+                       *out++ = *end++;
+               }
+
+               /* have we already filled the array ? */
+               if ((*argc + 1) > max)
+                       return -EINVAL;
+
+               /* we know this is whitespace */
+               if (*end)
+                       end++;
+
+               /* terminate the string and put it in the array */
+               *out = '\0';
+               argv[*argc] = start;
+               (*argc)++;
+       }
+
+       return 0;
+}
+
+int dm_table_add_target(struct dm_table *t, const char *type,
+                       sector_t start, sector_t len, char *params)
+{
+       int r, argc;
+       char *argv[32];
+       struct target_type *tt;
+       struct dm_target *tgt;
+
+       if ((r = check_space(t)))
+               return r;
+
+       tgt = t->targets + t->num_targets;
+       memset(tgt, 0, sizeof(*tgt));
+
+       tt = dm_get_target_type(type);
+       if (!tt) {
+               tgt->error = "unknown target type";
+               return -EINVAL;
+       }
+
+       tgt->table = t;
+       tgt->type = tt;
+       tgt->begin = start;
+       tgt->len = len;
+       tgt->error = "Unknown error";
+
+       /*
+        * Does this target adjoin the previous one ?
+        */
+       if (!adjoin(t, tgt)) {
+               DMERR("Gap in table");
+               dm_put_target_type(tt);
+               return -EINVAL;
+       }
+
+       r = split_args(ARRAY_SIZE(argv), &argc, argv, params);
+       if (r) {
+               tgt->error = "couldn't split parameters";
+               dm_put_target_type(tt);
+               return r;
+       }
+
+       r = tt->ctr(tgt, argc, argv);
+       if (r) {
+               dm_put_target_type(tt);
+               return r;
+       }
+
+       t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
+
+       /* FIXME: the plan is to combine high here and then have
+        * the merge fn apply the target level restrictions. */
+       combine_restrictions_low(&t->limits, &tgt->limits);
+       return 0;
+}
+
+static int setup_indexes(struct dm_table *t)
+{
+       int i, total = 0;
+       sector_t *indexes;
+
+       /* allocate the space for *all* the indexes */
+       for (i = t->depth - 2; i >= 0; i--) {
+               t->counts[i] = div_up(t->counts[i + 1], CHILDREN_PER_NODE);
+               total += t->counts[i];
+       }
+
+       indexes = (sector_t *) vcalloc(total, (unsigned long) NODE_SIZE);
+       if (!indexes)
+               return -ENOMEM;
+
+       /* set up internal nodes, bottom-up */
+       for (i = t->depth - 2, total = 0; i >= 0; i--) {
+               t->index[i] = indexes;
+               indexes += (KEYS_PER_NODE * t->counts[i]);
+               setup_btree_index(i, t);
+       }
+
+       return 0;
+}
+
+/*
+ * Builds the btree to index the map.
+ */
+int dm_table_complete(struct dm_table *t)
+{
+       int leaf_nodes, r = 0;
+
+       /* how many indexes will the btree have ? */
+       leaf_nodes = div_up(t->num_targets, KEYS_PER_NODE);
+       t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
+
+       /* leaf layer has already been set up */
+       t->counts[t->depth - 1] = leaf_nodes;
+       t->index[t->depth - 1] = t->highs;
+
+       if (t->depth >= 2)
+               r = setup_indexes(t);
+
+       return r;
+}
+
+void dm_table_event(struct dm_table *t)
+{
+       wake_up_interruptible(&t->eventq);
+}
+
+sector_t dm_table_get_size(struct dm_table *t)
+{
+       return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
+}
+
+struct dm_target *dm_table_get_target(struct dm_table *t, int index)
+{
+       /*
+        * index == num_targets is already one past the last valid
+        * target (the old "index > num_targets" test let it through),
+        * and a negative index must be rejected too.
+        */
+       if (index < 0 || index >= t->num_targets)
+               return NULL;
+
+       return t->targets + index;
+}
+
+/*
+ * Search the btree for the correct target.
+ */
+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
+{
+       int l, n = 0, k = 0;
+       sector_t *node;
+
+       /* descend one btree level per iteration, starting at the root */
+       for (l = 0; l < t->depth; l++) {
+               n = get_child(n, k);
+               node = get_node(t, l, n);
+
+               /* the first key >= sector selects the child to follow */
+               for (k = 0; k < KEYS_PER_NODE; k++)
+                       if (node[k] >= sector)
+                               break;
+       }
+
+       return &t->targets[(KEYS_PER_NODE * n) + k];
+}
+
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
+{
+       /*
+        * Make sure we obey the optimistic sub devices
+        * restrictions.
+        */
+       q->max_sectors = t->limits.max_sectors;
+       q->max_phys_segments = t->limits.max_phys_segments;
+       q->max_hw_segments = t->limits.max_hw_segments;
+       q->hardsect_size = t->limits.hardsect_size;
+       q->max_segment_size = t->limits.max_segment_size;
+       q->seg_boundary_mask = t->limits.seg_boundary_mask;
+}
+
+unsigned int dm_table_get_num_targets(struct dm_table *t)
+{
+       return t->num_targets;
+}
+
+struct list_head *dm_table_get_devices(struct dm_table *t)
+{
+       return &t->devices;
+}
+
+int dm_table_get_mode(struct dm_table *t)
+{
+       return t->mode;
+}
+
+void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq)
+{
+       add_wait_queue(&t->eventq, wq);
+}
+
+EXPORT_SYMBOL(dm_get_device);
+EXPORT_SYMBOL(dm_put_device);
+EXPORT_SYMBOL(dm_table_event);
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
new file mode 100644 (file)
index 0000000..6bf8310
--- /dev/null
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+
+struct tt_internal {
+       struct target_type tt;
+
+       struct list_head list;
+       long use;
+};
+
+static LIST_HEAD(_targets);
+static rwlock_t _lock = RW_LOCK_UNLOCKED;
+
+#define DM_MOD_NAME_SIZE 32
+
+static inline struct tt_internal *__find_target_type(const char *name)
+{
+       struct list_head *tih;
+       struct tt_internal *ti;
+
+       list_for_each(tih, &_targets) {
+               ti = list_entry(tih, struct tt_internal, list);
+
+               if (!strcmp(name, ti->tt.name))
+                       return ti;
+       }
+
+       return NULL;
+}
+
+static struct tt_internal *get_target_type(const char *name)
+{
+       struct tt_internal *ti;
+
+       /*
+        * ti->use and the module use count are modified here, so take
+        * the lock for writing; under the previous read_lock two CPUs
+        * could race on the increment.
+        */
+       write_lock(&_lock);
+       ti = __find_target_type(name);
+
+       if (ti) {
+               if (ti->use == 0 && ti->tt.module)
+                       __MOD_INC_USE_COUNT(ti->tt.module);
+               ti->use++;
+       }
+       write_unlock(&_lock);
+
+       return ti;
+}
+
+static void load_module(const char *name)
+{
+       char module_name[DM_MOD_NAME_SIZE] = "dm-";
+
+       /* Length check for strcat() below */
+       if (strlen(name) > (DM_MOD_NAME_SIZE - 4))
+               return;
+
+       strcat(module_name, name);
+       request_module(module_name);
+
+       return;
+}
+
+struct target_type *dm_get_target_type(const char *name)
+{
+       struct tt_internal *ti = get_target_type(name);
+
+       if (!ti) {
+               load_module(name);
+               ti = get_target_type(name);
+       }
+
+       return ti ? &ti->tt : NULL;
+}
+
+void dm_put_target_type(struct target_type *t)
+{
+       struct tt_internal *ti = (struct tt_internal *) t;
+
+       /*
+        * ti->use is written below, so this must be the write lock;
+        * the previous read_lock allowed concurrent decrements to race.
+        */
+       write_lock(&_lock);
+       if (--ti->use == 0 && ti->tt.module)
+               __MOD_DEC_USE_COUNT(ti->tt.module);
+
+       if (ti->use < 0)
+               BUG();
+       write_unlock(&_lock);
+
+       return;
+}
+
+static struct tt_internal *alloc_target(struct target_type *t)
+{
+       struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+
+       if (ti) {
+               memset(ti, 0, sizeof(*ti));
+               ti->tt = *t;
+       }
+
+       return ti;
+}
+
+int dm_register_target(struct target_type *t)
+{
+       int rv = 0;
+       struct tt_internal *ti = alloc_target(t);
+
+       if (!ti)
+               return -ENOMEM;
+
+       write_lock(&_lock);
+       if (__find_target_type(t->name))
+               rv = -EEXIST;
+       else
+               list_add(&ti->list, &_targets);
+
+       write_unlock(&_lock);
+
+       /* don't leak the tt_internal when the name was already taken */
+       if (rv)
+               kfree(ti);
+
+       return rv;
+}
+
+int dm_unregister_target(struct target_type *t)
+{
+       struct tt_internal *ti;
+
+       write_lock(&_lock);
+       if (!(ti = __find_target_type(t->name))) {
+               write_unlock(&_lock);
+               return -EINVAL;
+       }
+
+       /* refuse while any table still holds a reference to the type */
+       if (ti->use) {
+               write_unlock(&_lock);
+               return -ETXTBSY;
+       }
+
+       list_del(&ti->list);
+       kfree(ti);
+
+       write_unlock(&_lock);
+       return 0;
+}
+
+/*
+ * io-err: always fails an io, useful for bringing
+ * up LVs that have holes in them.
+ */
+static int io_err_ctr(struct dm_target *ti, int argc, char **args)
+{
+       return 0;
+}
+
+static void io_err_dtr(struct dm_target *ti)
+{
+       /* empty */
+       return;
+}
+
+static int io_err_map(struct dm_target *ti, struct bio *bio)
+{
+       bio_io_error(bio, 0);
+       return 0;
+}
+
+static struct target_type error_target = {
+       .name = "error",
+       .ctr  = io_err_ctr,
+       .dtr  = io_err_dtr,
+       .map  = io_err_map,
+};
+
+int dm_target_init(void)
+{
+       return dm_register_target(&error_target);
+}
+
+void dm_target_exit(void)
+{
+       if (dm_unregister_target(&error_target))
+               DMWARN("error target unregistration failed");
+}
+
+EXPORT_SYMBOL(dm_register_target);
+EXPORT_SYMBOL(dm_unregister_target);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
new file mode 100644 (file)
index 0000000..fd13e20
--- /dev/null
@@ -0,0 +1,796 @@
+/*
+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/blk.h>
+#include <linux/blkpg.h>
+#include <linux/bio.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+static const char *_name = DM_NAME;
+#define MAX_DEVICES 256
+#define SECTOR_SHIFT 9
+
+static int major = 0;
+static int _major = 0;
+
+/*
+ * Per-original-bio tracking state, allocated from _io_pool.  Counts
+ * the clones the bio was split into so the original can be completed
+ * (bio_endio in dec_pending) once all clones have finished.
+ */
+struct dm_io {
+       struct mapped_device *md;
+       int error;              /* first error reported by any clone */
+       struct bio *bio;        /* the original, unsplit bio */
+       atomic_t io_count;      /* outstanding clones, plus one extra ref */
+};
+
+/* Singly-linked node for bios queued while the device is suspended. */
+struct deferred_io {
+       struct bio *bio;
+       struct deferred_io *next;
+};
+
+/*
+ * Bits for the md->flags field.
+ */
+#define DMF_BLOCK_IO 0
+#define DMF_SUSPENDED 1
+
+struct mapped_device {
+       struct rw_semaphore lock;       /* guards flags, map and deferred */
+
+       kdev_t kdev;
+       atomic_t holders;               /* refcount, see dm_get/dm_put */
+
+       unsigned long flags;            /* DMF_* bits above */
+
+       request_queue_t queue;
+       struct gendisk *disk;
+
+       /*
+        * A list of ios that arrived while we were suspended.
+        */
+       atomic_t pending;               /* ios mapped but not yet completed */
+       wait_queue_head_t wait;         /* dm_suspend waits for pending == 0 */
+       struct deferred_io *deferred;
+
+       /*
+        * The current mapping.
+        */
+       struct dm_table *map;
+};
+
+#define MIN_IOS 256
+static kmem_cache_t *_io_cache;
+static mempool_t *_io_pool;
+
+/*
+ * Core setup: create the dm_io slab and mempool, then claim the
+ * block major.  A module parameter of major == 0 requests a dynamic
+ * major from register_blkdev; the one actually granted is kept in
+ * _major.  Returns 0 or a negative errno, undoing partial setup on
+ * failure.
+ */
+static __init int local_init(void)
+{
+       int r;
+
+       /* allocate a slab for the dm_ios */
+       _io_cache = kmem_cache_create("dm io",
+                                     sizeof(struct dm_io), 0, 0, NULL, NULL);
+       if (!_io_cache)
+               return -ENOMEM;
+
+       _io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
+                                 mempool_free_slab, _io_cache);
+       if (!_io_pool) {
+               kmem_cache_destroy(_io_cache);
+               return -ENOMEM;
+       }
+
+       _major = major;
+       r = register_blkdev(_major, _name, &dm_blk_dops);
+       if (r < 0) {
+               DMERR("register_blkdev failed");
+               mempool_destroy(_io_pool);
+               kmem_cache_destroy(_io_cache);
+               return r;
+       }
+
+       /* dynamic major requested: record the one we were given */
+       if (!_major)
+               _major = r;
+
+       return 0;
+}
+
+/*
+ * Undo local_init: release the io mempool and slab, then give the
+ * block major back.
+ */
+static void local_exit(void)
+{
+       mempool_destroy(_io_pool);
+       kmem_cache_destroy(_io_cache);
+
+       /* report the function actually called (not devfs) on failure */
+       if (unregister_blkdev(_major, _name) < 0)
+               DMERR("unregister_blkdev failed");
+
+       _major = 0;
+
+       DMINFO("cleaned up");
+}
+
+/*
+ * We have a lot of init/exit functions, so it seems easier to
+ * store them in an array.  The disposable macro 'xx'
+ * expands a prefix into a pair of function names.
+ *
+ * Order matters: entries are initialised first-to-last by dm_init
+ * and torn down last-to-first by dm_exit.
+ */
+static struct {
+       int (*init) (void);
+       void (*exit) (void);
+
+} _inits[] = {
+#define xx(n) {n ## _init, n ## _exit},
+       xx(local)
+       xx(dm_target)
+       xx(dm_linear)
+       xx(dm_stripe)
+       xx(dm_interface)
+#undef xx
+};
+
+/*
+ * Module entry point: run each subsystem init in array order.  On
+ * failure, unwind the already-initialised entries in reverse and
+ * return the error.
+ */
+static int __init dm_init(void)
+{
+       const int count = ARRAY_SIZE(_inits);
+
+       int r, i;
+
+       for (i = 0; i < count; i++) {
+               r = _inits[i].init();
+               if (r)
+                       goto bad;
+       }
+
+       return 0;
+
+      bad:
+       while (i--)
+               _inits[i].exit();
+
+       return r;
+}
+
+/* Module exit: tear down every subsystem, in reverse of init order. */
+static void __exit dm_exit(void)
+{
+       int i = ARRAY_SIZE(_inits);
+
+       while (i--)
+               _inits[i].exit();
+}
+
+/*
+ * Block device functions
+ */
+/* Opening the block device takes a reference on the mapped_device. */
+static int dm_blk_open(struct inode *inode, struct file *file)
+{
+       struct mapped_device *md;
+
+       md = inode->i_bdev->bd_disk->private_data;
+       dm_get(md);
+       return 0;
+}
+
+/* Closing drops the reference taken in dm_blk_open. */
+static int dm_blk_close(struct inode *inode, struct file *file)
+{
+       struct mapped_device *md;
+
+       md = inode->i_bdev->bd_disk->private_data;
+       dm_put(md);
+       return 0;
+}
+
+/* Get a dm_io from the mempool; GFP_NOIO so it may block but not recurse. */
+static inline struct dm_io *alloc_io(void)
+{
+       return mempool_alloc(_io_pool, GFP_NOIO);
+}
+
+/* Return a dm_io to the mempool. */
+static inline void free_io(struct dm_io *io)
+{
+       mempool_free(io, _io_pool);
+}
+
+/* Allocate a deferred-io node; may return NULL (caller must check). */
+static inline struct deferred_io *alloc_deferred(void)
+{
+       return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
+}
+
+/* Free a deferred-io node. */
+static inline void free_deferred(struct deferred_io *di)
+{
+       kfree(di);
+}
+
+/*
+ * Add the bio to the list of deferred io.
+ *
+ * Returns 0 if the bio was deferred, 1 if the device turned out not
+ * to be suspended (caller should retry the normal mapping path), or
+ * -ENOMEM.
+ *
+ * NOTE(review): dm_request defers on DMF_BLOCK_IO but this checks
+ * DMF_SUSPENDED, which dm_suspend sets later; the window between the
+ * two is handled by the retry loop in dm_request — confirm.
+ */
+static int queue_io(struct mapped_device *md, struct bio *bio)
+{
+       struct deferred_io *di;
+
+       di = alloc_deferred();
+       if (!di)
+               return -ENOMEM;
+
+       down_write(&md->lock);
+
+       if (!test_bit(DMF_SUSPENDED, &md->flags)) {
+               up_write(&md->lock);
+               free_deferred(di);
+               return 1;
+       }
+
+       /* push onto the head of the singly-linked deferred list */
+       di->bio = bio;
+       di->next = md->deferred;
+       md->deferred = di;
+
+       up_write(&md->lock);
+       return 0;               /* deferred successfully */
+}
+
+/*-----------------------------------------------------------------
+ * CRUD START:
+ *   A more elegant soln is in the works that uses the queue
+ *   merge fn, unfortunately there are a couple of changes to
+ *   the block layer that I want to make for this.  So in the
+ *   interests of getting something for people to use I give
+ *   you this clearly demarcated crap.
+ *---------------------------------------------------------------*/
+
+/*
+ * Decrements the number of outstanding ios that a bio has been
+ * cloned into, completing the original io if necc.
+ *
+ * The first error reported by any clone is recorded in io->error
+ * (a single static spinlock serialises the update).  When the last
+ * clone finishes, the original bio is ended with that error and any
+ * suspend waiter is nudged via md->wait.
+ */
+static inline void dec_pending(struct dm_io *io, int error)
+{
+       static spinlock_t _uptodate_lock = SPIN_LOCK_UNLOCKED;
+       unsigned long flags;
+
+       spin_lock_irqsave(&_uptodate_lock, flags);
+       if (error)
+               io->error = error;
+       spin_unlock_irqrestore(&_uptodate_lock, flags);
+
+       if (atomic_dec_and_test(&io->io_count)) {
+               if (atomic_dec_and_test(&io->md->pending))
+                       /* nudge anyone waiting on suspend queue */
+                       wake_up(&io->md->wait);
+
+               /* on error, report 0 bytes done; otherwise the full size */
+               bio_endio(io->bio, io->error ? 0 : io->bio->bi_size, io->error);
+               free_io(io);
+       }
+}
+
+/*
+ * Completion callback for every clone bio: propagate the result to
+ * the owning dm_io and release the clone once it has fully finished.
+ */
+static int clone_endio(struct bio *bio, unsigned int done, int error)
+{
+       struct dm_io *io = bio->bi_private;
+
+       /*
+        * Only call dec_pending if the clone has completely
+        * finished.  If a partial io errors I'm assuming it won't
+        * be requeued.  FIXME: check this.
+        */
+       if (error || !bio->bi_size) {
+               dec_pending(io, error);
+               bio_put(bio);
+       }
+
+       return 0;
+}
+
+
+/*
+ * Maximum number of sectors that can be issued to target ti starting
+ * at 'sector' without crossing the target's end or (if set) a
+ * split_io boundary.
+ */
+static sector_t max_io_len(struct mapped_device *md,
+                          sector_t sector, struct dm_target *ti)
+{
+       sector_t len = ti->len;
+
+       /* FIXME: obey io_restrictions ! */
+
+       /*
+        * Does the target need to split even further ?
+        */
+       if (ti->split_io) {
+               sector_t boundary;
+               sector_t offset = sector - ti->begin;
+               /* distance to the next split_io-aligned boundary */
+               boundary = dm_round_up(offset + 1, ti->split_io) - offset;
+
+               if (len > boundary)
+                       len = boundary;
+       }
+
+       return len;
+}
+
+/*
+ * Hand a clone to the target's map function, bumping the pending and
+ * io_count counters first so completion accounting is correct
+ * whichever way the map call goes.
+ */
+static void __map_bio(struct dm_target *ti, struct bio *clone)
+{
+       struct dm_io *io = clone->bi_private;
+       int r;
+
+       /*
+        * Sanity checks.
+        */
+       if (!clone->bi_size)
+               BUG();
+
+       /*
+        * Map the clone.  If r == 0 we don't need to do
+        * anything, the target has assumed ownership of
+        * this io.
+        */
+       atomic_inc(&io->md->pending);
+       atomic_inc(&io->io_count);
+       r = ti->type->map(ti, clone);
+       if (r > 0)
+               /* the bio has been remapped so dispatch it */
+               generic_make_request(clone);
+
+       else if (r < 0)
+               /* error the io and bail out */
+               dec_pending(io, -EIO);
+}
+
+/* Cursor state threaded through __split_bio / __clone_and_map. */
+struct clone_info {
+       struct mapped_device *md;
+       struct bio *bio;        /* the original bio being split */
+       struct dm_io *io;       /* completion tracking for the original */
+       sector_t sector;        /* next sector to map */
+       sector_t sector_count;  /* sectors still to map */
+       unsigned short idx;     /* index into bio's io_vec for next clone */
+};
+
+/*
+ * Issues a little bio that just does the back end of a split page.
+ *
+ * Builds a single-vec bio covering the last bio_vec of the original,
+ * re-targeted at the boundary found by the caller.
+ *
+ * NOTE(review): the bio_alloc(GFP_NOIO, 1) result is used without a
+ * NULL check — presumably it cannot fail here; verify.
+ */
+static void __split_page(struct clone_info *ci, unsigned int len)
+{
+       struct dm_target *ti = dm_table_find_target(ci->md->map, ci->sector);
+       struct bio *clone, *bio = ci->bio;
+       struct bio_vec *bv = bio->bi_io_vec + (bio->bi_vcnt - 1);
+
+       DMWARN("splitting page");
+
+       if (len > ci->sector_count)
+               len = ci->sector_count;
+
+       clone = bio_alloc(GFP_NOIO, 1);
+       memcpy(clone->bi_io_vec, bv, sizeof(*bv));
+
+       clone->bi_sector = ci->sector;
+       clone->bi_bdev = bio->bi_bdev;
+       clone->bi_flags = bio->bi_flags | (1 << BIO_SEG_VALID);
+       clone->bi_rw = bio->bi_rw;
+       clone->bi_size = len << SECTOR_SHIFT;
+       clone->bi_end_io = clone_endio;
+       clone->bi_private = ci->io;
+
+       /* advance the cursor past this fragment */
+       ci->sector += len;
+       ci->sector_count -= len;
+
+       __map_bio(ti, clone);
+}
+
+/*
+ * Clone as much of the remaining io as fits in the target under the
+ * current sector, map it, then advance the clone_info cursor.  If
+ * the split lands mid-bio_vec, __split_page issues the remainder of
+ * that page.
+ */
+static void __clone_and_map(struct clone_info *ci)
+{
+       struct bio *clone, *bio = ci->bio;
+       struct dm_target *ti = dm_table_find_target(ci->md->map, ci->sector);
+       sector_t len = max_io_len(ci->md, bio->bi_sector, ti);
+
+       /* shorter than current target ? */
+       if (ci->sector_count < len)
+               len = ci->sector_count;
+
+       /* create the clone */
+       clone = bio_clone(ci->bio, GFP_NOIO);
+       clone->bi_sector = ci->sector;
+       clone->bi_idx = ci->idx;
+       clone->bi_size = len << SECTOR_SHIFT;
+       clone->bi_end_io = clone_endio;
+       clone->bi_private = ci->io;
+
+       /* adjust the remaining io */
+       ci->sector += len;
+       ci->sector_count -= len;
+       __map_bio(ti, clone);
+
+       /*
+        * If we are not performing all remaining io in this
+        * clone then we need to calculate ci->idx for the next
+        * time round.
+        */
+       if (ci->sector_count) {
+               /* walk the io_vec to find where this clone's data ended */
+               while (len) {
+                       struct bio_vec *bv = clone->bi_io_vec + ci->idx;
+                       sector_t bv_len = bv->bv_len >> SECTOR_SHIFT;
+                       if (bv_len <= len)
+                               len -= bv_len;
+
+                       else {
+                               /* clone ended mid-vec: issue the page tail */
+                               __split_page(ci, bv_len - len);
+                               len = 0;
+                       }
+                       ci->idx++;
+               }
+       }
+}
+
+/*
+ * Split the bio into several clones.
+ *
+ * io_count starts at 1 so the dm_io cannot complete while clones are
+ * still being created; the final dec_pending drops that extra
+ * reference.
+ */
+static void __split_bio(struct mapped_device *md, struct bio *bio)
+{
+       struct clone_info ci;
+
+       ci.md = md;
+       ci.bio = bio;
+       ci.io = alloc_io();
+       ci.io->error = 0;
+       atomic_set(&ci.io->io_count, 1);
+       ci.io->bio = bio;
+       ci.io->md = md;
+       ci.sector = bio->bi_sector;
+       ci.sector_count = bio_sectors(bio);
+       ci.idx = 0;
+
+       while (ci.sector_count)
+               __clone_and_map(&ci);
+
+       /* drop the extra reference count */
+       dec_pending(ci.io, 0);
+}
+/*-----------------------------------------------------------------
+ * CRUD END
+ *---------------------------------------------------------------*/
+
+
+/*
+ * The request function that just remaps the bio built up by
+ * dm_merge_bvec.
+ *
+ * While DMF_BLOCK_IO is set the bio is either failed (readahead),
+ * deferred via queue_io, or retried if the device resumed in the
+ * meantime.
+ */
+static int dm_request(request_queue_t *q, struct bio *bio)
+{
+       int r;
+       struct mapped_device *md = q->queuedata;
+
+       down_read(&md->lock);
+
+       /*
+        * If we're suspended we have to queue
+        * this io for later.
+        */
+       while (test_bit(DMF_BLOCK_IO, &md->flags)) {
+               up_read(&md->lock);
+
+               /* readahead is best-effort; just fail it */
+               if (bio_rw(bio) == READA) {
+                       bio_io_error(bio, 0);
+                       return 0;
+               }
+
+               r = queue_io(md, bio);
+               if (r < 0) {
+                       bio_io_error(bio, 0);
+                       return 0;
+
+               } else if (r == 0)
+                       return 0;       /* deferred successfully */
+
+               /*
+                * We're in a while loop, because someone could suspend
+                * before we get to the following read lock.
+                */
+               down_read(&md->lock);
+       }
+
+       __split_bio(md, bio);
+       up_read(&md->lock);
+       return 0;
+}
+
+/*
+ * See if the device with a specific minor # is free.
+ *
+ * Returns the minor on success, -EINVAL if out of range, or -EBUSY
+ * if a gendisk already exists at that minor.  (md is currently
+ * unused here.)
+ */
+static int specific_dev(int minor, struct mapped_device *md)
+{
+       struct gendisk *disk;
+       int part;
+
+       if (minor >= MAX_DEVICES) {
+               DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)",
+                      MAX_DEVICES);
+               return -EINVAL;
+       }
+
+       disk = get_gendisk(MKDEV(_major, minor), &part);
+       if (disk) {
+               /* minor taken; drop the reference get_gendisk gave us */
+               put_disk(disk);
+               return -EBUSY;
+       }
+
+       return minor;
+}
+
+/* Linear scan for the first free minor; -EBUSY if all are taken. */
+static int any_old_dev(struct mapped_device *md)
+{
+       int i;
+
+       for (i = 0; i < MAX_DEVICES; i++)
+               if (specific_dev(i, md) >= 0) {
+                       DMWARN("allocating minor = %d", i);
+                       return i;
+               }
+
+       return -EBUSY;
+}
+
+/*
+ * Allocate and initialise a blank device with a given minor.
+ *
+ * minor < 0 means "pick any free minor".  Returns NULL on allocation
+ * failure or if no minor is available.
+ *
+ * NOTE(review): add_disk() publishes the device before md->pending
+ * and md->wait are initialised below — presumably no io can arrive
+ * until a table is bound; verify.
+ */
+static struct mapped_device *alloc_dev(int minor)
+{
+       struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+
+       if (!md) {
+               DMWARN("unable to allocate device, out of memory.");
+               return NULL;
+       }
+
+       /* get a minor number for the dev */
+       minor = (minor < 0) ? any_old_dev(md) : specific_dev(minor, md);
+       if (minor < 0) {
+               kfree(md);
+               return NULL;
+       }
+
+       memset(md, 0, sizeof(*md));
+       init_rwsem(&md->lock);
+       md->kdev = mk_kdev(_major, minor);
+       atomic_set(&md->holders, 1);
+
+       md->queue.queuedata = md;
+       blk_queue_make_request(&md->queue, dm_request);
+
+       md->disk = alloc_disk(1);
+       if (!md->disk) {
+               kfree(md);
+               return NULL;
+       }
+
+       md->disk->major = _major;
+       md->disk->first_minor = minor;
+       md->disk->fops = &dm_blk_dops;
+       md->disk->queue = &md->queue;
+       md->disk->private_data = md;
+       sprintf(md->disk->disk_name, "dm-%d", minor);
+       add_disk(md->disk);
+
+       atomic_set(&md->pending, 0);
+       init_waitqueue_head(&md->wait);
+       return md;
+}
+
+/* Tear down the gendisk and free the mapped_device itself. */
+static void free_dev(struct mapped_device *md)
+{
+       del_gendisk(md->disk);
+       put_disk(md->disk);
+       kfree(md);
+}
+
+/*
+ * Bind a table to the device, taking a reference on the table and
+ * setting the disk capacity.  The reference is taken unconditionally
+ * so that __unbind's dm_table_put is always balanced, even for
+ * zero-length tables.  Caller must hold md->lock or have exclusive
+ * access.
+ */
+static int __bind(struct mapped_device *md, struct dm_table *t)
+{
+       request_queue_t *q = &md->queue;
+       sector_t size;
+
+       md->map = t;
+       dm_table_get(t);
+
+       size = dm_table_get_size(t);
+       set_capacity(md->disk, size);
+       if (size == 0)
+               return 0;
+
+       dm_table_set_restrictions(t, q);
+       return 0;
+}
+
+/* Drop the bound table's reference and zero the disk capacity. */
+static void __unbind(struct mapped_device *md)
+{
+       dm_table_put(md->map);
+       md->map = NULL;
+       set_capacity(md->disk, 0);
+}
+
+/*
+ * Constructor for a new device.
+ *
+ * minor < 0 picks any free minor.  On success *result holds the new
+ * device with one reference (drop with dm_put).  Returns -ENXIO if
+ * the device cannot be allocated, or the __bind error.
+ */
+int dm_create(int minor, struct dm_table *table, struct mapped_device **result)
+{
+       int r;
+       struct mapped_device *md;
+
+       md = alloc_dev(minor);
+       if (!md)
+               return -ENXIO;
+
+       r = __bind(md, table);
+       if (r) {
+               free_dev(md);
+               return r;
+       }
+
+       *result = md;
+       return 0;
+}
+
+/* Take a reference on the mapped device. */
+void dm_get(struct mapped_device *md)
+{
+       atomic_inc(&md->holders);
+}
+
+/* Drop a reference; the last holder destroys the device. */
+void dm_put(struct mapped_device *md)
+{
+       if (atomic_dec_and_test(&md->holders)) {
+               DMWARN("destroying md");
+               __unbind(md);
+               free_dev(md);
+       }
+}
+
+/*
+ * Requeue the deferred bios by calling generic_make_request.
+ * Consumes (frees) the list as it goes.
+ */
+static void flush_deferred_io(struct deferred_io *c)
+{
+       struct deferred_io *n;
+
+       while (c) {
+               n = c->next;
+               generic_make_request(c->bio);
+               free_deferred(c);
+               c = n;
+       }
+}
+
+/*
+ * Swap in a new table (destroying old one).  The device must be
+ * suspended (-EPERM otherwise).  md->lock is released on every
+ * return path, including __bind failure.
+ */
+int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+{
+       int r;
+
+       down_write(&md->lock);
+
+       /* device must be suspended */
+       if (!test_bit(DMF_SUSPENDED, &md->flags)) {
+               up_write(&md->lock);
+               return -EPERM;
+       }
+
+       __unbind(md);
+       r = __bind(md, table);
+
+       /* don't return with the write lock held if __bind failed */
+       up_write(&md->lock);
+       return r;
+}
+
+/*
+ * We need to be able to change a mapping table under a mounted
+ * filesystem.  For example we might want to move some data in
+ * the background.  Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight bios and ensure that any further io gets deferred.
+ *
+ * Two-phase: set DMF_BLOCK_IO so dm_request stops mapping new io,
+ * then wait for md->pending to drain before setting DMF_SUSPENDED.
+ * Returns -EINVAL if already blocking io.
+ */
+int dm_suspend(struct mapped_device *md)
+{
+       DECLARE_WAITQUEUE(wait, current);
+
+       down_write(&md->lock);
+
+       /*
+        * First we set the BLOCK_IO flag so no more ios will be
+        * mapped.
+        */
+       if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+               up_write(&md->lock);
+               return -EINVAL;
+       }
+
+       set_bit(DMF_BLOCK_IO, &md->flags);
+       up_write(&md->lock);
+
+       /*
+        * Then we wait for the already mapped ios to
+        * complete.
+        */
+       down_read(&md->lock);
+
+       /* poll for pending == 0, yielding the cpu between checks */
+       add_wait_queue(&md->wait, &wait);
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if (!atomic_read(&md->pending))
+                       break;
+
+               yield();
+       }
+
+       current->state = TASK_RUNNING;
+       remove_wait_queue(&md->wait, &wait);
+       up_read(&md->lock);
+
+       /* set_bit is atomic */
+       set_bit(DMF_SUSPENDED, &md->flags);
+
+       return 0;
+}
+
+/*
+ * Clear the suspend state and re-issue any bios that were deferred
+ * while suspended.  Fails with -EINVAL if not suspended or if the
+ * bound table is empty.
+ */
+int dm_resume(struct mapped_device *md)
+{
+       struct deferred_io *def;
+
+       down_write(&md->lock);
+       if (!test_bit(DMF_SUSPENDED, &md->flags) ||
+           !dm_table_get_size(md->map)) {
+               up_write(&md->lock);
+               return -EINVAL;
+       }
+
+       clear_bit(DMF_SUSPENDED, &md->flags);
+       clear_bit(DMF_BLOCK_IO, &md->flags);
+       /* detach the deferred list under the lock, replay it outside */
+       def = md->deferred;
+       md->deferred = NULL;
+       up_write(&md->lock);
+
+       flush_deferred_io(def);
+       blk_run_queues();
+
+       return 0;
+}
+
+/* Return the device number, read under the lock. */
+kdev_t dm_kdev(struct mapped_device *md)
+{
+       kdev_t dev;
+
+       down_read(&md->lock);
+       dev = md->kdev;
+       up_read(&md->lock);
+
+       return dev;
+}
+
+/*
+ * Return the current table with a reference taken; the caller must
+ * drop it with dm_table_put.
+ */
+struct dm_table *dm_get_table(struct mapped_device *md)
+{
+       struct dm_table *t;
+
+       down_read(&md->lock);
+       t = md->map;
+       dm_table_get(t);
+       up_read(&md->lock);
+
+       return t;
+}
+
+/* Non-zero if the device is currently suspended. */
+int dm_suspended(struct mapped_device *md)
+{
+       return test_bit(DMF_SUSPENDED, &md->flags);
+}
+
+/* Block device entry points; declared extern in dm.h. */
+struct block_device_operations dm_blk_dops = {
+       .open = dm_blk_open,
+       .release = dm_blk_close,
+       .owner = THIS_MODULE
+};
+
+/*
+ * module hooks
+ */
+module_init(dm_init);
+module_exit(dm_exit);
+
+MODULE_PARM(major, "i");
+MODULE_PARM_DESC(major, "The major number of the device mapper");
+MODULE_DESCRIPTION(DM_NAME " driver");
+MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
new file mode 100644 (file)
index 0000000..9f6f722
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * Internal header file for device mapper
+ *
+ * Copyright (C) 2001, 2002 Sistina Software
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef DM_INTERNAL_H
+#define DM_INTERNAL_H
+
+#include <linux/fs.h>
+#include <linux/device-mapper.h>
+#include <linux/list.h>
+#include <linux/blkdev.h>
+
+#define DM_NAME "device-mapper"
+#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
+#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x)
+#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x)
+
+/*
+ * FIXME: I think this should be with the definition of sector_t
+ * in types.h.
+ */
+#ifdef CONFIG_LBD
+#define SECTOR_FORMAT "%Lu"
+#else
+#define SECTOR_FORMAT "%lu"
+#endif
+
+extern struct block_device_operations dm_blk_dops;
+
+/*
+ * List of devices that a metadevice uses and should open/close.
+ */
+struct dm_dev {
+       struct list_head list;
+
+       atomic_t count;         /* how many targets reference this device */
+       int mode;               /* open mode (FMODE_* flags) */
+       struct block_device *bdev;
+};
+
+struct dm_table;
+struct mapped_device;
+
+/*-----------------------------------------------------------------
+ * Functions for manipulating a struct mapped_device.
+ * Drop the reference with dm_put when you finish with the object.
+ *---------------------------------------------------------------*/
+int dm_create(int minor, struct dm_table *table, struct mapped_device **md);
+
+/*
+ * Reference counting for md.
+ */
+void dm_get(struct mapped_device *md);
+void dm_put(struct mapped_device *md);
+
+/*
+ * A device can still be used while suspended, but I/O is deferred.
+ */
+int dm_suspend(struct mapped_device *md);
+int dm_resume(struct mapped_device *md);
+
+/*
+ * The device must be suspended before calling this method.
+ */
+int dm_swap_table(struct mapped_device *md, struct dm_table *t);
+
+/*
+ * Drop a reference on the table when you've finished with the
+ * result.
+ */
+struct dm_table *dm_get_table(struct mapped_device *md);
+
+/*
+ * Info functions.
+ */
+kdev_t dm_kdev(struct mapped_device *md);
+int dm_suspended(struct mapped_device *md);
+
+/*-----------------------------------------------------------------
+ * Functions for manipulating a table.  Tables are also reference
+ * counted.
+ *---------------------------------------------------------------*/
+int dm_table_create(struct dm_table **result, int mode);
+
+void dm_table_get(struct dm_table *t);
+void dm_table_put(struct dm_table *t);
+
+int dm_table_add_target(struct dm_table *t, const char *type,
+                       sector_t start, sector_t len, char *params);
+int dm_table_complete(struct dm_table *t);
+void dm_table_event(struct dm_table *t);
+sector_t dm_table_get_size(struct dm_table *t);
+struct dm_target *dm_table_get_target(struct dm_table *t, int index);
+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q);
+unsigned int dm_table_get_num_targets(struct dm_table *t);
+struct list_head *dm_table_get_devices(struct dm_table *t);
+int dm_table_get_mode(struct dm_table *t);
+void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq);
+
+/*-----------------------------------------------------------------
+ * A registry of target types.
+ *---------------------------------------------------------------*/
+int dm_target_init(void);
+void dm_target_exit(void);
+struct target_type *dm_get_target_type(const char *name);
+void dm_put_target_type(struct target_type *t);
+
+
+/*-----------------------------------------------------------------
+ * Useful inlines.
+ *---------------------------------------------------------------*/
+/*
+ * True if fixed + num * obj would overflow an unsigned long —
+ * used to validate allocation sizes before kmalloc/vmalloc.
+ */
+static inline int array_too_big(unsigned long fixed, unsigned long obj,
+                               unsigned long num)
+{
+       return (num > (ULONG_MAX - fixed) / obj);
+}
+
+/*
+ * ceiling(n / size) * size
+ *
+ * i.e. n rounded up to the next multiple of size (n itself if
+ * already a multiple).  size must be non-zero.
+ */
+static inline unsigned long dm_round_up(unsigned long n, unsigned long size)
+{
+       unsigned long r = n % size;
+       return n + (r ? (size - r) : 0);
+}
+
+/*
+ * The device-mapper can be driven through one of two interfaces;
+ * ioctl or filesystem, depending which patch you have applied.
+ */
+int dm_interface_init(void);
+void dm_interface_exit(void);
+
+/*
+ * Targets for linear and striped mappings
+ */
+int dm_linear_init(void);
+void dm_linear_exit(void);
+
+int dm_stripe_init(void);
+void dm_stripe_exit(void);
+
+#endif
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
new file mode 100644 (file)
index 0000000..bf9b15f
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DEVICE_MAPPER_H
+#define _LINUX_DEVICE_MAPPER_H
+
+#define DM_DIR "mapper"        /* Slashes not supported */
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+#ifdef __KERNEL__
+
+struct dm_target;
+struct dm_table;
+struct dm_dev;
+
+typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
+
+/*
+ * In the constructor the target parameter will already have the
+ * table, type, begin and len fields filled in.
+ */
+typedef int (*dm_ctr_fn) (struct dm_target *target, int argc, char **argv);
+
+/*
+ * The destructor doesn't need to free the dm_target, just
+ * anything hidden ti->private.
+ */
+typedef void (*dm_dtr_fn) (struct dm_target *ti);
+
+/*
+ * The map function must return:
+ * < 0: error
+ * = 0: The target will handle the io by resubmitting it later
+ * > 0: simple remap complete
+ */
+typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
+typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type,
+                            char *result, int maxlen);
+
+void dm_error(const char *message);
+
+/*
+ * Constructors should call these functions to ensure destination devices
+ * are opened/closed correctly.
+ * FIXME: too many arguments.
+ */
+int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
+                 sector_t len, int mode, struct dm_dev **result);
+void dm_put_device(struct dm_target *ti, struct dm_dev *d);
+
+/*
+ * Information about a target type
+ *
+ * Registered with dm_register_target; looked up by name when a
+ * table is built.
+ */
+struct target_type {
+       const char *name;
+       struct module *module;
+       dm_ctr_fn ctr;
+       dm_dtr_fn dtr;
+       dm_map_fn map;
+       dm_status_fn status;    /* optional, may be NULL */
+};
+
+/* Combined queue limits of the underlying devices a target maps to. */
+struct io_restrictions {
+       unsigned short          max_sectors;
+       unsigned short          max_phys_segments;
+       unsigned short          max_hw_segments;
+       unsigned short          hardsect_size;
+       unsigned int            max_segment_size;
+       unsigned long           seg_boundary_mask;
+};
+
+/* One mapped range of sectors within a table. */
+struct dm_target {
+       struct dm_table *table;
+       struct target_type *type;
+
+       /* target limits */
+       sector_t begin;
+       sector_t len;
+
+       /* FIXME: turn this into a mask, and merge with io_restrictions */
+       sector_t split_io;
+
+       /*
+        * These are automatically filled in by
+        * dm_table_get_device.
+        */
+       struct io_restrictions limits;
+
+       /* target specific data */
+       void *private;
+
+       /* Used to provide an error string from the ctr */
+       char *error;
+};
+
+int dm_register_target(struct target_type *t);
+int dm_unregister_target(struct target_type *t);
+
+#endif                         /* __KERNEL__ */
+
+#endif                         /* _LINUX_DEVICE_MAPPER_H */
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
new file mode 100644 (file)
index 0000000..c5ae8cd
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_H
+#define _LINUX_DM_IOCTL_H
+
+#include <linux/device-mapper.h>
+#include <linux/types.h>
+
+/*
+ * Implements a traditional ioctl interface to the device mapper.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start.  If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+       /*
+        * The version number is made up of three parts:
+        * major - no backward or forward compatibility,
+        * minor - only backwards compatible,
+        * patch - both backwards and forwards compatible.
+        *
+        * All clients of the ioctl interface should fill in the
+        * version number of the interface that they were
+        * compiled with.
+        *
+        * All recognised ioctl commands (ie. those that don't
+        * return -ENOTTY) fill out this field, even if the
+        * command failed.
+        */
+       uint32_t version[3];    /* in/out */
+       uint32_t data_size;     /* total size of data passed in
+                                * including this struct */
+
+       uint32_t data_start;    /* offset to start of data
+                                * relative to start of this struct */
+
+       uint32_t target_count;  /* in/out */
+       uint32_t open_count;    /* out */
+       uint32_t flags;         /* in/out */
+
+       __kernel_dev_t dev;     /* in/out */
+
+       char name[DM_NAME_LEN]; /* device name */
+       char uuid[DM_UUID_LEN]; /* unique identifier for
+                                * the block device */
+};
+
+/*
+ * Used to specify tables.  These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+       int32_t status;         /* used when reading from kernel only */
+       uint64_t sector_start;
+       uint32_t length;
+
+       /*
+        * Offset in bytes (from the start of this struct) to
+        * next target_spec.
+        */
+       uint32_t next;
+
+       char target_type[DM_MAX_TYPE_NAME];
+
+       /*
+        * Parameter string starts immediately after this object.
+        * Be careful to add padding after string to ensure correct
+        * alignment of subsequent dm_target_spec.
+        */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+       uint32_t count;
+
+       __kernel_dev_t dev[0];  /* out */
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+       /* Top level cmds */
+       DM_VERSION_CMD = 0,
+       DM_REMOVE_ALL_CMD,
+
+       /* device level cmds */
+       DM_DEV_CREATE_CMD,
+       DM_DEV_REMOVE_CMD,
+       DM_DEV_RELOAD_CMD,
+       DM_DEV_RENAME_CMD,
+       DM_DEV_SUSPEND_CMD,
+       DM_DEV_DEPS_CMD,
+       DM_DEV_STATUS_CMD,
+
+       /* target level cmds */
+       DM_TARGET_STATUS_CMD,
+       DM_TARGET_WAIT_CMD
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE    _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RELOAD    _IOWR(DM_IOCTL, DM_DEV_RELOAD_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME    _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_DEPS      _IOWR(DM_IOCTL, DM_DEV_DEPS_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+
+#define DM_TARGET_STATUS _IOWR(DM_IOCTL, DM_TARGET_STATUS_CMD, struct dm_ioctl)
+#define DM_TARGET_WAIT   _IOWR(DM_IOCTL, DM_TARGET_WAIT_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR       1
+#define DM_VERSION_MINOR       0
+#define DM_VERSION_PATCHLEVEL  6
+#define DM_VERSION_EXTRA       "-ioctl (2002-10-15)"
+
+/* Status bits */
+#define DM_READONLY_FLAG       0x00000001
+#define DM_SUSPEND_FLAG                0x00000002
+#define DM_EXISTS_FLAG         0x00000004
+#define DM_PERSISTENT_DEV_FLAG 0x00000008
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG   0x00000010
+
+#endif                         /* _LINUX_DM_IOCTL_H */