add lost files by original gitignore
author Gui Chen <gui.chen@intel.com>
Wed, 22 Aug 2012 13:33:46 +0000 (21:33 +0800)
committer Gui Chen <gui.chen@intel.com>
Wed, 22 Aug 2012 13:33:46 +0000 (21:33 +0800)
Signed-off-by: Gui Chen <gui.chen@intel.com>
57 files changed:
.gitignore
kpartx/ChangeLog [new file with mode: 0644]
kpartx/Makefile [new file with mode: 0644]
kpartx/README [new file with mode: 0644]
kpartx/bsd.c [new file with mode: 0644]
kpartx/byteorder.h [new file with mode: 0644]
kpartx/crc32.c [new file with mode: 0644]
kpartx/crc32.h [new file with mode: 0644]
kpartx/dasd.c [new file with mode: 0644]
kpartx/dasd.h [new file with mode: 0644]
kpartx/devmapper.c [new file with mode: 0644]
kpartx/devmapper.h [new file with mode: 0644]
kpartx/dos.c [new file with mode: 0644]
kpartx/dos.h [new file with mode: 0644]
kpartx/efi.h [new file with mode: 0644]
kpartx/gpt.c [new file with mode: 0644]
kpartx/gpt.h [new file with mode: 0644]
kpartx/kpartx.8 [new file with mode: 0644]
kpartx/kpartx.c [new file with mode: 0644]
kpartx/kpartx.h [new file with mode: 0644]
kpartx/kpartx.rules [new file with mode: 0644]
kpartx/kpartx_id [new file with mode: 0644]
kpartx/lopart.c [new file with mode: 0644]
kpartx/lopart.h [new file with mode: 0644]
kpartx/mac.c [new file with mode: 0644]
kpartx/mac.h [new file with mode: 0644]
kpartx/solaris.c [new file with mode: 0644]
kpartx/sun.c [new file with mode: 0644]
kpartx/sysmacros.h [new file with mode: 0644]
kpartx/unixware.c [new file with mode: 0644]
kpartx/xstrncpy.c [new file with mode: 0644]
kpartx/xstrncpy.h [new file with mode: 0644]
multipath/01_udev [new file with mode: 0755]
multipath/02_multipath [new file with mode: 0755]
multipath/Makefile [new file with mode: 0644]
multipath/dev_t.h [new file with mode: 0644]
multipath/main.c [new file with mode: 0644]
multipath/multipath.8 [new file with mode: 0644]
multipath/multipath.conf.5 [new file with mode: 0644]
multipath/multipath.rules [new file with mode: 0644]
multipathd/Makefile [new file with mode: 0644]
multipathd/cli.c [new file with mode: 0644]
multipathd/cli.h [new file with mode: 0644]
multipathd/cli_handlers.c [new file with mode: 0644]
multipathd/cli_handlers.h [new file with mode: 0644]
multipathd/main.c [new file with mode: 0644]
multipathd/main.h [new file with mode: 0644]
multipathd/multipathd.8 [new file with mode: 0644]
multipathd/multipathd.init.debian [new file with mode: 0644]
multipathd/multipathd.init.redhat [new file with mode: 0644]
multipathd/multipathd.init.suse [new file with mode: 0644]
multipathd/pidfile.c [new file with mode: 0644]
multipathd/pidfile.h [new file with mode: 0644]
multipathd/uxclnt.c [new file with mode: 0644]
multipathd/uxclnt.h [new file with mode: 0644]
multipathd/uxlsnr.c [new file with mode: 0644]
multipathd/uxlsnr.h [new file with mode: 0644]

index 9b3f663..8f9ee98 100644 (file)
@@ -5,6 +5,3 @@
 *.so.0
 *.a
 *.gz
-kpartx
-multipath
-multipathd
diff --git a/kpartx/ChangeLog b/kpartx/ChangeLog
new file mode 100644 (file)
index 0000000..cd0c6c1
--- /dev/null
@@ -0,0 +1,9 @@
+002:
+* convert to kpartx name everywhere
+* remove all HDGEO ioctl code
+* now work with files by mapping loops on the fly
+* merged and massaged lopart.[ch] from lomount.[ch]
+  (due credit to original author here : hpa ?)
+* added a fn find_loop_by_file in lopart.[ch]
+001:
+* Initial release
diff --git a/kpartx/Makefile b/kpartx/Makefile
new file mode 100644 (file)
index 0000000..e1e9651
--- /dev/null
@@ -0,0 +1,35 @@
+# Makefile for kpartx: builds the binary and its gzipped man page
+#
+# Copyright (C) 2003 Christophe Varoqui, <christophe.varoqui@opensvc.com>
+#
+include ../Makefile.inc
+# Compile with large-file support (64-bit file offsets).
+CFLAGS += -I. -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+
+LDFLAGS = -ldevmapper
+OBJS = bsd.o dos.o kpartx.o solaris.o unixware.o dasd.o sun.o \
+       gpt.o mac.o crc32.o lopart.o xstrncpy.o devmapper.o
+EXEC = kpartx
+
+all: $(EXEC)
+# Linking the binary also produces $(EXEC).8.gz, which install relies on.
+$(EXEC): $(OBJS)
+       $(CC) $(OBJS) -o $(EXEC) $(LDFLAGS)
+       $(GZIP) $(EXEC).8 > $(EXEC).8.gz
+       
+install: $(EXEC) $(EXEC).8
+       $(INSTALL_PROGRAM) -d $(DESTDIR)$(bindir)
+       $(INSTALL_PROGRAM) -m 755 $(EXEC) $(DESTDIR)$(bindir)
+       $(INSTALL_PROGRAM) -d $(DESTDIR)$(libudevdir)
+       $(INSTALL_PROGRAM) -m 755 kpartx_id $(DESTDIR)$(libudevdir)
+       $(INSTALL_PROGRAM) -d $(DESTDIR)/etc/udev/rules.d
+       $(INSTALL_PROGRAM) -m 644 kpartx.rules $(DESTDIR)/etc/udev/rules.d/
+       $(INSTALL_PROGRAM) -d $(DESTDIR)$(mandir)
+       $(INSTALL_PROGRAM) -m 644 $(EXEC).8.gz $(DESTDIR)$(mandir)
+# NOTE(review): uninstall leaves kpartx_id and kpartx.rules in place.
+uninstall:
+       rm -f $(DESTDIR)$(bindir)/$(EXEC)
+       rm -f $(DESTDIR)$(mandir)/$(EXEC).8.gz
+
+clean:
+       rm -f core *.o $(EXEC) *.gz
diff --git a/kpartx/README b/kpartx/README
new file mode 100644 (file)
index 0000000..e0680b1
--- /dev/null
@@ -0,0 +1,9 @@
+This version of partx is intended to be built
+statically against klibc.
+
+It creates partitions as device maps.
+
+With due respect to the original authors,
+
+have fun,
+cvaroqui
diff --git a/kpartx/bsd.c b/kpartx/bsd.c
new file mode 100644 (file)
index 0000000..f87175e
--- /dev/null
@@ -0,0 +1,114 @@
+#include "kpartx.h"
+#include <stdio.h>
+
+#define BSD_DISKMAGIC  (0x82564557UL)  /* The disk magic number */
+#define XBSD_MAXPARTITIONS     16
+#define BSD_FS_UNUSED          0
+
+struct bsd_disklabel {        /* overlaid on the raw label block by read_bsd_pt() */
+       unsigned int    d_magic;        /* the magic number */
+       short int       d_type;         /* drive type */
+       short int       d_subtype;      /* controller/d_type specific */
+       char    d_typename[16];         /* type name, e.g. "eagle" */
+       char    d_packname[16];         /* pack identifier */
+       unsigned int    d_secsize;      /* # of bytes per sector */
+       unsigned int    d_nsectors;     /* # of data sectors per track */
+       unsigned int    d_ntracks;      /* # of tracks per cylinder */
+       unsigned int    d_ncylinders;   /* # of data cylinders per unit */
+       unsigned int    d_secpercyl;    /* # of data sectors per cylinder */
+       unsigned int    d_secperunit;   /* # of data sectors per unit */
+       unsigned short  d_sparespertrack;/* # of spare sectors per track */
+       unsigned short  d_sparespercyl; /* # of spare sectors per cylinder */
+       unsigned int    d_acylinders;   /* # of alt. cylinders per unit */
+       unsigned short  d_rpm;          /* rotational speed */
+       unsigned short  d_interleave;   /* hardware sector interleave */
+       unsigned short  d_trackskew;    /* sector 0 skew, per track */
+       unsigned short  d_cylskew;      /* sector 0 skew, per cylinder */
+       unsigned int    d_headswitch;   /* head switch time, usec */
+       unsigned int    d_trkseek;      /* track-to-track seek, usec */
+       unsigned int    d_flags;        /* generic flags */
+       unsigned int    d_drivedata[5]; /* drive-type specific information */
+       unsigned int    d_spare[5];     /* reserved for future use */
+       unsigned int    d_magic2;       /* the magic number (again) */
+       unsigned short  d_checksum;     /* xor of data incl. partitions */
+
+                       /* filesystem and partition information: */
+       unsigned short  d_npartitions;  /* number of partitions in following */
+       unsigned int    d_bbsize;       /* size of boot area at sn0, bytes */
+       unsigned int    d_sbsize;       /* max size of fs superblock, bytes */
+       struct  bsd_partition {         /* the partition table */
+               unsigned int    p_size;   /* number of sectors in partition */
+               unsigned int    p_offset; /* starting sector */
+               unsigned int    p_fsize;  /* filesystem basic fragment size */
+               unsigned char   p_fstype; /* filesystem type; BSD_FS_UNUSED = empty slot */
+               unsigned char   p_frag;   /* filesystem fragments per block */
+               unsigned short  p_cpg;    /* filesystem cylinders per group */
+       } d_partitions[XBSD_MAXPARTITIONS];/* actually may be more */
+};
+
+int    /* slices stored in sp[] (<= ns), or -1 if unreadable / no BSD magic */
+read_bsd_pt(int fd, struct slice all, struct slice *sp, int ns) {
+       struct bsd_disklabel *l;
+       struct bsd_partition *p;
+       unsigned int offset = all.start, end;
+       int max_partitions;
+       char *bp;
+       int n = 0, i, j;
+       /* The label is looked up in the block following the slice start. */
+       bp = getblock(fd, offset+1);    /* 1 sector suffices */
+       if (bp == NULL)
+               return -1;
+       /* NOTE(review): label fields are used as read, without byte-order conversion. */
+       l = (struct bsd_disklabel *) bp;
+       if (l->d_magic != BSD_DISKMAGIC)
+               return -1;
+       /* Never walk past the 16 entries (XBSD_MAXPARTITIONS) d_partitions[] declares. */
+       max_partitions = 16;
+       if (l->d_npartitions < max_partitions)
+               max_partitions = l->d_npartitions;
+       for (p = l->d_partitions; p - l->d_partitions <  max_partitions; p++) {
+               if (p->p_fstype == BSD_FS_UNUSED)
+                       /* nothing */;
+               else if (n < ns) {
+                       sp[n].start = p->p_offset;
+                       sp[n].size = p->p_size;
+                       n++;
+               } else {
+                       fprintf(stderr,
+                               "bsd_partition: too many slices\n");
+                       break;
+               }
+       }
+       /*
+        * Convention has it that the bsd disklabel will always have
+        * the 'c' partition spanning the entire disk.
+        * So we have to check for contained slices.
+        */
+       for(i = 0; i < n; i++) {
+               if (sp[i].size == 0)
+                       continue;
+
+               end = sp[i].start + sp[i].size;
+               for(j = 0; j < n; j ++) {
+                       if ( i == j )
+                               continue;
+                       if (sp[j].size == 0)
+                               continue;
+
+                       if (sp[i].start < sp[j].start) {
+                               if (end > sp[j].start &&
+                                   end < sp[j].start + sp[j].size) {
+                                       /* Invalid slice */
+                                       fprintf(stderr,
+                                               "bsd_disklabel: slice %d overlaps with %d\n", i , j);
+                                       sp[i].size = 0;
+                               }
+                       } else {        /* i starts at or after the start of j */
+                               if (end <= sp[j].start + sp[j].size) {
+                                       sp[i].container = j + 1;        /* 1-based index of j */
+                               }
+                       }
+               }
+       }
+       return n;
+}
diff --git a/kpartx/byteorder.h b/kpartx/byteorder.h
new file mode 100644 (file)
index 0000000..21962d6
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef BYTEORDER_H_INCLUDED
+#define BYTEORDER_H_INCLUDED
+
+#ifdef __linux__
+#  include <endian.h>
+#  include <byteswap.h>
+#else
+#  error unsupported
+#endif
+/* leXX/beXX_to_cpu(): convert a little-/big-endian value to host byte order. */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#  define le16_to_cpu(x) (x)
+#  define be16_to_cpu(x) bswap_16(x)
+#  define le32_to_cpu(x) (x)
+#  define be32_to_cpu(x) bswap_32(x)
+#elif BYTE_ORDER == BIG_ENDIAN
+#  define le16_to_cpu(x) bswap_16(x)
+#  define be16_to_cpu(x) (x)
+#  define le32_to_cpu(x) bswap_32(x)
+#  define be32_to_cpu(x) (x)
+#else
+#  error unsupported
+#endif
+
+#endif                         /* BYTEORDER_H_INCLUDED */
diff --git a/kpartx/crc32.c b/kpartx/crc32.c
new file mode 100644 (file)
index 0000000..42d803d
--- /dev/null
@@ -0,0 +1,393 @@
+/* 
+ * crc32.c
+ * This code is in the public domain; copyright abandoned.
+ * Liability for non-performance of this code is limited to the amount
+ * you paid for it.  Since it is distributed for free, your refund will
+ * be very very small.  If it breaks, you get to keep both pieces.
+ */
+
+#include "crc32.h"
+
+#if __GNUC__ >= 3      /* 2.x has "attribute", but only 3.0 has "pure" */
+#define attribute(x) __attribute__(x)
+#else
+#define attribute(x)
+#endif
+
+/*
+ * There are multiple 16-bit CRC polynomials in common use, but this is
+ * *the* standard CRC-32 polynomial, first popularized by Ethernet.
+ * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
+ */
+#define CRCPOLY_LE 0xedb88320
+#define CRCPOLY_BE 0x04c11db7
+
+/* How many bits at a time to use.  Requires a table of 4<<CRC_xx_BITS bytes. */
+/* For less performance-sensitive, use 4 */
+#define CRC_LE_BITS 8
+#define CRC_BE_BITS 8
+
+/*
+ * Little-endian CRC computation.  Used with serial bit streams sent
+ * lsbit-first.  Be sure to use cpu_to_le32() to append the computed CRC.
+ */
+#if CRC_LE_BITS > 8 || CRC_LE_BITS < 1 || CRC_LE_BITS & CRC_LE_BITS-1
+# error CRC_LE_BITS must be a power of 2 between 1 and 8
+#endif
+
+#if CRC_LE_BITS == 1
+/*
+ * In fact, the table-based code will work in this case, but it can be
+ * simplified by inlining the table in ?: form.
+ */
+#define crc32init_le()
+#define crc32cleanup_le()
+/**
+ * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
+ * @crc - seed value for computation.  ~0 for Ethernet, sometimes 0 for
+ *        other uses, or the previous crc32 value if computing incrementally.
+ * @p   - pointer to buffer over which CRC is run
+ * @len - length of buffer @p
+ * Returns the updated CRC; this variant works one bit at a time, no table.
+ */
+uint32_t attribute((pure)) crc32_le(uint32_t crc, unsigned char const *p, size_t len)
+{
+       int i;
+       while (len--) {
+               crc ^= *p++;
+               for (i = 0; i < 8; i++)
+                       crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
+       }
+       return crc;
+}
+#else                          /* Table-based approach */
+
+static uint32_t *crc32table_le;
+/**
+ * crc32init_le() - allocate and initialize LE table data
+ * NOTE(review): a second call overwrites crc32table_le without freeing it.
+ * crc is the crc of the byte i; other entries are filled in based on the
+ * fact that crctable[i^j] = crctable[i] ^ crctable[j].
+ * Returns 0 on success, 1 if the table could not be allocated.
+ */
+static int
+crc32init_le(void)
+{
+       unsigned i, j;
+       uint32_t crc = 1;
+
+       crc32table_le =
+               malloc((1 << CRC_LE_BITS) * sizeof(uint32_t));
+       if (!crc32table_le)
+               return 1;
+       crc32table_le[0] = 0;
+       /* Each pass fills the entries whose lowest set bit is i. */
+       for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
+               crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
+               for (j = 0; j < 1 << CRC_LE_BITS; j += 2 * i)
+                       crc32table_le[i + j] = crc ^ crc32table_le[j];
+       }
+       return 0;
+}
+
+/**
+ * crc32cleanup_le(): free LE table data and reset the pointer (safe to repeat)
+ */
+static void
+crc32cleanup_le(void)
+{
+       free(crc32table_le);    /* free(NULL) is a no-op, so no guard is needed */
+       crc32table_le = NULL;
+}
+
+/**
+ * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
+ * @crc - seed value for computation.  ~0 for Ethernet, sometimes 0 for
+ *        other uses, or the previous crc32 value if computing incrementally.
+ * @p   - pointer to buffer over which CRC is run
+ * @len - length of buffer @p
+ * Returns the updated CRC.  Reads crc32table_le, so init_crc32() must run first.
+ */
+uint32_t attribute((pure)) crc32_le(uint32_t crc, unsigned char const *p, size_t len)
+{
+       while (len--) {
+# if CRC_LE_BITS == 8
+               crc = (crc >> 8) ^ crc32table_le[(crc ^ *p++) & 255];
+# elif CRC_LE_BITS == 4
+               crc ^= *p++;
+               crc = (crc >> 4) ^ crc32table_le[crc & 15];
+               crc = (crc >> 4) ^ crc32table_le[crc & 15];
+# elif CRC_LE_BITS == 2
+               crc ^= *p++;
+               crc = (crc >> 2) ^ crc32table_le[crc & 3];
+               crc = (crc >> 2) ^ crc32table_le[crc & 3];
+               crc = (crc >> 2) ^ crc32table_le[crc & 3];
+               crc = (crc >> 2) ^ crc32table_le[crc & 3];
+# endif
+       }
+       return crc;
+}
+#endif
+
+/*
+ * Big-endian CRC computation.  Used with serial bit streams sent
+ * msbit-first.  Be sure to use cpu_to_be32() to append the computed CRC.
+ */
+#if CRC_BE_BITS > 8 || CRC_BE_BITS < 1 || CRC_BE_BITS & CRC_BE_BITS-1
+# error CRC_BE_BITS must be a power of 2 between 1 and 8
+#endif
+
+#if CRC_BE_BITS == 1
+/*
+ * In fact, the table-based code will work in this case, but it can be
+ * simplified by inlining the table in ?: form.
+ */
+#define crc32init_be()
+#define crc32cleanup_be()
+
+/**
+ * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
+ * @crc - seed value for computation.  ~0 for Ethernet, sometimes 0 for
+ *        other uses, or the previous crc32 value if computing incrementally.
+ * @p   - pointer to buffer over which CRC is run
+ * @len - length of buffer @p
+ * Returns the updated CRC; this variant works one bit at a time, no table.
+ */
+uint32_t attribute((pure)) crc32_be(uint32_t crc, unsigned char const *p, size_t len)
+{
+       int i;
+       while (len--) {
+               crc ^= *p++ << 24;
+               for (i = 0; i < 8; i++)
+                       crc =
+                           (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE :
+                                         0);
+       }
+       return crc;
+}
+
+#else                          /* Table-based approach */
+static uint32_t *crc32table_be;
+
+/**
+ * crc32init_be() - allocate and initialize BE table data; returns 0, or 1 on OOM
+ */
+static int
+crc32init_be(void)
+{
+       unsigned i, j;
+       uint32_t crc = 0x80000000;
+
+       crc32table_be =
+               malloc((1 << CRC_BE_BITS) * sizeof(uint32_t));
+       if (!crc32table_be)
+               return 1;
+       crc32table_be[0] = 0;
+       /* Each pass fills entries i .. 2*i-1 from the already filled 0 .. i-1. */
+       for (i = 1; i < 1 << CRC_BE_BITS; i <<= 1) {
+               crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
+               for (j = 0; j < i; j++)
+                       crc32table_be[i + j] = crc ^ crc32table_be[j];
+       }
+       return 0;
+}
+
+/**
+ * crc32cleanup_be(): free BE table data and reset the pointer (safe to repeat)
+ */
+static void
+crc32cleanup_be(void)
+{
+       free(crc32table_be);    /* free(NULL) is a no-op, so no guard is needed */
+       crc32table_be = NULL;
+}
+
+
+/**
+ * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
+ * @crc - seed value for computation.  ~0 for Ethernet, sometimes 0 for
+ *        other uses, or the previous crc32 value if computing incrementally.
+ * @p   - pointer to buffer over which CRC is run
+ * @len - length of buffer @p
+ * Returns the updated CRC.  Reads crc32table_be, so init_crc32() must run first.
+ */
+uint32_t attribute((pure)) crc32_be(uint32_t crc, unsigned char const *p, size_t len)
+{
+       while (len--) {
+# if CRC_BE_BITS == 8
+               crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++];
+# elif CRC_BE_BITS == 4
+               crc ^= *p++ << 24;
+               crc = (crc << 4) ^ crc32table_be[crc >> 28];
+               crc = (crc << 4) ^ crc32table_be[crc >> 28];
+# elif CRC_BE_BITS == 2
+               crc ^= *p++ << 24;
+               crc = (crc << 2) ^ crc32table_be[crc >> 30];
+               crc = (crc << 2) ^ crc32table_be[crc >> 30];
+               crc = (crc << 2) ^ crc32table_be[crc >> 30];
+               crc = (crc << 2) ^ crc32table_be[crc >> 30];
+# endif
+       }
+       return crc;
+}
+#endif
+
+/*
+ * A brief CRC tutorial.
+ *
+ * A CRC is a long-division remainder.  You add the CRC to the message,
+ * and the whole thing (message+CRC) is a multiple of the given
+ * CRC polynomial.  To check the CRC, you can either check that the
+ * CRC matches the recomputed value, *or* you can check that the
+ * remainder computed on the message+CRC is 0.  This latter approach
+ * is used by a lot of hardware implementations, and is why so many
+ * protocols put the end-of-frame flag after the CRC.
+ *
+ * It's actually the same long division you learned in school, except that
+ * - We're working in binary, so the digits are only 0 and 1, and
+ * - When dividing polynomials, there are no carries.  Rather than add and
+ *   subtract, we just xor.  Thus, we tend to get a bit sloppy about
+ *   the difference between adding and subtracting.
+ *
+ * A 32-bit CRC polynomial is actually 33 bits long.  But since it's
+ * 33 bits long, bit 32 is always going to be set, so usually the CRC
+ * is written in hex with the most significant bit omitted.  (If you're
+ * familiar with the IEEE 754 floating-point format, it's the same idea.)
+ *
+ * Note that a CRC is computed over a string of *bits*, so you have
+ * to decide on the endianness of the bits within each byte.  To get
+ * the best error-detecting properties, this should correspond to the
+ * order they're actually sent.  For example, standard RS-232 serial is
+ * little-endian; the most significant bit (sometimes used for parity)
+ * is sent last.  And when appending a CRC word to a message, you should
+ * do it in the right order, matching the endianness.
+ *
+ * Just like with ordinary division, the remainder is always smaller than
+ * the divisor (the CRC polynomial) you're dividing by.  Each step of the
+ * division, you take one more digit (bit) of the dividend and append it
+ * to the current remainder.  Then you figure out the appropriate multiple
+ * of the divisor to subtract to bring the remainder back into range.
+ * In binary, it's easy - it has to be either 0 or 1, and to make the
+ * XOR cancel, it's just a copy of bit 32 of the remainder.
+ *
+ * When computing a CRC, we don't care about the quotient, so we can
+ * throw the quotient bit away, but subtract the appropriate multiple of
+ * the polynomial from the remainder and we're back to where we started,
+ * ready to process the next bit.
+ *
+ * A big-endian CRC written this way would be coded like:
+ * for (i = 0; i < input_bits; i++) {
+ *     multiple = remainder & 0x80000000 ? CRCPOLY : 0;
+ *     remainder = (remainder << 1 | next_input_bit()) ^ multiple;
+ * }
+ * Notice how, to get at bit 32 of the shifted remainder, we look
+ * at bit 31 of the remainder *before* shifting it.
+ *
+ * But also notice how the next_input_bit() bits we're shifting into
+ * the remainder don't actually affect any decision-making until
+ * 32 bits later.  Thus, the first 32 cycles of this are pretty boring.
+ * Also, to add the CRC to a message, we need a 32-bit-long hole for it at
+ * the end, so we have to add 32 extra cycles shifting in zeros at the
+ * end of every message.
+ *
+ * So the standard trick is to rearrange merging in the next_input_bit()
+ * until the moment it's needed.  Then the first 32 cycles can be precomputed,
+ * and merging in the final 32 zero bits to make room for the CRC can be
+ * skipped entirely.
+ * This changes the code to:
+ * for (i = 0; i < input_bits; i++) {
+ *      remainder ^= next_input_bit() << 31;
+ *     multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
+ *     remainder = (remainder << 1) ^ multiple;
+ * }
+ * With this optimization, the little-endian code is simpler:
+ * for (i = 0; i < input_bits; i++) {
+ *      remainder ^= next_input_bit();
+ *     multiple = (remainder & 1) ? CRCPOLY : 0;
+ *     remainder = (remainder >> 1) ^ multiple;
+ * }
+ *
+ * Note that the other details of endianness have been hidden in CRCPOLY
+ * (which must be bit-reversed) and next_input_bit().
+ *
+ * However, as long as next_input_bit is returning the bits in a sensible
+ * order, we can actually do the merging 8 or more bits at a time rather
+ * than one bit at a time:
+ * for (i = 0; i < input_bytes; i++) {
+ *     remainder ^= next_input_byte() << 24;
+ *     for (j = 0; j < 8; j++) {
+ *             multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
+ *             remainder = (remainder << 1) ^ multiple;
+ *     }
+ * }
+ * Or in little-endian:
+ * for (i = 0; i < input_bytes; i++) {
+ *     remainder ^= next_input_byte();
+ *     for (j = 0; j < 8; j++) {
+ *             multiple = (remainder & 1) ? CRCPOLY : 0;
+ *             remainder = (remainder >> 1) ^ multiple;
+ *     }
+ * }
+ * If the input is a multiple of 32 bits, you can even XOR in a 32-bit
+ * word at a time and increase the inner loop count to 32.
+ *
+ * You can also mix and match the two loop styles, for example doing the
+ * bulk of a message byte-at-a-time and adding bit-at-a-time processing
+ * for any fractional bytes at the end.
+ *
+ * The only remaining optimization is to the byte-at-a-time table method.
+ * Here, rather than just shifting one bit of the remainder to decide
+ * on the correct multiple to subtract, we can shift a byte at a time.
+ * This produces a 40-bit (rather than a 33-bit) intermediate remainder,
+ * but again the multiple of the polynomial to subtract depends only on
+ * the high bits, the high 8 bits in this case.  
+ *
+ * The multiple we need in that case is the low 32 bits of a 40-bit
+ * value whose high 8 bits are given, and which is a multiple of the
+ * generator polynomial.  This is simply the CRC-32 of the given
+ * one-byte message.
+ *
+ * Two more details: normally, appending zero bits to a message which
+ * is already a multiple of a polynomial produces a larger multiple of that
+ * polynomial.  To enable a CRC to detect this condition, it's common to
+ * invert the CRC before appending it.  This makes the remainder of the
+ * message+crc come out not as zero, but some fixed non-zero value.
+ *
+ * The same problem applies to zero bits prepended to the message, and
+ * a similar solution is used.  Instead of starting with a remainder of
+ * 0, an initial remainder of all ones is used.  As long as you start
+ * the same way on decoding, it doesn't make a difference.
+ */
+
+
+/**
+ * init_crc32(): generates CRC32 tables
+ * 
+ * Builds both lookup tables by calling crc32init_le() and then
+ * crc32init_be(); the second call is made even if the first failed.
+ *
+ * Returns 0 if both initializers succeeded, 1 if either reported
+ * an error.  cleanup_crc32() releases whatever was allocated.
+ *
+ * Earlier wording about module use counts and 'rmmod crc32' does
+ * not apply here: nothing in this file keeps a use count.
+ */
+int
+init_crc32(void)
+{
+       int rc1, rc2, rc;
+       rc1 = crc32init_le();
+       rc2 = crc32init_be();
+       rc = rc1 || rc2;
+       return rc;
+}
+
+/**
+ * cleanup_crc32(): releases both the LE and the BE table set up by init_crc32()
+ */
+void
+cleanup_crc32(void)
+{
+       crc32cleanup_le();
+       crc32cleanup_be();
+}
diff --git a/kpartx/crc32.h b/kpartx/crc32.h
new file mode 100644 (file)
index 0000000..a4505b8
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * crc32.h - table setup/teardown and CRC-32 routines implemented in crc32.c
+ */
+#ifndef _CRC32_H
+#define _CRC32_H
+
+#include <inttypes.h>
+#include <stdlib.h>
+/* init_crc32() returns 0 on success; cleanup_crc32() frees the tables again. */
+extern int init_crc32(void);
+extern void cleanup_crc32(void);
+extern uint32_t  crc32_le(uint32_t crc, unsigned char const *p, size_t len);
+extern uint32_t  crc32_be(uint32_t crc, unsigned char const *p, size_t len);
+/* crc32() takes (seed, data, length); the ether_crc*() macros take (length, data). */
+#define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)data, length)
+#define ether_crc_le(length, data) crc32_le(~0, data, length)
+#define ether_crc(length, data)    crc32_be(~0, data, length)
+
+#endif /* _CRC32_H */
diff --git a/kpartx/dasd.c b/kpartx/dasd.c
new file mode 100644 (file)
index 0000000..dcdf678
--- /dev/null
@@ -0,0 +1,262 @@
+/*
+ * dasd.c
+ *
+ * IBM DASD partition table handling.
+ *
+ * Mostly taken from drivers/s390/block/dasd.c
+ *
+ * Copyright (c) 2005, Hannes Reinecke, SUSE Linux Products GmbH
+ * Copyright IBM Corporation, 2009
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <linux/hdreg.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <libdevmapper.h>
+#include "devmapper.h"
+#include "kpartx.h"
+#include "byteorder.h"
+#include "dasd.h"
+
+unsigned long long sectors512(unsigned long long sectors, int blocksize)
+{      /* scales a count of blocksize-byte blocks to a count of 512-byte sectors */
+       return sectors * (blocksize >> 9);
+}
+
+/* Fill sp[] with the partitions of the DASD behind fd (or behind the first
+ * dependency of a dm device).  Returns the slice count, or -1 on failure. */
+int 
+read_dasd_pt(int fd, struct slice all, struct slice *sp, int ns)
+{
+       int retval = -1;
+       int blocksize;
+       uint64_t disksize;
+       uint64_t offset, size, fmt_size;
+       dasd_information_t info;
+       struct hd_geometry geo;
+       char type[5] = {0,};
+       volume_label_t vlabel;
+       unsigned char *data = NULL;
+       uint64_t blk;
+       int fd_dasd = -1;
+       struct stat sbuf;
+       dev_t dev;
+       char *devname;
+       char pathname[256];
+
+       if (fd < 0) {
+               return -1;
+       }
+
+       if (fstat(fd, &sbuf) == -1) {
+               return -1;
+       }
+
+       devname = dm_mapname(major(sbuf.st_rdev), minor(sbuf.st_rdev));
+
+       if (devname != NULL) {
+               /* We were passed a handle to a dm device.
+                * Get the first target and operate on that instead.
+                */
+               if (!(dev = dm_get_first_dep(devname))) {
+                       free(devname);
+                       return -1;
+               }
+               free(devname);
+
+               if ((unsigned int)major(dev) != 94) {
+                       /* Not a DASD */
+                       return -1;
+               }
+
+               /*
+                * Hard to believe, but there's no simple way to translate
+                * major/minor into an openable device file, so we have
+                * to create one for ourselves.
+                */
+               
+               sprintf(pathname, "/dev/.kpartx-node-%u-%u",
+                       (unsigned int)major(dev), (unsigned int)minor(dev));
+               if ((fd_dasd = open(pathname, O_RDONLY)) == -1) {
+                       /* Devicenode does not exist. Try to create one */
+                       if (mknod(pathname, 0600 | S_IFBLK, dev) == -1) {
+                               /* Couldn't create a device node */
+                               return -1;
+                       }
+                       fd_dasd = open(pathname, O_RDONLY);
+                       /*
+                        * The file will vanish when the last process (we)
+                        * has ceased to access it.
+                        */
+                       unlink(pathname);
+               }
+               if (fd_dasd == -1) {
+                       /* Couldn't open the device (open() reports failure as -1) */
+                       return -1;
+               }
+       } else {
+               fd_dasd = fd;
+       }
+
+       if (ioctl(fd_dasd, BIODASDINFO, (unsigned long)&info) != 0) {
+               goto out;
+       }
+
+       if (ioctl(fd_dasd, HDIO_GETGEO, (unsigned long)&geo) != 0) {
+               goto out;
+       }
+
+       if (ioctl(fd_dasd, BLKGETSIZE64, &disksize) != 0)
+               goto out;
+       disksize >>= 9;
+
+       if (ioctl(fd_dasd, BLKSSZGET, &blocksize) != 0)
+               goto out;
+
+       if (blocksize < 512 || blocksize > 4096)
+               goto out;
+
+       /*
+        * Get volume label, extract name and type.
+        */
+
+       if (!(data = (unsigned char *)malloc(blocksize)))
+               goto out;
+
+
+       if (lseek(fd_dasd, info.label_block * blocksize, SEEK_SET) == -1)
+               goto out;
+       if (read(fd_dasd, data, blocksize) == -1) {
+               perror("read");
+               goto out;
+       }
+
+       if ((!info.FBA_layout) && (!strcmp(info.type, "ECKD")))
+               memcpy (&vlabel, data, sizeof(vlabel));
+       else {
+               bzero(&vlabel,4);
+               memcpy (&vlabel.vollbl, data, sizeof(vlabel) - 4);
+       }
+       vtoc_ebcdic_dec(vlabel.vollbl, type, 4);
+
+       /*
+        * Three different types: CMS1, VOL1 and LNX1/unlabeled
+        */
+       if (strncmp(type, "CMS1", 4) == 0) {
+               /*
+                * VM style CMS1 labeled disk
+                */
+               unsigned int *label = (unsigned int *) &vlabel;
+
+               blocksize = label[4];
+               if (label[14] != 0) {
+                       /* disk is reserved minidisk */
+                       offset = label[14];
+                       size   = sectors512(label[8] - 1, blocksize);
+               } else {
+                       offset = info.label_block + 1;
+                       size   = sectors512(label[8], blocksize);
+               }
+               sp[0].start = sectors512(offset, blocksize);
+               sp[0].size  = size - sp[0].start;
+               retval = 1;
+       } else if ((strncmp(type, "VOL1", 4) == 0) &&
+               (!info.FBA_layout) && (!strcmp(info.type, "ECKD"))) {
+               /*
+                * New style VOL1 labeled disk
+                */
+               int counter;
+
+               /* get block number and read then go through format1 labels */
+               blk = cchhb2blk(&vlabel.vtoc, &geo) + 1;
+               counter = 0;
+               if (lseek(fd_dasd, blk * blocksize, SEEK_SET) == -1)
+                       goto out;
+               /* stop once sp[] is full, and on EOF, errors or short reads */
+               while (counter < ns &&
+                      read(fd_dasd, data, blocksize) == blocksize) {
+                       format1_label_t f1;
+
+                       memcpy(&f1, data, sizeof(format1_label_t));
+
+                       /* skip FMT4 / FMT5 / FMT7 / FMT9 labels */
+                       if (EBCtoASC[f1.DS1FMTID] == '4'
+                           || EBCtoASC[f1.DS1FMTID] == '5'
+                           || EBCtoASC[f1.DS1FMTID] == '7'
+                           || EBCtoASC[f1.DS1FMTID] == '9') {
+                               blk++;
+                               continue;
+                       }
+
+                       /* only FMT1 and FMT8 valid at this point */
+                       if (EBCtoASC[f1.DS1FMTID] != '1' &&
+                           EBCtoASC[f1.DS1FMTID] != '8')
+                               break;
+
+                       /* OK, we got valid partition data */
+                       offset = cchh2blk(&f1.DS1EXT1.llimit, &geo);
+                       size  = cchh2blk(&f1.DS1EXT1.ulimit, &geo) -
+                               offset + geo.sectors;
+                       sp[counter].start = sectors512(offset, blocksize);
+                       sp[counter].size  = sectors512(size, blocksize);
+                       counter++;
+                       blk++;
+               }
+               retval = counter;
+       } else {
+               /*
+                * Old style LNX1 or unlabeled disk
+                */
+               if (strncmp(type, "LNX1", 4) == 0) {
+                       if (vlabel.ldl_version == 0xf2) {
+                               fmt_size = sectors512(vlabel.formatted_blocks,
+                                                     blocksize);
+                       } else if (!strcmp(info.type, "ECKD")) {
+                               /* formatted w/o large volume support */
+                               fmt_size = geo.cylinders * geo.heads
+                                       * geo.sectors * (blocksize >> 9);
+                       } else {
+                               /* old label and no usable disk geometry
+                                * (e.g. DIAG) */
+                               fmt_size = disksize;
+                       }
+                       size = disksize;
+                       if (fmt_size < size)
+                               size = fmt_size;
+               } else
+                       size = disksize;
+
+               sp[0].start = sectors512(info.label_block + 1, blocksize);
+               sp[0].size  = size - sp[0].start;
+               retval = 1;
+       }
+
+ out:
+       if (data != NULL)
+               free(data);
+       if (fd_dasd != -1 && fd_dasd != fd)
+               close(fd_dasd);
+       return retval;
+}
diff --git a/kpartx/dasd.h b/kpartx/dasd.h
new file mode 100644 (file)
index 0000000..0ed7c80
--- /dev/null
@@ -0,0 +1,293 @@
+/*
+ * dasd.h
+ *
+ * IBM DASD partition table handling.
+ *
+ * Mostly taken from drivers/s390/block/dasd.c
+ *
+ * Copyright (c) 2005, Hannes Reinecke, SUSE Linux Products GmbH
+ * Copyright IBM Corporation, 2009
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#ifndef _DASD_H
+#define _DASD_H
+
+/*
+ * Packed 3-byte address made of a 16-bit "tt" and an 8-bit "r" part.
+ * Used by format1_label.DS1LSTAR ("last used track and block on track").
+ */
+typedef struct ttr 
+{
+        uint16_t tt;
+        uint8_t  r;
+} __attribute__ ((packed)) ttr_t;
+
+/*
+ * Packed cyl-cyl-head-head-block address (5 bytes).
+ * Converted to a linear block number by cchhb2blk().
+ */
+typedef struct cchhb 
+{
+        uint16_t cc;
+        uint16_t hh;
+        uint8_t b;
+} __attribute__ ((packed)) cchhb_t;
+
+/*
+ * Packed cyl-cyl-head-head address (4 bytes).
+ * Converted to a linear block number by cchh2blk().
+ */
+typedef struct cchh 
+{
+        uint16_t cc;
+        uint16_t hh;
+} __attribute__ ((packed)) cchh_t;
+
+/*
+ * Packed 3-byte date used by the DS1CREDT/DS1EXPDT/DS1REFD fields of
+ * format1_label (described there as "ydd": a year byte plus a day value).
+ */
+typedef struct labeldate 
+{
+        uint8_t  year;
+        uint16_t day;
+} __attribute__ ((packed)) labeldate_t;
+
+
+/*
+ * Volume label as stored on the disk.  The structure is packed, so the
+ * field order and sizes below are the on-disk layout and must not change.
+ */
+typedef struct volume_label 
+{
+        char volkey[4];         /* volume key = volume label                 */
+       char vollbl[4];         /* volume label                              */
+       char volid[6];          /* volume identifier                         */
+       uint8_t security;               /* security byte                             */
+       cchhb_t vtoc;           /* VTOC address                              */
+       char res1[5];           /* reserved                                  */
+        char cisize[4];                /* CI-size for FBA,...                       */
+                                /* ...blanks for CKD                         */
+       char blkperci[4];       /* no of blocks per CI (FBA), blanks for CKD */
+       char labperci[4];       /* no of labels per CI (FBA), blanks for CKD */
+       char res2[4];           /* reserved                                  */
+       char lvtoc[14];         /* owner code for LVTOC                      */
+       char res3[28];          /* reserved                                  */
+       /*
+        * NOTE(review): plain char, but dasd.c compares this field with 0xf2;
+        * that comparison can only succeed where char is unsigned -- confirm
+        * before building for a target with signed char.
+        */
+       char ldl_version;       /* version number, valid for ldl format      */
+       uint64_t formatted_blocks; /* valid when ldl_version >= f2           */
+} __attribute__ ((packed)) volume_label_t;
+
+
+/*
+ * One extent descriptor (packed, 10 bytes): a type byte, a sequence number
+ * and the first/last cchh address of the extent.  dasd.c turns llimit and
+ * ulimit into block numbers with cchh2blk() to get a partition's start/size.
+ */
+typedef struct extent 
+{
+        uint8_t  typeind;          /* extent type indicator                     */
+        uint8_t  seqno;            /* extent sequence number                    */
+        cchh_t llimit;          /* starting point of this extent             */
+        cchh_t ulimit;          /* ending point of this extent               */
+} __attribute__ ((packed)) extent_t;
+
+
+/*
+ * Packed block of device constants (cylinder/track counts, track length and
+ * record overheads).  The DS4 prefix suggests it is embedded in a format-4
+ * label -- TODO confirm; no user of this type is visible in this header.
+ */
+typedef struct dev_const 
+{
+        uint16_t DS4DSCYL;           /* number of logical cyls                  */
+        uint16_t DS4DSTRK;           /* number of tracks in a logical cylinder  */
+        uint16_t DS4DEVTK;           /* device track length                     */
+        uint8_t  DS4DEVI;            /* non-last keyed record overhead          */
+        uint8_t  DS4DEVL;            /* last keyed record overhead              */
+        uint8_t  DS4DEVK;            /* non-keyed record overhead differential  */
+        uint8_t  DS4DEVFG;           /* flag byte                               */
+        uint16_t DS4DEVTL;           /* device tolerance                        */
+        uint8_t  DS4DEVDT;           /* number of DSCB's per track              */
+        uint8_t  DS4DEVDB;           /* number of directory blocks per track    */
+} __attribute__ ((packed)) dev_const_t;
+
+
+/*
+ * Format-1 label: describes one data set.  dasd.c reads DS1EXT1 from such a
+ * label to obtain the start and size of a partition.  The structure is
+ * packed; field order and sizes are the on-disk layout.
+ */
+typedef struct format1_label 
+{
+       char  DS1DSNAM[44];       /* data set name                           */
+       uint8_t  DS1FMTID;           /* format identifier                       */
+       char  DS1DSSN[6];         /* data set serial number                  */
+       uint16_t DS1VOLSQ;           /* volume sequence number                  */
+       labeldate_t DS1CREDT;     /* creation date: ydd                      */
+       labeldate_t DS1EXPDT;     /* expiration date                         */
+       uint8_t  DS1NOEPV;           /* number of extents on volume             */
+        uint8_t  DS1NOBDB;           /* no. of bytes used in last directory blk */
+       uint8_t  DS1FLAG1;           /* flag 1                                  */
+       char  DS1SYSCD[13];       /* system code                             */
+       labeldate_t DS1REFD;      /* date last referenced                    */
+        uint8_t  DS1SMSFG;           /* system managed storage indicators       */
+        uint8_t  DS1SCXTF;           /* sec. space extension flag byte          */
+        uint16_t DS1SCXTV;           /* secondary space extension value         */
+        uint8_t  DS1DSRG1;           /* data set organisation byte 1            */
+        uint8_t  DS1DSRG2;           /* data set organisation byte 2            */
+       uint8_t  DS1RECFM;           /* record format                           */
+       uint8_t  DS1OPTCD;           /* option code                             */
+       uint16_t DS1BLKL;            /* block length                            */
+       uint16_t DS1LRECL;           /* record length                           */
+       uint8_t  DS1KEYL;            /* key length                              */
+       uint16_t DS1RKP;             /* relative key position                   */
+       uint8_t  DS1DSIND;           /* data set indicators                     */
+        uint8_t  DS1SCAL1;           /* secondary allocation flag byte          */
+       char DS1SCAL3[3];         /* secondary allocation quantity           */
+       ttr_t DS1LSTAR;           /* last used track and block on track      */
+       uint16_t DS1TRBAL;           /* space remaining on last used track      */
+        uint16_t res1;               /* reserved                                */
+       extent_t DS1EXT1;         /* first extent description                */
+       extent_t DS1EXT2;         /* second extent description               */
+       extent_t DS1EXT3;         /* third extent description                */
+       cchhb_t DS1PTRDS;         /* possible pointer to f2 or f3 DSCB       */
+} __attribute__ ((packed)) format1_label_t;
+
+
+/*
+ * struct dasd_information_t
+ * represents the data about a DASD device that is visible to userspace.
+ * It is the argument type of the BIODASDINFO ioctl defined below; dasd.c
+ * uses its "type" and "label_block" members.
+ */
+typedef struct dasd_information_t {
+       unsigned int devno;             /* S/390 devno */
+       unsigned int real_devno;        /* for aliases */
+       unsigned int schid;             /* S/390 subchannel identifier */
+       unsigned int cu_type  : 16;     /* from SenseID */
+       unsigned int cu_model :  8;     /* from SenseID */
+       unsigned int dev_type : 16;     /* from SenseID */
+       unsigned int dev_model : 8;     /* from SenseID */
+       unsigned int open_count;
+       unsigned int req_queue_len;
+       unsigned int chanq_len;         /* length of chanq */
+       char type[4];                   /* from discipline.name, 'none' for unknown */
+       unsigned int status;            /* current device level */
+       unsigned int label_block;       /* where to find the VOLSER */
+       unsigned int FBA_layout;        /* fixed block size (like AIXVOL) */
+       unsigned int characteristics_size;
+       unsigned int confdata_size;
+       char characteristics[64];       /* from read_device_characteristics */
+       char configuration_data[256];   /* from read_configuration_data */
+} dasd_information_t;
+
+/*
+ * ioctl request codes used by the DASD partition reader.  Each one is only
+ * defined when no system header has provided it already, matching the
+ * guarded fallbacks used in gpt.c.
+ */
+#ifndef DASD_IOCTL_LETTER
+#define DASD_IOCTL_LETTER       'D'
+#endif
+#ifndef BIODASDINFO
+#define BIODASDINFO _IOR(DASD_IOCTL_LETTER,1,dasd_information_t)
+#endif
+#ifndef BLKGETSIZE
+#define BLKGETSIZE _IO(0x12,96)
+#endif
+#ifndef BLKSSZGET
+#define BLKSSZGET _IO(0x12,104)
+#endif
+#ifndef BLKGETSIZE64
+#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* device size in bytes (u64 *arg)*/
+#endif
+
+/*
+ * Only compile this on S/390. Doesn't make any sense
+ * for other architectures.
+ */
+
+/*
+ * EBCDIC -> ASCII translation table, indexed by the EBCDIC byte value.
+ * The comment above each row names the eight source characters of that row.
+ * Code points without a counterpart (shown as "----") translate to 0x07.
+ * Results above 0x7F look like IBM code page 437 values -- TODO confirm.
+ */
+static unsigned char EBCtoASC[256] =
+{
+/* 0x00  NUL   SOH   STX   ETX  *SEL    HT  *RNL   DEL */
+       0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+/* 0x08  -GE  -SPS  -RPT    VT    FF    CR    SO    SI */
+       0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+/* 0x10  DLE   DC1   DC2   DC3  -RES   -NL    BS  -POC
+                                -ENP  ->LF             */
+       0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+/* 0x18  CAN    EM  -UBS  -CU1  -IFS  -IGS  -IRS  -ITB
+                                                  -IUS */
+       0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+/* 0x20  -DS  -SOS    FS  -WUS  -BYP    LF   ETB   ESC
+                                -INP                   */
+       0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+/* 0x28  -SA  -SFE   -SM  -CSP  -MFA   ENQ   ACK   BEL
+                     -SW                               */
+       0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+/* 0x30 ----  ----   SYN   -IR   -PP  -TRN  -NBS   EOT */
+       0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+/* 0x38 -SBS   -IT  -RFF  -CU3   DC4   NAK  ----   SUB */
+       0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+/* 0x40   SP   RSP           ä              ----       */
+       0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+/* 0x48                      .     <     (     +     | */
+       0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
+/* 0x50    &                                      ---- */
+       0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+/* 0x58          ß     !     $     *     )     ;       */
+       0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA,
+/* 0x60    -     /  ----     Ä  ----  ----  ----       */
+       0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+/* 0x68             ----     ,     %     _     >     ? */
+       0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+/* 0x70  ---        ----  ----  ----  ----  ----  ---- */
+       0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+/* 0x78    *     `     :     #     @     '     =     " */
+       0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+/* 0x80    *     a     b     c     d     e     f     g */
+       0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+/* 0x88    h     i              ----  ----  ----       */
+       0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+/* 0x90    °     j     k     l     m     n     o     p */
+       0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+/* 0x98    q     r                    ----        ---- */
+       0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+/* 0xA0          ~     s     t     u     v     w     x */
+       0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+/* 0xA8    y     z              ----  ----  ----  ---- */
+       0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+/* 0xB0    ^                    ----     §  ----       */
+       0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+/* 0xB8       ----     [     ]  ----  ----  ----  ---- */
+       0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07,
+/* 0xC0    {     A     B     C     D     E     F     G */
+       0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+/* 0xC8    H     I  ----           ö              ---- */
+       0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+/* 0xD0    }     J     K     L     M     N     O     P */
+       0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+/* 0xD8    Q     R  ----           ü                   */
+       0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+/* 0xE0    \           S     T     U     V     W     X */
+       0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+/* 0xE8    Y     Z        ----     Ö  ----  ----  ---- */
+       0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+/* 0xF0    0     1     2     3     4     5     6     7 */
+       0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+/* 0xF8    8     9  ----  ----     Ü  ----  ----  ---- */
+       0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
+};
+
+/*
+ * Translate l bytes of EBCDIC text from source into target using the
+ * EBCtoASC table.  No terminator is written; target must hold l bytes.
+ * Nothing is copied when l is zero or negative.
+ */
+static inline void
+vtoc_ebcdic_dec (const char *source, char *target, int l)
+{
+       const unsigned char *in = (const unsigned char *)source;
+       int remaining;
+
+       /* each input byte is used as an index, hence the unsigned view */
+       for (remaining = l; remaining > 0; remaining--)
+               *target++ = (char)EBCtoASC[*in++];
+}
+
+/*
+ * Convert a cyl-cyl-head-head address into a linear block number,
+ * using the heads/sectors values of the given disk geometry.
+ */
+static inline uint64_t
+cchh2blk (cchh_t *ptr, struct hd_geometry *geo)
+{
+       /*
+        * Large-volume encoding: the low nibble of hh is the head, the
+        * upper twelve bits of hh extend the 16-bit cylinder number in cc.
+        */
+       const uint16_t head = ptr->hh & 0x000F;
+       uint64_t cyl = ptr->hh & 0xFFF0;
+
+       cyl = (cyl << 12) | ptr->cc;
+
+       return cyl * geo->heads * geo->sectors + head * geo->sectors;
+}
+
+/*
+ * Convert a cyl-cyl-head-head-block address into a linear block number,
+ * using the heads/sectors values of the given disk geometry.
+ */
+static inline uint64_t
+cchhb2blk (cchhb_t *ptr, struct hd_geometry *geo)
+{
+       /*
+        * Large-volume encoding: the low nibble of hh is the head, the
+        * upper twelve bits of hh extend the 16-bit cylinder number in cc.
+        */
+       const uint16_t head = ptr->hh & 0x000F;
+       uint64_t cyl = ptr->hh & 0xFFF0;
+
+       cyl = (cyl << 12) | ptr->cc;
+
+       /* same as cchh2blk(), plus the block offset within the track */
+       return cyl * geo->heads * geo->sectors + head * geo->sectors + ptr->b;
+}
+
+#endif /* _DASD_H */
diff --git a/kpartx/devmapper.c b/kpartx/devmapper.c
new file mode 100644 (file)
index 0000000..f884511
--- /dev/null
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2004, 2005 Christophe Varoqui
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <libdevmapper.h>
+#include <ctype.h>
+#include <errno.h>
+#include "devmapper.h"
+
+/* printf-style prefix for partition map uuids; %d takes the partition number */
+#define UUID_PREFIX "part%d-"
+/* length assumed for the expanded UUID_PREFIX */
+#define MAX_PREFIX_LEN 8
+
+/*
+ * Check that the kernel offers a device-mapper target whose name starts
+ * with str and whose three version numbers are each >= x, y and z.
+ *
+ * Returns 0 when such a target is listed, 1 otherwise (including when the
+ * version list cannot be obtained).
+ */
+extern int
+dm_prereq (char * str, int x, int y, int z)
+{
+       struct dm_task *task;
+       struct dm_versions *cur;
+       struct dm_versions *prev;
+       size_t len;
+       int missing = 1;
+
+       task = dm_task_create(DM_DEVICE_LIST_VERSIONS);
+       if (task == NULL)
+               return 1;
+
+       dm_task_no_open_count(task);
+
+       if (dm_task_run(task)) {
+               len = strlen(str);
+               cur = dm_task_get_versions(task);
+
+               /*
+                * Entries are chained by the byte offset in ->next; the
+                * walk ends at the entry whose offset leads back to itself.
+                */
+               do {
+                       prev = cur;
+
+                       /* dummy prereq on multipath version */
+                       if (strncmp(str, cur->name, len) == 0 &&
+                           cur->version[0] >= x &&
+                           cur->version[1] >= y &&
+                           cur->version[2] >= z)
+                               missing = 0;
+
+                       cur = (struct dm_versions *)((char *)cur + cur->next);
+               } while (prev != cur);
+       }
+
+       dm_task_destroy(task);
+       return missing;
+}
+
+/*
+ * Run a device-mapper command that needs nothing but a map name.
+ *
+ * task     - DM_DEVICE_* command to run
+ * name     - name of the map to operate on
+ * no_flush - when non-zero, dm_task_no_flush() is applied to the task
+ * cookie   - handed to dm_task_set_cookie() for RESUME and REMOVE tasks
+ *
+ * Returns the result of dm_task_run(), or 0 when the task could not be
+ * set up.
+ */
+extern int
+dm_simplecmd (int task, const char *name, int no_flush, uint32_t *cookie) {
+       struct dm_task *dmt = dm_task_create(task);
+       int needs_cookie;
+       int ok = 0;
+
+       if (dmt == NULL)
+               return 0;
+
+       if (dm_task_set_name(dmt, name)) {
+               dm_task_no_open_count(dmt);
+               dm_task_skip_lockfs(dmt);
+
+               if (no_flush)
+                       dm_task_no_flush(dmt);
+
+               /* only resume and remove are tied to a udev cookie */
+               needs_cookie = (task == DM_DEVICE_RESUME ||
+                               task == DM_DEVICE_REMOVE);
+
+               if (!needs_cookie || dm_task_set_cookie(dmt, cookie, 0))
+                       ok = dm_task_run(dmt);
+       }
+
+       dm_task_destroy(dmt);
+       return ok;
+}
+
+/*
+ * Create or reload a single-target device-mapper map.
+ *
+ * task            - DM_DEVICE_* command (a uuid and cookie are only set
+ *                   for DM_DEVICE_CREATE)
+ * name            - name of the map
+ * target, params  - target type and its parameter string
+ * size            - length of the target, starting at sector 0
+ * uuid            - uuid of the parent device, may be NULL; the map uuid
+ *                   becomes UUID_PREFIX (expanded with part) followed by it
+ * part            - partition number used in the uuid prefix
+ * mode, uid, gid  - ownership and permissions requested for the node
+ * cookie          - handed to dm_task_set_cookie() on create
+ *
+ * Returns the result of dm_task_run(), or 0 when the task could not be
+ * set up.
+ */
+extern int
+dm_addmap (int task, const char *name, const char *target,
+          const char *params, uint64_t size, const char *uuid, int part,
+          mode_t mode, uid_t uid, gid_t gid, uint32_t *cookie) {
+       int r = 0;
+       int len;
+       struct dm_task *dmt;
+       char *prefixed_uuid = NULL;
+
+       if (!(dmt = dm_task_create (task)))
+               return 0;
+
+       if (!dm_task_set_name (dmt, name))
+               goto addout;
+
+       if (!dm_task_add_target (dmt, 0, size, target, params))
+               goto addout;
+
+       if (task == DM_DEVICE_CREATE && uuid) {
+               /*
+                * Measure the formatted uuid first: the prefix grows with
+                * the number of digits in part, so a fixed allowance can
+                * be too small.
+                */
+               len = snprintf(NULL, 0, UUID_PREFIX "%s", part, uuid);
+               if (len < 0) {
+                       fprintf(stderr, "cannot format prefixed uuid\n");
+                       goto addout;
+               }
+               prefixed_uuid = malloc((size_t)len + 1);
+               if (!prefixed_uuid) {
+                       fprintf(stderr, "cannot create prefixed uuid : %s\n",
+                               strerror(errno));
+                       goto addout;
+               }
+               snprintf(prefixed_uuid, (size_t)len + 1, UUID_PREFIX "%s",
+                        part, uuid);
+               if (!dm_task_set_uuid(dmt, prefixed_uuid))
+                       goto addout;
+       }
+
+       if (!dm_task_set_mode(dmt, mode))
+               goto addout;
+       if (!dm_task_set_uid(dmt, uid))
+               goto addout;
+       if (!dm_task_set_gid(dmt, gid))
+               goto addout;
+
+       dm_task_no_open_count(dmt);
+
+       if (task == DM_DEVICE_CREATE && !dm_task_set_cookie(dmt, cookie, 0))
+               goto addout;
+       r = dm_task_run (dmt);
+
+       addout:
+       dm_task_destroy (dmt);
+       /* the temporary uuid string is ours to release on every path */
+       free(prefixed_uuid);
+
+       return r;
+}
+
+/*
+ * Tell whether a device-mapper map named str exists.
+ *
+ * Returns 1 when the info query succeeds and reports the map as existing,
+ * 0 in every other case.
+ */
+extern int
+dm_map_present (char * str)
+{
+       struct dm_info info;
+       struct dm_task *dmt = dm_task_create(DM_DEVICE_INFO);
+       int present = 0;
+
+       if (dmt == NULL)
+               return 0;
+
+       if (dm_task_set_name(dmt, str)) {
+               dm_task_no_open_count(dmt);
+
+               if (dm_task_run(dmt) &&
+                   dm_task_get_info(dmt, &info) &&
+                   info.exists)
+                       present = 1;
+       }
+
+       dm_task_destroy(dmt);
+       return present;
+}
+
+
+/*
+ * Look up the name of the map with the given device numbers.
+ *
+ * Returns a strdup()ed copy that the caller must free, or NULL when the
+ * query fails or yields no (or an empty) name.
+ */
+char *
+dm_mapname(int major, int minor)
+{
+       char *copy = NULL;
+       const char *found;
+       struct dm_task *dmt = dm_task_create(DM_DEVICE_INFO);
+
+       if (dmt == NULL)
+               return NULL;
+
+       dm_task_no_open_count(dmt);
+       dm_task_set_major(dmt, major);
+       dm_task_set_minor(dmt, minor);
+
+       if (dm_task_run(dmt)) {
+               found = dm_task_get_name(dmt);
+               if (found != NULL && found[0] != '\0')
+                       copy = strdup(found);
+       }
+
+       dm_task_destroy(dmt);
+       return copy;
+}
+
+/*
+ * dm_get_first_dep
+ *
+ * Return the device number of the first dependent device of the map
+ * named devname, or 0 when the query fails or lists no dependencies.
+ */
+dev_t dm_get_first_dep(char *devname)
+{
+       struct dm_deps *deps;
+       dev_t first = 0;
+       struct dm_task *dmt = dm_task_create(DM_DEVICE_DEPS);
+
+       if (dmt == NULL)
+               return first;
+
+       if (dm_task_set_name(dmt, devname) && dm_task_run(dmt)) {
+               deps = dm_task_get_deps(dmt);
+               if (deps != NULL && deps->count > 0)
+                       first = deps->device[0];
+       }
+
+       dm_task_destroy(dmt);
+
+       return first;
+}
+
+/*
+ * Look up the uuid of the map with the given device numbers.
+ *
+ * Returns a strdup()ed copy that the caller must free, or NULL when the
+ * query fails or the map has no (or an empty) uuid.
+ */
+char *
+dm_mapuuid(int major, int minor)
+{
+       struct dm_task *dmt;
+       const char *tmp;
+       char *uuid = NULL;
+
+       if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+               return NULL;
+
+       dm_task_no_open_count(dmt);
+       dm_task_set_major(dmt, major);
+       dm_task_set_minor(dmt, minor);
+
+       if (!dm_task_run(dmt))
+               goto out;
+
+       tmp = dm_task_get_uuid(dmt);
+       /* guard against a missing uuid before looking at its first byte */
+       if (tmp && tmp[0] != '\0')
+               uuid = strdup(tmp);
+out:
+       dm_task_destroy(dmt);
+       return uuid;
+}
+
+/*
+ * Look up the device numbers of the map named mapname.
+ *
+ * On success *major and *minor are filled in and 0 is returned.
+ * On any failure 1 is returned and *major / *minor are left untouched.
+ */
+int
+dm_devn (char * mapname, int *major, int *minor)
+{
+       int r = 1;
+       struct dm_task *dmt;
+       struct dm_info info;
+
+       /* failing to create the task is a failure like any other: return 1 */
+       if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+               return r;
+
+       if (!dm_task_set_name(dmt, mapname))
+               goto out;
+
+       if (!dm_task_run(dmt))
+               goto out;
+
+       if (!dm_task_get_info(dmt, &info))
+               goto out;
+
+       *major = info.major;
+       *minor = info.minor;
+
+       r = 0;
+out:
+       dm_task_destroy(dmt);
+       return r;
+}
+
diff --git a/kpartx/devmapper.h b/kpartx/devmapper.h
new file mode 100644 (file)
index 0000000..f8692cc
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef KPARTX_DEVMAPPER_H
+#define KPARTX_DEVMAPPER_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+/*
+ * Split / build a device number: the major number sits in bits 8-19, the
+ * minor number in bits 0-7 plus bits 20 and up.  Arguments are
+ * parenthesized so that expressions such as MINOR(a | b) expand correctly.
+ */
+#define MAJOR(dev)      (((dev) & 0xfff00) >> 8)
+#define MINOR(dev)      (((dev) & 0xff) | (((dev) >> 12) & 0xfff00))
+#define MKDEV(ma,mi)    (((mi) & 0xff) | ((ma) << 8) | (((mi) & ~0xff) << 12))
+
+/* returns 0 when a target matching the name prefix and version is listed */
+int dm_prereq (char *, int, int, int);
+/* runs a name-only command (task, name, no_flush, cookie) */
+int dm_simplecmd (int, const char *, int, uint32_t *);
+/* creates or reloads a single-target map */
+int dm_addmap (int, const char *, const char *, const char *, uint64_t,
+              const char *, int, mode_t, uid_t, gid_t, uint32_t *);
+/* returns 1 when a map with the given name exists, 0 otherwise */
+int dm_map_present (char *);
+/* returns a malloc'ed copy of the map name, or NULL */
+char * dm_mapname(int major, int minor);
+/* returns the first dependency of the named map, or 0 */
+dev_t dm_get_first_dep(char *devname);
+/* returns a malloc'ed copy of the map uuid, or NULL */
+char * dm_mapuuid(int major, int minor);
+/* looks up the device numbers of the named map into *major / *minor */
+int dm_devn (char * mapname, int *major, int *minor);
+
+#endif /* KPARTX_DEVMAPPER_H */
diff --git a/kpartx/dos.c b/kpartx/dos.c
new file mode 100644 (file)
index 0000000..1691105
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Source: copy of util-linux' partx dos.c
+ *
+ * Copyrights of the original file apply 
+ * Copyright (c) 2005 Bastian Blank
+ */
+#include "kpartx.h"
+#include "byteorder.h"
+#include <stdio.h>
+#include <string.h>
+#include "dos.h"
+
+/*
+ * Return 1 when the partition type byte marks an extended partition
+ * (0x05, 0x0f or 0x85), 0 otherwise.
+ */
+static int
+is_extended(int type) {
+       switch (type) {
+       case 0x05:
+       case 0x0f:
+       case 0x85:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Walk the chain of boot records inside extended partition ep and store
+ * the partitions found in sp[0..ns-1].
+ *
+ * fd - descriptor handed to getblock()
+ * ep - table entry of the extended partition; its start_sect is the base
+ *      for all links to further boot records
+ * en - index of ep in the primary table; stored slices get container en+1
+ * sp - output array
+ * ns - number of entries available in sp
+ *
+ * Returns the number of slices stored.  The walk stops early, returning
+ * what was collected so far, when a block cannot be read, lacks the
+ * 0x55 0xaa signature, or sp is full.
+ */
+static int
+read_extended_partition(int fd, struct partition *ep, int en,
+                       struct slice *sp, int ns)
+{
+       struct partition p;
+       unsigned long start, here, next;
+       unsigned char *bp;
+       int loopct = 0;
+       int moretodo = 1;
+       int i, n=0;
+
+       next = start = le32_to_cpu(ep->start_sect);
+
+       while (moretodo) {
+               here = next;
+               moretodo = 0;
+               /* give up after more than 100 blocks in a row without a slice */
+               if (++loopct > 100)
+                       return n;
+
+               bp = (unsigned char *)getblock(fd, here);
+               if (bp == NULL)
+                       return n;
+
+               if (bp[510] != 0x55 || bp[511] != 0xaa)
+                       return n;
+
+               /* only the first two table entries of each record are used */
+               for (i=0; i<2; i++) {
+                       memcpy(&p, bp + 0x1be + i * sizeof (p), sizeof (p));
+                       if (is_extended(p.sys_type)) {
+                               /*
+                                * Link to the next record, relative to the
+                                * start of the outer extended partition;
+                                * only the first non-empty link is followed.
+                                */
+                               if (p.nr_sects && !moretodo) {
+                                       next = start + le32_to_cpu(p.start_sect);
+                                       moretodo = 1;
+                               }
+                               continue;
+                       }
+                       if (n < ns) {
+                               /* data entries are relative to this record */
+                               sp[n].start = here + le32_to_cpu(p.start_sect);
+                               sp[n].size = le32_to_cpu(p.nr_sects);
+                               sp[n].container = en + 1;
+                               n++;
+                       } else {
+                               fprintf(stderr,
+                                   "dos_extd_partition: too many slices\n");
+                               return n;
+                       }
+                       /* a slice was stored: restart the runaway counter */
+                       loopct = 0;
+               }
+       }
+       return n;
+}
+
+/*
+ * Return 1 when the partition type byte is 0xEE, the type this reader
+ * treats as "disk is GPT partitioned", 0 otherwise.
+ */
+static int
+is_gpt(int type) {
+       if (type != 0xEE)
+               return 0;
+       return 1;
+}
+
+/*
+ * Read a DOS partition table from the block at all.start.
+ *
+ * The four primary entries go to sp[0..3]; partitions found inside
+ * extended entries are appended from sp[4] on.
+ *
+ * Returns -1 when the block cannot be read or lacks the 0x55 0xaa
+ * signature, 0 as soon as an entry of type 0xEE is met, and otherwise
+ * 4 plus the number of slices found in extended partitions.
+ */
+int
+read_dos_pt(int fd, struct slice all, struct slice *sp, int ns) {
+       const unsigned char *sector;
+       const unsigned char *entry;
+       struct partition pe;
+       unsigned long first = all.start;
+       int count = 4;
+       int idx;
+
+       sector = (unsigned char *)getblock(fd, first);
+       if (sector == NULL)
+               return -1;
+
+       if (sector[510] != 0x55 || sector[511] != 0xaa)
+               return -1;
+
+       for (idx = 0; idx < 4; idx++) {
+               /* the table starts at offset 0x1be of the block */
+               entry = sector + 0x1be + idx * sizeof (pe);
+               memcpy(&pe, entry, sizeof (pe));
+
+               if (is_gpt(pe.sys_type))
+                       return 0;
+
+               if (idx >= ns) {
+                       fprintf(stderr,
+                               "dos_partition: too many slices\n");
+                       break;
+               }
+               sp[idx].start =  le32_to_cpu(pe.start_sect);
+               sp[idx].size = le32_to_cpu(pe.nr_sects);
+
+               if (is_extended(pe.sys_type))
+                       count += read_extended_partition(fd, &pe, idx,
+                                                        sp + count,
+                                                        ns - count);
+       }
+       return count;
+}
diff --git a/kpartx/dos.h b/kpartx/dos.h
new file mode 100644 (file)
index 0000000..f45e7f6
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef DOS_H_INCLUDED
+#define DOS_H_INCLUDED
+
+/*
+ * One entry of a DOS partition table.  The structure is packed because
+ * dos.c copies entries straight out of the on-disk block with memcpy();
+ * start_sect and nr_sects are read through le32_to_cpu() there.
+ */
+struct partition {
+       unsigned char boot_ind; /* 0x80 - active */
+       unsigned char bh, bs, bc;
+       unsigned char sys_type; /* type byte tested by is_extended()/is_gpt() */
+       unsigned char eh, es, ec;
+       unsigned int start_sect;
+       unsigned int nr_sects;
+} __attribute__((packed));
+
+#endif                         /* DOS_H_INCLUDED */
diff --git a/kpartx/efi.h b/kpartx/efi.h
new file mode 100644 (file)
index 0000000..1cbd961
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+  efi.[ch] - Manipulates EFI variables as exported in /proc/efi/vars
+  Copyright (C) 2001 Dell Computer Corporation <Matt_Domsch@dell.com>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef EFI_H
+#define EFI_H
+
+/*
+ * Extensible Firmware Interface
+ * Based on 'Extensible Firmware Interface Specification'
+ *      version 1.02, 12 December, 2000
+ */
+#include <stdint.h>
+#include <string.h>
+
+/* A GUID held as 16 raw bytes, in the order produced by EFI_GUID(). */
+typedef struct {
+       uint8_t  b[16];
+} efi_guid_t;
+
+/*
+ * Build an efi_guid_t compound literal.  The 32-bit value a and the 16-bit
+ * values b and c are stored least-significant byte first; d0..d7 are
+ * stored as given.
+ */
+#define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \
+((efi_guid_t) \
+{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
+  (b) & 0xff, ((b) >> 8) & 0xff, \
+  (c) & 0xff, ((c) >> 8) & 0xff, \
+  (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
+
+
+/******************************************************
+ * GUIDs
+ ******************************************************/
+/* The all-zero GUID. */
+#define NULL_GUID \
+EFI_GUID( 0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)
+
+/*
+ * Compare two GUIDs byte by byte.
+ * Returns 0 when they are equal, otherwise the non-zero memcmp() result.
+ */
+static inline int
+efi_guidcmp(efi_guid_t left, efi_guid_t right)
+{
+       const void *lhs = &left;
+       const void *rhs = &right;
+
+       return memcmp(lhs, rhs, sizeof (efi_guid_t));
+}
+
+typedef uint16_t efi_char16_t;         /* UNICODE character */
+
+#endif /* EFI_H */
diff --git a/kpartx/gpt.c b/kpartx/gpt.c
new file mode 100644 (file)
index 0000000..047a829
--- /dev/null
@@ -0,0 +1,647 @@
+/*
+    gpt.[ch]
+
+    Copyright (C) 2000-2001 Dell Computer Corporation <Matt_Domsch@dell.com> 
+
+    EFI GUID Partition Table handling
+    Per Intel EFI Specification v1.02
+    http://developer.intel.com/technology/efi/efi.htm
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include "gpt.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/fs.h>
+#include "crc32.h"
+
+/*
+ * Conversions between little-endian values and host byte order:
+ * identity on little-endian hosts, byte swaps on big-endian hosts.
+ * No definition is provided for any other BYTE_ORDER.
+ */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#  define __le16_to_cpu(x) (x)
+#  define __le32_to_cpu(x) (x)
+#  define __le64_to_cpu(x) (x)
+#  define __cpu_to_le32(x) (x)
+#elif BYTE_ORDER == BIG_ENDIAN
+#  define __le16_to_cpu(x) bswap_16(x)
+#  define __le32_to_cpu(x) bswap_32(x)
+#  define __le64_to_cpu(x) bswap_64(x)
+#  define __cpu_to_le32(x) bswap_32(x)
+#endif
+
+/*
+ * Fallback definitions of the block-device ioctl request codes used in
+ * this file; each one only takes effect when the headers included above
+ * have not already defined it.
+ */
+#ifndef BLKGETLASTSECT
+#define BLKGETLASTSECT  _IO(0x12,108)   /* get last sector of block device */
+#endif
+#ifndef BLKGETSIZE
+#define BLKGETSIZE _IO(0x12,96)                /* return device size */
+#endif
+#ifndef BLKSSZGET
+#define BLKSSZGET  _IO(0x12,104)       /* get block device sector size */
+#endif
+#ifndef BLKGETSIZE64
+#define BLKGETSIZE64 _IOR(0x12,114,sizeof(uint64_t))   /* return device size in bytes (u64 *arg) */
+#endif
+
+/* Argument block handed to the BLKGETLASTSECT ioctl by read_lastoddsector(). */
+struct blkdev_ioctl_param {
+        unsigned int block;            /* read_lastoddsector() always passes 0 */
+        size_t content_length;         /* size of block_contents in bytes */
+        char * block_contents;         /* caller-supplied destination buffer */
+};
+
+/**
+ * efi_crc32() - EFI flavour of the crc32 function
+ * @buf: data to checksum
+ * @len: number of bytes in @buf
+ *
+ * Description: Returns the EFI-style CRC32 value of @buf.
+ *
+ * crc32() is seeded with ~0 and its result is xor'ed with ~0.
+ * According to the original author this is the little endian Ethernet
+ * polynomial; the EFI Specification, v1.02, refers to Dr. Dobbs Journal,
+ * May 1994 for it (the article is actually in the May 1992 issue).
+ */
+static inline uint32_t
+efi_crc32(const void *buf, unsigned long len)
+{
+       const long all_ones = ~0L;
+
+       return (crc32(all_ones, buf, len) ^ all_ones);
+}
+
+/**
+ * is_pmbr_valid(): check whether a sector is a valid Protective MBR
+ * @mbr: legacy MBR structure to inspect, may be NULL
+ *
+ * Description: Returns 1 if the PMBR is valid, 0 otherwise (including
+ * when @mbr is NULL).  A PMBR is valid when both hold:
+ *  1) the signature field equals MSDOS_MBR_SIGNATURE
+ *  2) at least one of the four partition slots has type 0xEE
+ */
+static int
+is_pmbr_valid(legacy_mbr *mbr)
+{
+       int slot;
+
+       if (!mbr)
+               return 0;
+       if (__le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE)
+               return 0;
+
+       /* the signature is fine: look for a protective partition entry */
+       for (slot = 0; slot < 4; slot++) {
+               if (mbr->partition[slot].sys_type == EFI_PMBR_OSTYPE_EFI_GPT)
+                       return 1;
+       }
+       return 0;
+}
+
+
+/************************************************************
+ * get_sector_size
+ * Requires:
+ *  - filedes is an open file descriptor, suitable for reading
+ * Modifies: nothing
+ * Returns:
+ *  the sector size reported by the BLKSSZGET ioctl,
+ *  or 512 when that ioctl fails.
+ ************************************************************/
+static int
+get_sector_size(int filedes)
+{
+       int reported = 512;
+
+       if (ioctl(filedes, BLKSSZGET, &reported) != 0)
+               return 512;
+       return reported;
+}
+
+/************************************************************
+ * _get_num_sectors
+ * Requires:
+ *  - filedes is an open file descriptor, suitable for reading
+ * Modifies: nothing
+ * Returns:
+ *  the device size in sectors (bytes reported by BLKGETSIZE64
+ *  divided by get_sector_size()) on success
+ *  0 when the BLKGETSIZE64 ioctl fails
+ *
+ * Only BLKGETSIZE64 is queried; there is no BLKGETSIZE fallback.
+ *  Historical note from the original author: kernels 2.4.15-2.4.18
+ *  and 2.5.0-2.5.3 have a broken BLKGETSIZE64 which returns the
+ *  number of 512-byte sectors, not the size of the disk in bytes.
+ *  Fixed in kernels 2.4.18-pre8 and 2.5.4-pre3.
+ ************************************************************/
+static uint64_t
+_get_num_sectors(int filedes)
+{
+       uint64_t size_in_bytes = 0;
+
+       if (ioctl(filedes, BLKGETSIZE64, &size_in_bytes) != 0)
+               return 0;
+
+       return size_in_bytes / get_sector_size(filedes);
+}
+
+/************************************************************
+ * last_lba(): return number of last logical block of device
+ *
+ * @filedes: open file descriptor of the device
+ *
+ * Description: returns the last LBA (sector count minus one) for
+ * block devices, 0 on error.
+ * Notes: anything that is not a block device is reported on stderr
+ *        and yields 0.  A device for which _get_num_sectors()
+ *        returns 1 also yields 0, so that case cannot be told
+ *        apart from an error.
+ ************************************************************/
+
+static uint64_t
+last_lba(int filedes)
+{
+       struct stat st;
+       uint64_t nsectors;
+
+       memset(&st, 0, sizeof (st));
+       if (fstat(filedes, &st) == -1) {
+               fprintf(stderr, "last_lba() could not stat: %s\n",
+                       strerror(errno));
+               return 0;
+       }
+
+       if (!S_ISBLK(st.st_mode)) {
+               fprintf(stderr,
+                       "last_lba(): I don't know how to handle files with mode %x\n",
+                       st.st_mode);
+               return 0;
+       }
+
+       nsectors = _get_num_sectors(filedes);
+       if (!nsectors)
+               return 0;
+       return nsectors - 1;
+}
+
+
+/*
+ * read_lastoddsector(): fetch the last sector through BLKGETLASTSECT
+ * @fd      open file descriptor of the device
+ * @lba     not used by this function
+ * @buffer  destination buffer, may be NULL
+ * @count   size of @buffer in bytes
+ *
+ * Returns 1 when the ioctl returned 0, otherwise 0 (also when @buffer
+ * is NULL).  An ioctl result of -1 is reported with perror().
+ */
+static ssize_t
+read_lastoddsector(int fd, uint64_t lba, void *buffer, size_t count)
+{
+        struct blkdev_ioctl_param request;
+        int status;
+
+        if (buffer == NULL)
+                return 0;
+
+        request.block = 0; /* read the last sector */
+        request.content_length = count;
+        request.block_contents = buffer;
+
+        status = ioctl(fd, BLKGETLASTSECT, &request);
+        if (status == -1)
+                perror("read failed");
+
+        return !status;
+}
+
+/*
+ * read_lba(): read @bytes bytes starting at logical block @lba
+ * @fd      open file descriptor of the device
+ * @lba     logical block address to read from
+ * @buffer  destination buffer of at least @bytes bytes
+ * @bytes   number of bytes requested
+ *
+ * Returns the number of bytes read (or 1 when the last-odd-sector
+ * ioctl fallback succeeded), and 0 on any failure.  A failed lseek()
+ * or read() yields 0, never a negative value: every caller tests the
+ * result with '!', which would mistake -1 for success.
+ */
+static ssize_t
+read_lba(int fd, uint64_t lba, void *buffer, size_t bytes)
+{
+       int sector_size = get_sector_size(fd);
+       off_t offset = lba * sector_size;
+       uint64_t lastlba;
+       ssize_t bytesread;
+
+       /* without a successful seek, read() would return data from the wrong place */
+       if (lseek(fd, offset, SEEK_SET) == (off_t) -1)
+               return 0;
+
+       bytesread = read(fd, buffer, bytes);
+       if (bytesread < 0)
+               return 0;
+
+       lastlba = last_lba(fd);
+       if (!lastlba)
+               return bytesread;
+
+       /* Kludge.  This is necessary to read/write the last
+          block of an odd-sized disk, until Linux 2.5.x kernel fixes.
+          This is only used by gpt.c, and only to read
+          one sector, so we don't have to be fancy.
+        */
+       if (!bytesread && !(lastlba & 1) && lba == lastlba) {
+               bytesread = read_lastoddsector(fd, lba, buffer, bytes);
+       }
+       return bytesread;
+}
+
+/**
+ * alloc_read_gpt_entries(): reads partition entries from disk
+ * @fd  is an open file descriptor to the whole disk
+ * @gpt is the GPT header describing the partition entry array
+ * Description: Returns ptes on success,  NULL on error.
+ * Allocates space for PTEs based on information found in @gpt.
+ * The header fields come from disk and are not trusted: the array is
+ * rejected when it is empty, when an entry is smaller than gpt_entry
+ * (callers index the result as gpt_entry[]), or when the total size
+ * does not fit in 32 bits.
+ * Notes: remember to free pte when you're done!
+ */
+static gpt_entry *
+alloc_read_gpt_entries(int fd, gpt_header * gpt)
+{
+       gpt_entry *pte;
+       uint64_t num_entries, entry_size, total;
+       size_t count;
+
+       if (!gpt)
+               return NULL;
+
+       num_entries = __le32_to_cpu(gpt->num_partition_entries);
+       entry_size = __le32_to_cpu(gpt->sizeof_partition_entry);
+
+       /* a smaller on-disk entry would let ptes[i] run past the buffer */
+       if (!num_entries || entry_size < sizeof (gpt_entry))
+               return NULL;
+
+       /* two 32-bit factors cannot wrap a 64-bit product */
+       total = num_entries * entry_size;
+       if (total > UINT32_MAX)
+               return NULL;
+       count = (size_t) total;
+
+       pte = calloc(1, count);
+       if (!pte)
+               return NULL;
+
+       /* <= 0 so that a negative error return is not taken for success */
+       if (read_lba(fd, __le64_to_cpu(gpt->partition_entry_lba), pte,
+                    count) <= 0) {
+               free(pte);
+               return NULL;
+       }
+       return pte;
+}
+
+/**
+ * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk
+ * @fd  is an open file descriptor to the whole disk
+ * @lba is the Logical Block Address of the partition table
+ *
+ * Description: returns GPT header on success, NULL on error.   Allocates
+ * a zeroed gpt_header and fills it from @lba on @fd.  No field of the
+ * header is validated here.
+ * Note: remember to free gpt when finished with it.
+ */
+static gpt_header *
+alloc_read_gpt_header(int fd, uint64_t lba)
+{
+       gpt_header *gpt = calloc(1, sizeof (*gpt));
+
+       if (!gpt)
+               return NULL;
+
+       /* <= 0 so that a negative error return is not taken for success */
+       if (read_lba(fd, lba, gpt, sizeof (*gpt)) <= 0) {
+               free(gpt);
+               return NULL;
+       }
+
+       return gpt;
+}
+
+/**
+ * is_gpt_valid() - tests one GPT header and PTEs for validity
+ * @fd  is an open file descriptor to the whole disk
+ * @lba is the logical block address of the GPT header to test
+ * @gpt is a GPT header ptr, filled on return.
+ * @ptes is a PTEs ptr, filled on return.
+ *
+ * Description: returns 1 if valid,  0 on error.
+ * If valid, returns pointers to newly allocated GPT header and PTEs,
+ * which the caller must free.  On failure *@gpt is NULL and nothing
+ * allocated here is left behind.
+ *
+ * Checks, in order: header signature, header_size bounds, header CRC,
+ * my_lba == @lba, partition entry array CRC.
+ */
+static int
+is_gpt_valid(int fd, uint64_t lba,
+             gpt_header ** gpt, gpt_entry ** ptes)
+{
+       uint32_t crc, origcrc, hdr_size;
+
+       if (!gpt || !ptes)
+               return 0;
+       if (!(*gpt = alloc_read_gpt_header(fd, lba)))
+               return 0;
+
+       /* Check the GUID Partition Table signature */
+       if (__le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE)
+               goto fail;
+
+       /* header_size comes from disk and is the CRC length below: it must
+        * cover the defined fields and must not exceed the buffer that
+        * alloc_read_gpt_header() allocated */
+       hdr_size = __le32_to_cpu((*gpt)->header_size);
+       if (hdr_size < sizeof (gpt_header) - sizeof ((*gpt)->reserved2) ||
+           hdr_size > sizeof (gpt_header))
+               goto fail;
+
+       /* Check the GUID Partition Table Header CRC; the CRC field itself
+        * is zero while the checksum is computed and restored afterwards */
+       origcrc = __le32_to_cpu((*gpt)->header_crc32);
+       (*gpt)->header_crc32 = 0;
+       crc = efi_crc32(*gpt, hdr_size);
+       (*gpt)->header_crc32 = __cpu_to_le32(origcrc);
+       if (crc != origcrc)
+               goto fail;
+
+       /* Check that the my_lba entry points to the LBA
+        * that contains the GPT we read */
+       if (__le64_to_cpu((*gpt)->my_lba) != lba)
+               goto fail;
+
+       if (!(*ptes = alloc_read_gpt_entries(fd, *gpt)))
+               goto fail;
+
+       /* Check the GUID Partition Entry Array CRC */
+       crc = efi_crc32(*ptes,
+                       __le32_to_cpu((*gpt)->num_partition_entries) *
+                       __le32_to_cpu((*gpt)->sizeof_partition_entry));
+       if (crc != __le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
+               free(*ptes);
+               *ptes = NULL;
+               goto fail;
+       }
+
+       /* We're done, all's well */
+       return 1;
+
+ fail:
+       free(*gpt);
+       *gpt = NULL;
+       return 0;
+}
+/**
+ * compare_gpts() - Cross-check the primary and alternate GPT headers
+ * @pgpt is the primary GPT header
+ * @agpt is the alternate GPT header
+ * @lastlba is the last LBA number
+ * Description: Returns nothing.  Sanity checks pgpt and agpt fields
+ * and prints warnings on discrepancies.  Does nothing when either
+ * header is NULL.  Neither header is modified.
+ */
+static void
+compare_gpts(gpt_header *pgpt, gpt_header *agpt, uint64_t lastlba)
+{
+       /* counts the discrepancies found; only tested for non-zero at the end */
+       int error_found = 0;
+       if (!pgpt || !agpt)
+               return;
+       /* the two headers must point at each other */
+       if (__le64_to_cpu(pgpt->my_lba) != __le64_to_cpu(agpt->alternate_lba)) {
+               error_found++;
+               fprintf(stderr, 
+                      "GPT:Primary header LBA != Alt. header alternate_lba\n");
+#ifdef DEBUG
+               fprintf(stderr,  "GPT:%" PRIx64 " != %" PRIx64 "\n",
+                      __le64_to_cpu(pgpt->my_lba),
+                       __le64_to_cpu(agpt->alternate_lba));
+#endif
+       }
+       if (__le64_to_cpu(pgpt->alternate_lba) != __le64_to_cpu(agpt->my_lba)) {
+               error_found++;
+               fprintf(stderr, 
+                      "GPT:Primary header alternate_lba != Alt. header my_lba\n");
+#ifdef DEBUG
+               fprintf(stderr,  "GPT:%" PRIx64 " != %" PRIx64 "\n",
+                      __le64_to_cpu(pgpt->alternate_lba),
+                       __le64_to_cpu(agpt->my_lba));
+#endif
+       }
+       /* the remaining header fields must be identical in both copies */
+       if (__le64_to_cpu(pgpt->first_usable_lba) !=
+            __le64_to_cpu(agpt->first_usable_lba)) {
+               error_found++;
+               fprintf(stderr,  "GPT:first_usable_lbas don't match.\n");
+#ifdef DEBUG
+               fprintf(stderr,  "GPT:%" PRIx64 " != %" PRIx64 "\n",
+                      __le64_to_cpu(pgpt->first_usable_lba),
+                       __le64_to_cpu(agpt->first_usable_lba));
+#endif
+       }
+       if (__le64_to_cpu(pgpt->last_usable_lba) !=
+            __le64_to_cpu(agpt->last_usable_lba)) {
+               error_found++;
+               fprintf(stderr,  "GPT:last_usable_lbas don't match.\n");
+#ifdef DEBUG
+               fprintf(stderr,  "GPT:%" PRIx64 " != %" PRIx64 "\n",
+                      __le64_to_cpu(pgpt->last_usable_lba),
+                       __le64_to_cpu(agpt->last_usable_lba));
+#endif
+       }
+       if (efi_guidcmp(pgpt->disk_guid, agpt->disk_guid)) {
+               error_found++;
+               fprintf(stderr,  "GPT:disk_guids don't match.\n");
+       }
+       if (__le32_to_cpu(pgpt->num_partition_entries) !=
+            __le32_to_cpu(agpt->num_partition_entries)) {
+               error_found++;
+               fprintf(stderr,  "GPT:num_partition_entries don't match: "
+                      "0x%x != 0x%x\n",
+                      __le32_to_cpu(pgpt->num_partition_entries),
+                      __le32_to_cpu(agpt->num_partition_entries));
+       }
+       if (__le32_to_cpu(pgpt->sizeof_partition_entry) !=
+            __le32_to_cpu(agpt->sizeof_partition_entry)) {
+               error_found++;
+               fprintf(stderr, 
+                      "GPT:sizeof_partition_entry values don't match: "
+                      "0x%x != 0x%x\n",
+                       __le32_to_cpu(pgpt->sizeof_partition_entry),
+                      __le32_to_cpu(agpt->sizeof_partition_entry));
+       }
+       if (__le32_to_cpu(pgpt->partition_entry_array_crc32) !=
+            __le32_to_cpu(agpt->partition_entry_array_crc32)) {
+               error_found++;
+               fprintf(stderr, 
+                      "GPT:partition_entry_array_crc32 values don't match: "
+                      "0x%x != 0x%x\n",
+                       __le32_to_cpu(pgpt->partition_entry_array_crc32),
+                      __le32_to_cpu(agpt->partition_entry_array_crc32));
+       }
+       /* both headers must agree that the alternate sits on the last LBA */
+       if (__le64_to_cpu(pgpt->alternate_lba) != lastlba) {
+               error_found++;
+               fprintf(stderr, 
+                      "GPT:Primary header thinks Alt. header is not at the end of the disk.\n");
+#ifdef DEBUG
+               fprintf(stderr,  "GPT:%" PRIx64 " != %" PRIx64 "\n",
+                      __le64_to_cpu(pgpt->alternate_lba), lastlba);
+#endif
+       }
+
+       if (__le64_to_cpu(agpt->my_lba) != lastlba) {
+               error_found++;
+               fprintf(stderr, 
+                      "GPT:Alternate GPT header not at the end of the disk.\n");
+#ifdef DEBUG
+               fprintf(stderr,  "GPT:%" PRIx64 " != %" PRIx64 "\n",
+                      __le64_to_cpu(agpt->my_lba), lastlba);
+#endif
+       }
+
+       if (error_found)
+               fprintf(stderr, 
+                      "GPT: Use GNU Parted to correct GPT errors.\n");
+       return;
+}
+
+/**
+ * find_valid_gpt() - Search disk for valid GPT headers and PTEs
+ * @fd  is an open file descriptor to the whole disk
+ * @gpt is a GPT header ptr, filled on return.
+ * @ptes is a PTEs ptr, filled on return.
+ * Description: Returns 1 if valid, 0 on error.
+ * If valid, returns pointers to newly allocated GPT header and PTEs;
+ * the caller owns both and must free them.
+ * Validity depends on finding either the Primary GPT header and PTEs valid,
+ * or the Alternate GPT header and PTEs valid, and the PMBR valid.
+ * A non-zero force_gpt accepts the GPT even when the PMBR is invalid.
+ * The primary GPT is preferred when both copies are valid.
+ */
+static int
+find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes)
+{
+        /* declared here, defined in another translation unit */
+        extern int force_gpt;
+       int good_pgpt = 0, good_agpt = 0, good_pmbr = 0;
+       gpt_header *pgpt = NULL, *agpt = NULL;
+       gpt_entry *pptes = NULL, *aptes = NULL;
+       legacy_mbr *legacymbr = NULL;
+       uint64_t lastlba;
+       if (!gpt || !ptes)
+               return 0;
+
+       /* a last LBA of 0 (error, or not a block device) ends the search */
+       if (!(lastlba = last_lba(fd)))
+               return 0;
+       good_pgpt = is_gpt_valid(fd, GPT_PRIMARY_PARTITION_TABLE_LBA,
+                                &pgpt, &pptes);
+        /* look for the alternate where the primary says it is, then at
+           the last LBA; with no valid primary only the last LBA is tried */
+        if (good_pgpt) {
+               good_agpt = is_gpt_valid(fd,
+                                         __le64_to_cpu(pgpt->alternate_lba),
+                                        &agpt, &aptes);
+                if (!good_agpt) {
+                        good_agpt = is_gpt_valid(fd, lastlba,
+                                                 &agpt, &aptes);
+                }
+        }
+        else {
+                good_agpt = is_gpt_valid(fd, lastlba,
+                                         &agpt, &aptes);
+        }
+
+        /* The obviously unsuccessful case */
+        if (!good_pgpt && !good_agpt) {
+                goto fail;
+        }
+
+       /* This will be added to the EFI Spec. per Intel after v1.02. */
+        /* an allocation failure leaves good_pmbr at 0, i.e. PMBR invalid */
+        legacymbr = malloc(sizeof (*legacymbr));
+        if (legacymbr) {
+                memset(legacymbr, 0, sizeof (*legacymbr));
+                read_lba(fd, 0, (uint8_t *) legacymbr,
+                         sizeof (*legacymbr));
+                good_pmbr = is_pmbr_valid(legacymbr);
+                free(legacymbr);
+                legacymbr=NULL;
+        }
+
+        /* Failure due to bad PMBR */
+        if ((good_pgpt || good_agpt) && !good_pmbr && !force_gpt) {
+                fprintf(stderr,
+                       "  Warning: Disk has a valid GPT signature "
+                       "but invalid PMBR.\n"
+                       "  Assuming this disk is *not* a GPT disk anymore.\n"
+                       "  Use gpt kernel option to override.  "
+                       "Use GNU Parted to correct disk.\n");
+                goto fail;
+        }
+
+        /* Would fail due to bad PMBR, but force GPT anyhow */
+        if ((good_pgpt || good_agpt) && !good_pmbr && force_gpt) {
+                fprintf(stderr, 
+                       "  Warning: Disk has a valid GPT signature but "
+                       "invalid PMBR.\n"
+                       "  Use GNU Parted to correct disk.\n"
+                       "  gpt option taken, disk treated as GPT.\n");
+        }
+
+        /* only prints warnings; returns at once if either header is NULL */
+        compare_gpts(pgpt, agpt, lastlba);
+
+        /* The good cases */
+        if (good_pgpt && (good_pmbr || force_gpt)) {
+                /* hand the primary copy to the caller, drop the alternate */
+                *gpt  = pgpt;
+                *ptes = pptes;
+                if (agpt)  { free(agpt);   agpt = NULL; }
+                if (aptes) { free(aptes); aptes = NULL; }
+                if (!good_agpt) {
+                        fprintf(stderr, 
+                              "Alternate GPT is invalid, "
+                               "using primary GPT.\n");
+                }
+                return 1;
+        }
+        else if (good_agpt && (good_pmbr || force_gpt)) {
+                /* hand the alternate copy to the caller, drop the primary */
+                *gpt  = agpt;
+                *ptes = aptes;
+                if (pgpt)  { free(pgpt);   pgpt = NULL; }
+                if (pptes) { free(pptes); pptes = NULL; }
+                fprintf(stderr, 
+                       "Primary GPT is invalid, using alternate GPT.\n");
+                return 1;
+        }
+
+ fail:
+        /* release everything read so far and report "no GPT" */
+        if (pgpt)  { free(pgpt);   pgpt=NULL; }
+        if (agpt)  { free(agpt);   agpt=NULL; }
+        if (pptes) { free(pptes); pptes=NULL; }
+        if (aptes) { free(aptes); aptes=NULL; }
+        *gpt = NULL;
+        *ptes = NULL;
+        return 0;
+}
+
+/**
+ * read_gpt_pt() - fill a slice array from the GPT of a disk
+ * @fd  - open file descriptor to the whole disk
+ * @all - slice with start/size of whole disk (not used here)
+ * @sp  - array receiving one slice per GPT partition entry
+ * @ns  - number of elements available in @sp
+ *
+ * Entry i of the GPT is stored in sp[i]; entries whose type GUID is
+ * NULL_GUID are stored with start 0 and size 0.  At most @ns entries
+ * are examined.
+ *
+ *  0 if this isn't our partition table
+ *  otherwise the index of the last used entry plus one
+ *
+ */
+int
+read_gpt_pt (int fd, struct slice all, struct slice *sp, int ns)
+{
+       gpt_header *gpt = NULL;
+       gpt_entry *ptes = NULL;
+       uint32_t i, limit;
+       int last_used_index = -1;
+
+       if (!find_valid_gpt (fd, &gpt, &ptes) || !gpt || !ptes) {
+               free (gpt);
+               free (ptes);
+               return 0;
+       }
+
+       /* clamp to the caller's array: a negative ns must not be converted
+        * to a huge unsigned bound by the comparison with i */
+       limit = (sp && ns > 0) ? (uint32_t) ns : 0;
+       if (__le32_to_cpu(gpt->num_partition_entries) < limit)
+               limit = __le32_to_cpu(gpt->num_partition_entries);
+
+       for (i = 0; i < limit; i++) {
+               if (!efi_guidcmp (NULL_GUID, ptes[i].partition_type_guid)) {
+                       /* unused entry: keep the slot so indices stay aligned */
+                       sp[i].start = 0;
+                       sp[i].size = 0;
+               } else {
+                       sp[i].start = __le64_to_cpu(ptes[i].starting_lba);
+                       sp[i].size  = __le64_to_cpu(ptes[i].ending_lba) -
+                               __le64_to_cpu(ptes[i].starting_lba) + 1;
+                       last_used_index = (int) i;
+               }
+       }
+       free (ptes);
+       free (gpt);
+       return last_used_index+1;
+}
diff --git a/kpartx/gpt.h b/kpartx/gpt.h
new file mode 100644 (file)
index 0000000..a073b42
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+    gpt.[ch]
+
+    Copyright (C) 2000-2001 Dell Computer Corporation <Matt_Domsch@dell.com> 
+
+    EFI GUID Partition Table handling
+    Per Intel EFI Specification v1.02
+    http://developer.intel.com/technology/efi/efi.htm
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifndef _GPT_H
+#define _GPT_H
+
+
+#include <inttypes.h>
+#include "kpartx.h"
+#include "dos.h"
+#include "efi.h"
+
+/* MBR partition type bytes; is_pmbr_valid() looks for EFI_PMBR_OSTYPE_EFI_GPT */
+#define EFI_PMBR_OSTYPE_EFI 0xEF
+#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE
+/* expected value of legacy_mbr.signature */
+#define MSDOS_MBR_SIGNATURE 0xaa55
+/* size in bytes that struct _gpt_header is padded to */
+#define GPT_BLOCK_SIZE 512
+
+/* the ASCII bytes "EFI PART" read as a little-endian 64-bit integer */
+#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL
+#define GPT_HEADER_REVISION_V1_02 0x00010200
+#define GPT_HEADER_REVISION_V1_00 0x00010000
+#define GPT_HEADER_REVISION_V0_99 0x00009900
+/* LBA at which find_valid_gpt() looks for the primary header */
+#define GPT_PRIMARY_PARTITION_TABLE_LBA 1
+
+/*
+ * GPT header as read from disk: 92 bytes of fields padded with
+ * reserved2 to GPT_BLOCK_SIZE bytes.  gpt.c converts the multi-byte
+ * fields with the __le*_to_cpu() helpers before using them.
+ */
+typedef struct _gpt_header {
+       uint64_t signature;                     /* compared with GPT_HEADER_SIGNATURE */
+       uint32_t revision;
+       uint32_t header_size;                   /* number of bytes covered by header_crc32 */
+       uint32_t header_crc32;                  /* computed with this field set to 0 */
+       uint32_t reserved1;
+       uint64_t my_lba;                        /* LBA this header was read from */
+       uint64_t alternate_lba;                 /* LBA of the other header copy */
+       uint64_t first_usable_lba;
+       uint64_t last_usable_lba;
+       efi_guid_t disk_guid;
+       uint64_t partition_entry_lba;           /* LBA where the entry array starts */
+       uint32_t num_partition_entries;
+       uint32_t sizeof_partition_entry;        /* size of one entry in bytes */
+       uint32_t partition_entry_array_crc32;   /* CRC over the whole entry array */
+       uint8_t reserved2[GPT_BLOCK_SIZE - 92];
+} __attribute__ ((packed)) gpt_header;
+
+/* Attribute bits of one partition entry: 1 + 47 + 16 = 64 bits in total. */
+typedef struct _gpt_entry_attributes {
+       uint64_t required_to_function:1;
+       uint64_t reserved:47;
+        uint64_t type_guid_specific:16;
+} __attribute__ ((packed)) gpt_entry_attributes;
+
+/* One partition entry of the GPT entry array, as read from disk. */
+typedef struct _gpt_entry {
+       efi_guid_t partition_type_guid;         /* NULL_GUID marks an unused entry */
+       efi_guid_t unique_partition_guid;
+       uint64_t starting_lba;                  /* first LBA of the partition */
+       uint64_t ending_lba;                    /* last LBA, inclusive */
+       gpt_entry_attributes attributes;
+       efi_char16_t partition_name[72 / sizeof(efi_char16_t)];        /* 72 bytes of 16-bit characters */
+} __attribute__ ((packed)) gpt_entry;
+
+
+/* 
+   These values are only defaults.  The actual on-disk structures
+   may define different sizes, so use those unless creating a new GPT disk!
+*/
+
+#define GPT_DEFAULT_RESERVED_PARTITION_ENTRY_ARRAY_SIZE 16384
+/* 
+   Number of actual partition entries should be calculated
+   as: 
+*/
+#define GPT_DEFAULT_RESERVED_PARTITION_ENTRIES \
+        (GPT_DEFAULT_RESERVED_PARTITION_ENTRY_ARRAY_SIZE / \
+         sizeof(gpt_entry))
+
+
+/* Protected Master Boot Record  & Legacy MBR share same structure */
+/* Needs to be packed because the u16s force misalignment. */
+
+/* struct partition is not defined in this header; presumably it comes from dos.h — confirm */
+typedef struct _legacy_mbr {
+       uint8_t bootcode[440];
+       uint32_t unique_mbr_signature;
+       uint16_t unknown;
+       struct partition partition[4];  /* is_pmbr_valid() scans sys_type of each slot */
+       uint16_t signature;             /* compared with MSDOS_MBR_SIGNATURE */
+} __attribute__ ((packed)) legacy_mbr;
+
+
+#define EFI_GPT_PRIMARY_PARTITION_TABLE_LBA 1
+
+/* Functions */
+int read_gpt_pt (int fd, struct slice all, struct slice *sp, int ns);
+
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4 
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/kpartx/kpartx.8 b/kpartx/kpartx.8
new file mode 100644 (file)
index 0000000..923be1e
--- /dev/null
@@ -0,0 +1,42 @@
+.TH KPARTX 8 "July 2006" "" "Linux Administrator's Manual"
+.SH NAME
+kpartx \- Create device maps from partition tables
+.SH SYNOPSIS
+.B kpartx
+.RB [\| \-a\ \c
+.BR |\ -d\ |\ -l \|]
+.RB [\| \-v \|]
+.RB wholedisk
+.SH DESCRIPTION
+This tool, derived from util-linux' partx, reads partition
+tables on the specified device and creates device maps over the
+partition segments detected. It is called from hotplug upon device map
+creation and deletion.
+.SH OPTIONS
+.TP
+.B \-a
+Add partition mappings
+.TP
+.B \-d
+Delete partition mappings
+.TP
+.B \-l
+List partition mappings that would be added by \-a
+.TP
+.B \-p
+Set device name-partition number delimiter
+.TP
+.B \-g
+Force GUID partition table (GPT)
+.TP
+.B \-v
+Operate verbosely
+.SH "SEE ALSO"
+.BR multipath (8)
+.BR multipathd (8)
+.BR hotplug (8)
+.SH "AUTHORS"
+This man page was assembled by Patrick Caulfield
+for the Debian project, from documentation provided
+by the multipath author Christophe Varoqui, <christophe.varoqui@opensvc.com> and others.
+
diff --git a/kpartx/kpartx.c b/kpartx/kpartx.c
new file mode 100644 (file)
index 0000000..3d33990
--- /dev/null
@@ -0,0 +1,644 @@
+/*
+ * Source: copy of util-linux' partx partx.c
+ *
+ * Copyrights of the original file applies
+ * Copyright (c) 2004, 2005 Christophe Varoqui
+ * Copyright (c) 2005 Kiyoshi Ueda
+ * Copyright (c) 2005 Lars Soltau
+ */
+
+/*
+ * Given a block device and a partition table type,
+ * try to parse the partition table, and list the
+ * contents. Optionally add or remove partitions.
+ *
+ * Read wholedisk and add all partitions:
+ *     kpartx [-a|-d|-l] [-v] wholedisk
+ *
+ * aeb, 2000-03-21
+ * cva, 2002-10-26
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <ctype.h>
+#include <libdevmapper.h>
+
+#include "devmapper.h"
+#include "crc32.h"
+#include "lopart.h"
+#include "kpartx.h"
+
+#define SIZE(a) (sizeof(a)/sizeof((a)[0]))
+
+#define READ_SIZE      1024
+#define MAXTYPES       64
+#define MAXSLICES      256
+#define DM_TARGET      "linear"
+#define LO_NAME_SIZE    64
+#define PARTNAME_SIZE  128
+#define DELIM_SIZE     8
+
+/* Slice table handed to the partition-table readers; holds MAXSLICES entries. */
+struct slice slices[MAXSLICES];
+
+/* Operation main() performs on the slices it finds. */
+enum action { LIST, ADD, DELETE };
+
+/*
+ * Registry of partition-table formats: a format name and the reader
+ * function that parses it. Filled by addpts(), at most MAXTYPES entries.
+ */
+struct pt {
+       char *type;
+       ptreader *fn;
+} pts[MAXTYPES];
+
+/* Number of entries of pts[] currently in use. */
+int ptct = 0;
+
+/*
+ * Append one partition-table reader to the pts[] registry.
+ * Terminates the program when the registry already holds MAXTYPES entries.
+ */
+static void
+addpts(char *t, ptreader f)
+{
+       struct pt *entry;
+
+       if (ptct >= MAXTYPES) {
+               fprintf(stderr, "addpts: too many types\n");
+               exit(1);
+       }
+
+       entry = &pts[ptct++];
+       entry->type = t;
+       entry->fn = f;
+}
+
+/*
+ * Register every supported partition-table reader, in probing order.
+ */
+static void
+initpts(void)
+{
+       static const struct {
+               char *name;
+               ptreader *reader;
+       } known[] = {
+               { "gpt", read_gpt_pt },
+               { "dos", read_dos_pt },
+               { "bsd", read_bsd_pt },
+               { "solaris", read_solaris_pt },
+               { "unixware", read_unixware_pt },
+               { "dasd", read_dasd_pt },
+               { "mac", read_mac_pt },
+               { "sun", read_sun_pt },
+       };
+       unsigned int idx;
+
+       for (idx = 0; idx < SIZE(known); idx++)
+               addpts(known[idx].name, known[idx].reader);
+}
+
+static char short_opts[] = "ladgvp:t:s";
+
+/* Used in gpt.c */
+int force_gpt=0;
+
+/*
+ * Print the command-line synopsis on stdout.
+ * Always returns 1.
+ */
+static int
+usage(void) {
+       static const char *const lines[] = {
+               "usage : kpartx [-a|-d|-l] [-v] wholedisk\n",
+               "\t-a add partition devmappings\n",
+               "\t-d del partition devmappings\n",
+               "\t-l list partitions devmappings that would be added by -a\n",
+               "\t-p set device name-partition number delimiter\n",
+               "\t-g force GUID partition table (GPT)\n",
+               "\t-v verbose\n",
+               "\t-s sync mode. Don't return until the partitions are created\n",
+       };
+       size_t idx;
+
+       for (idx = 0; idx < sizeof(lines) / sizeof(lines[0]); idx++)
+               fputs(lines[idx], stdout);
+
+       return 1;
+}
+
+/*
+ * Choose the default name/partition-number delimiter for a device.
+ *
+ * When the device name ends in a digit, 'p' is stored in *delimiter so
+ * that the partition number does not run into the device number.
+ * Otherwise *delimiter is left untouched. An empty device name is
+ * ignored (the previous code read one byte before the buffer).
+ */
+static void
+set_delimiter (char * device, char * delimiter)
+{
+       size_t len = strlen(device);
+
+       if (len == 0)
+               return;
+
+       /* <ctype.h> functions require a value representable as unsigned char */
+       if (isdigit((unsigned char)device[len - 1]))
+               *delimiter = 'p';
+}
+
+/*
+ * Replace every '/' in the string with '!', in place.
+ *
+ * The previous loop advanced the pointer before testing it, so a '/'
+ * in the very first position was never replaced.
+ */
+static void
+strip_slash (char * device)
+{
+       char *p;
+
+       for (p = device; *p != '\0'; p++)
+               if (*p == '/')
+                       *p = '!';
+}
+
+/*
+ * Return the offset of the last path component of device, i.e. the
+ * index of the character following the final '/'.
+ *
+ * Returns 0 when the name contains no '/' at all. The previous code
+ * subtracted from a NULL pointer in that case, and also when the only
+ * '/' was the first character.
+ */
+static int
+find_devname_offset (char * device)
+{
+       char *slash = strrchr(device, '/');
+
+       if (slash == NULL)
+               return 0;
+
+       return (int)(slash - device) + 1;
+}
+
+/*
+ * Work out the device node to operate on when started from hotplug.
+ *
+ * Reads ACTION and DEVNAME from the environment. Only ACTION=add is
+ * handled. The result is the directory part of DEVNAME followed by the
+ * device-mapper map name of that node.
+ *
+ * Returns a malloc()ed path the caller owns, or NULL when the event is
+ * not an "add", DEVNAME is missing or cannot be stat()ed, the node is
+ * not a device-mapper device, or memory runs out.
+ */
+static char *
+get_hotplug_device(void)
+{
+       unsigned int major, minor, off, len;
+       const char *mapname;
+       char *devname = NULL;
+       char *device = NULL;
+       char *var = NULL;
+       struct stat buf;
+
+       var = getenv("ACTION");
+
+       if (!var || strcmp(var, "add"))
+               return NULL;
+
+       /* Get dm mapname for hotpluged device. */
+       if (!(devname = getenv("DEVNAME")))
+               return NULL;
+
+       if (stat(devname, &buf))
+               return NULL;
+
+       major = (unsigned int)MAJOR(buf.st_rdev);
+       minor = (unsigned int)MINOR(buf.st_rdev);
+
+       /* NOTE(review): ownership of the string returned by dm_mapname()
+        * is not visible here; it is never freed in this function. */
+       if (!(mapname = dm_mapname(major, minor))) /* Not dm device. */
+               return NULL;
+
+       off = find_devname_offset(devname);
+       len = strlen(mapname);
+
+       /* Dirname + mapname + \0 */
+       if (!(device = malloc(off + len + 1)))
+               return NULL;
+
+       /* Create new device name. */
+       snprintf(device, off + 1, "%s", devname);
+       snprintf(device + off, len + 1, "%s", mapname);
+
+       if (strlen(device) != (off + len)) {
+               /* do not leak the buffer on the failure path */
+               free(device);
+               return NULL;
+       }
+
+       return device;
+}
+
+/*
+ * kpartx entry point.
+ *
+ * Parses the options, resolves the device to work on (attaching a loop
+ * device when given a regular file), then tries each registered
+ * partition-table reader in turn. The first reader that reports at
+ * least one slice wins, and its slices are listed, mapped through
+ * device-mapper, or unmapped, depending on the selected action.
+ *
+ * Returns the number of device-mapper operations that failed; exits
+ * with status 1 on usage errors and unrecoverable setup failures.
+ */
+int
+main(int argc, char **argv){
+       int fd, i, j, m, n, op, off, arg, c, d;
+       struct slice all;
+       struct pt *ptp;
+       enum action what = LIST;
+       char *type, *diskdevice, *device, *progname;
+       int verbose = 0;
+       char partname[PARTNAME_SIZE], params[PARTNAME_SIZE + 16];
+       char * loopdev = NULL;
+       char * delim = NULL;
+       char *uuid = NULL;
+       char *mapname = NULL;
+       int loopro = 0;
+       int hotplug = 0;
+       int loopcreated = 0;
+       int sync = 0;
+       struct stat buf;
+       uint32_t cookie = 0;
+       int r = 0;
+
+       initpts();
+       init_crc32();
+
+       type = device = diskdevice = NULL;
+       memset(&all, 0, sizeof(all));
+       memset(&partname, 0, sizeof(partname));
+
+       /* Check whether hotplug mode. */
+       progname = strrchr(argv[0], '/');
+
+       if (!progname)
+               progname = argv[0];
+       else
+               progname++;
+
+       if (!strcmp(progname, "kpartx.dev")) { /* Hotplug mode */
+               hotplug = 1;
+
+               /* Setup for original kpartx variables */
+               if (!(device = get_hotplug_device()))
+                       exit(1);
+
+               diskdevice = device;
+               what = ADD;
+       } else if (argc < 2) {
+               usage();
+               exit(1);
+       }
+
+       while ((arg = getopt(argc, argv, short_opts)) != EOF) switch(arg) {
+               case 'g':
+                       force_gpt=1;
+                       break;
+               case 't':
+                       type = optarg;
+                       break;
+               case 'v':
+                       verbose = 1;
+                       break;
+               case 'p':
+                       delim = optarg;
+                       break;
+               case 'l':
+                       what = LIST;
+                       break;
+               case 'a':
+                       what = ADD;
+                       break;
+               case 'd':
+                       what = DELETE;
+                       break;
+               case 's':
+                       sync = 1;
+                       break;
+               default:
+                       usage();
+                       exit(1);
+       }
+
+       if (!sync)
+               dm_udev_set_sync_support(0);
+
+       if (dm_prereq(DM_TARGET, 0, 0, 0) && (what == ADD || what == DELETE)) {
+               fprintf(stderr, "device mapper prerequisites not met\n");
+               exit(1);
+       }
+
+       if (hotplug) {
+               /* already got [disk]device */
+       } else if (optind == argc-2) {
+               device = argv[optind];
+               diskdevice = argv[optind+1];
+       } else if (optind == argc-1) {
+               diskdevice = device = argv[optind];
+       } else {
+               usage();
+               exit(1);
+       }
+
+       if (stat(device, &buf)) {
+               printf("failed to stat() %s\n", device);
+               exit (1);
+       }
+
+       if (S_ISREG (buf.st_mode)) {
+               /* already looped file ? */
+               loopdev = find_loop_by_file(device);
+
+               if (!loopdev && what == DELETE)
+                       exit (0);
+
+               if (!loopdev) {
+                       loopdev = find_unused_loop_device();
+
+                       /* no free loop device: do not hand NULL to set_loop() */
+                       if (!loopdev) {
+                               fprintf(stderr, "can't set up loop\n");
+                               exit (1);
+                       }
+
+                       if (set_loop(loopdev, device, 0, &loopro)) {
+                               fprintf(stderr, "can't set up loop\n");
+                               exit (1);
+                       }
+                       loopcreated = 1;
+               }
+               device = loopdev;
+       }
+
+       if (delim == NULL) {
+               delim = malloc(DELIM_SIZE);
+               if (delim == NULL) {
+                       fprintf(stderr, "Out of memory\n");
+                       exit(1);
+               }
+               memset(delim, 0, DELIM_SIZE);
+               set_delimiter(device, delim);
+       }
+
+       off = find_devname_offset(device);
+
+       if (!loopdev) {
+               uuid = dm_mapuuid((unsigned int)MAJOR(buf.st_rdev),
+                                 (unsigned int)MINOR(buf.st_rdev));
+               mapname = dm_mapname((unsigned int)MAJOR(buf.st_rdev),
+                                    (unsigned int)MINOR(buf.st_rdev));
+       }
+
+       /* Fall back to the basename of the device node */
+       if (!uuid)
+               uuid = device + off;
+
+       if (!mapname)
+               mapname = device + off;
+
+       fd = open(device, O_RDONLY);
+
+       if (fd == -1) {
+               perror(device);
+               exit(1);
+       }
+
+       /* add/remove partitions to the kernel devmapper tables */
+       for (i = 0; i < ptct; i++) {
+               ptp = &pts[i];
+
+               if (type && strcmp(type, ptp->type))
+                       continue;
+
+               /* here we get partitions */
+               n = ptp->fn(fd, all, slices, SIZE(slices));
+
+#ifdef DEBUG
+               if (n >= 0)
+                       printf("%s: %d slices\n", ptp->type, n);
+#endif
+
+               if (n > 0) {
+                       close(fd);
+                       fd = -1;        /* remember that it is already closed */
+               } else
+                       continue;
+
+               switch(what) {
+               case LIST:
+                       for (j = 0, c = 0, m = 0; j < n; j++) {
+                               if (slices[j].size == 0)
+                                       continue;
+                               if (slices[j].container > 0) {
+                                       c++;
+                                       continue;
+                               }
+
+                               slices[j].minor = m++;
+
+                               printf("%s%s%d : 0 %" PRIu64 " %s %" PRIu64"\n",
+                                      mapname, delim, j+1,
+                                      slices[j].size, device,
+                                      slices[j].start);
+                       }
+                       /* Loop to resolve contained slices */
+                       d = c;
+                       while (c) {
+                               for (j = 0; j < n; j++) {
+                                       uint64_t start;
+                                       int k = slices[j].container - 1;
+
+                                       if (slices[j].size == 0)
+                                               continue;
+                                       if (slices[j].minor > 0)
+                                               continue;
+                                       if (slices[j].container == 0)
+                                               continue;
+                                       slices[j].minor = m++;
+
+                                       start = slices[j].start - slices[k].start;
+                                       printf("%s%s%d : 0 %" PRIu64 " /dev/dm-%d %" PRIu64 "\n",
+                                              mapname, delim, j+1,
+                                              slices[j].size,
+                                              slices[k].minor, start);
+                                       c--;
+                               }
+                               /* Terminate loop if nothing more to resolve */
+                               if (d == c)
+                                       break;
+                       }
+
+                       /* A loop device attached only for listing is detached again */
+                       if (loopcreated && S_ISREG (buf.st_mode)) {
+                               if (del_loop(device)) {
+                                       if (verbose)
+                                               printf("can't del loop : %s\n",
+                                                       device);
+                                       exit(1);
+                               }
+                               printf("loop deleted : %s\n", device);
+                       }
+                       break;
+
+               case DELETE:
+                       /* Walk the slices from last to first */
+                       for (j = n-1; j >= 0; j--) {
+                               if (safe_sprintf(partname, "%s%s%d",
+                                            mapname, delim, j+1)) {
+                                       fprintf(stderr, "partname too small\n");
+                                       exit(1);
+                               }
+                               strip_slash(partname);
+
+                               if (!slices[j].size || !dm_map_present(partname))
+                                       continue;
+
+                               if (!dm_simplecmd(DM_DEVICE_REMOVE, partname,
+                                                 0, &cookie)) {
+                                       r++;
+                                       continue;
+                               }
+                               if (verbose)
+                                       printf("del devmap : %s\n", partname);
+                       }
+
+                       if (S_ISREG (buf.st_mode)) {
+                               if (del_loop(device)) {
+                                       if (verbose)
+                                               printf("can't del loop : %s\n",
+                                                       device);
+                                       exit(1);
+                               }
+                               printf("loop deleted : %s\n", device);
+                       }
+                       break;
+
+               case ADD:
+                       for (j = 0, c = 0; j < n; j++) {
+                               if (slices[j].size == 0)
+                                       continue;
+
+                               /* Skip all contained slices */
+                               if (slices[j].container > 0) {
+                                       c++;
+                                       continue;
+                               }
+
+                               if (safe_sprintf(partname, "%s%s%d",
+                                            mapname, delim, j+1)) {
+                                       fprintf(stderr, "partname too small\n");
+                                       exit(1);
+                               }
+                               strip_slash(partname);
+
+                               if (safe_sprintf(params, "%s %" PRIu64 ,
+                                                device, slices[j].start)) {
+                                       fprintf(stderr, "params too small\n");
+                                       exit(1);
+                               }
+
+                               op = (dm_map_present(partname) ?
+                                       DM_DEVICE_RELOAD : DM_DEVICE_CREATE);
+
+                               if (!dm_addmap(op, partname, DM_TARGET, params,
+                                              slices[j].size, uuid, j+1,
+                                              buf.st_mode & 0777, buf.st_uid,
+                                              buf.st_gid, &cookie)) {
+                                       fprintf(stderr, "create/reload failed on %s\n",
+                                               partname);
+                                       r++;
+                               }
+                               if (op == DM_DEVICE_RELOAD &&
+                                   !dm_simplecmd(DM_DEVICE_RESUME, partname,
+                                                 1, &cookie)) {
+                                       fprintf(stderr, "resume failed on %s\n",
+                                               partname);
+                                       r++;
+                               }
+                               dm_devn(partname, &slices[j].major,
+                                       &slices[j].minor);
+
+                               if (verbose)
+                                       printf("add map %s (%d:%d): 0 %" PRIu64 " %s %s\n",
+                                              partname, slices[j].major,
+                                              slices[j].minor, slices[j].size,
+                                              DM_TARGET, params);
+                       }
+                       /* Loop to resolve contained slices */
+                       d = c;
+                       while (c) {
+                               for (j = 0; j < n; j++) {
+                                       uint64_t start;
+                                       int k = slices[j].container - 1;
+
+                                       if (slices[j].size == 0)
+                                               continue;
+
+                                       /* Skip all existing slices */
+                                       if (slices[j].minor > 0)
+                                               continue;
+
+                                       /* Skip all simple slices */
+                                       if (slices[j].container == 0)
+                                               continue;
+
+                                       /* Check container slice */
+                                       /* NOTE(review): only reported; the map is still
+                                        * created on top of the empty container. */
+                                       if (slices[k].size == 0)
+                                               fprintf(stderr, "Invalid slice %d\n",
+                                                       k);
+
+                                       if (safe_sprintf(partname, "%s%s%d",
+                                                        mapname, delim, j+1)) {
+                                               fprintf(stderr, "partname too small\n");
+                                               exit(1);
+                                       }
+                                       strip_slash(partname);
+
+                                       /* Offset relative to the containing slice */
+                                       start = slices[j].start - slices[k].start;
+                                       if (safe_sprintf(params, "%d:%d %" PRIu64,
+                                                        slices[k].major,
+                                                        slices[k].minor,
+                                                        start)) {
+                                               fprintf(stderr, "params too small\n");
+                                               exit(1);
+                                       }
+
+                                       op = (dm_map_present(partname) ?
+                                             DM_DEVICE_RELOAD : DM_DEVICE_CREATE);
+
+                                       dm_addmap(op, partname, DM_TARGET, params,
+                                                 slices[j].size, uuid, j+1,
+                                                 buf.st_mode & 0777,
+                                                 buf.st_uid, buf.st_gid,
+                                                 &cookie);
+
+                                       if (op == DM_DEVICE_RELOAD)
+                                               dm_simplecmd(DM_DEVICE_RESUME,
+                                                            partname, 1,
+                                                            &cookie);
+
+                                       dm_devn(partname, &slices[j].major,
+                                               &slices[j].minor);
+
+                                       if (verbose)
+                                               printf("add map %s : 0 %" PRIu64 " %s %s\n",
+                                                      partname, slices[j].size,
+                                                      DM_TARGET, params);
+                                       c--;
+                               }
+                               /* Terminate loop */
+                               if (d == c)
+                                       break;
+                       }
+                       break;
+
+               default:
+                       break;
+
+               }
+               if (n > 0)
+                       break;
+       }
+
+       /* No reader recognised the table: the descriptor is still open */
+       if (fd >= 0)
+               close(fd);
+
+       dm_udev_wait(cookie);
+       dm_lib_release();
+       dm_lib_exit();
+
+       return r;
+}
+
+/*
+ * malloc() wrapper that never reports failure to the caller.
+ * A zero-byte request yields NULL; an allocation failure prints a
+ * message on stderr and exits with status 1.
+ */
+void *
+xmalloc (size_t size) {
+       void *mem = NULL;
+
+       if (size != 0) {
+               mem = malloc (size);
+
+               if (!mem) {
+                       fprintf(stderr, "Out of memory\n");
+                       exit(1);
+               }
+       }
+
+       return mem;
+}
+
+/*
+ * sseek: position fd at the start of 512-byte sector secnr.
+ * Returns 0 on success, -1 (after a message on stderr) otherwise.
+ */
+
+static int
+sseek(int fd, unsigned int secnr) {
+       const off64_t target = (off64_t) secnr << 9;
+
+       if (lseek64(fd, target, SEEK_SET) == target)
+               return 0;
+
+       fprintf(stderr, "llseek error\n");
+       return -1;
+}
+
+/*
+ * Cache of blocks already read, as a singly linked list keyed by
+ * sector number only (the file descriptor is not part of the key).
+ */
+static
+struct block {
+       unsigned int secnr;
+       char *block;
+       struct block *next;
+} *blockhead;
+
+/*
+ * Return READ_SIZE bytes of data starting at 512-byte sector secnr.
+ *
+ * Results are cached, so the returned buffer belongs to the cache and
+ * must not be freed by the caller. Returns NULL when the seek or the
+ * read fails; a failed read is cached as well, so later calls for the
+ * same sector return NULL without touching the device again.
+ */
+char *
+getblock (int fd, unsigned int secnr) {
+       struct block *bp;
+
+       for (bp = blockhead; bp; bp = bp->next)
+
+               if (bp->secnr == secnr)
+                       return bp->block;
+
+       if (sseek(fd, secnr))
+               return NULL;
+
+       bp = xmalloc(sizeof(*bp));
+       bp->secnr = secnr;
+       bp->next = blockhead;
+       blockhead = bp;
+       bp->block = xmalloc(READ_SIZE);
+
+       if (read(fd, bp->block, READ_SIZE) != READ_SIZE) {
+               fprintf(stderr, "read error, sector %u\n", secnr);
+               /* release the unused buffer instead of leaking it */
+               free(bp->block);
+               bp->block = NULL;
+       }
+
+       return bp->block;
+}
diff --git a/kpartx/kpartx.h b/kpartx/kpartx.h
new file mode 100644 (file)
index 0000000..43ae3f8
--- /dev/null
@@ -0,0 +1,50 @@
+#ifndef _KPARTX_H
+#define _KPARTX_H
+
+#include <stdint.h>
+
+/*
+ * For each partition type there is a routine that takes
+ * a block device and a range, and returns the list of
+ * slices found there in the supplied array SP that can
+ * hold NS entries. The return value is the number of
+ * entries stored, or -1 if the appropriate type is not
+ * present.
+ */
+
+#define likely(x)       __builtin_expect(!!(x), 1)
+#define unlikely(x)     __builtin_expect(!!(x), 0)
+
+/*
+ * snprintf() into the array var and report truncation.
+ * Evaluates to non-zero when the formatted text did not fit; var must
+ * be a real array because its capacity is taken with sizeof.
+ * The expansion is parenthesised so that !safe_sprintf(...) and
+ * comparisons against the result bind as expected.
+ */
+#define safe_sprintf(var, format, args...)     \
+       (snprintf(var, sizeof(var), format, ##args) >= sizeof(var))
+
+/*
+ * One partition ("slice") found by a partition-table reader.
+ * units: 512 byte sectors
+ */
+struct slice {
+       uint64_t start;         /* first sector of the slice */
+       uint64_t size;          /* length in sectors; 0 marks an unused entry */
+       int container;          /* 1-based index of the enclosing slice, 0 if none */
+       int major;              /* device number of the mapping, once known */
+       int minor;
+};
+
+typedef int (ptreader)(int fd, struct slice all, struct slice *sp, int ns);
+
+extern ptreader read_dos_pt;
+extern ptreader read_bsd_pt;
+extern ptreader read_solaris_pt;
+extern ptreader read_unixware_pt;
+extern ptreader read_gpt_pt;
+extern ptreader read_dasd_pt;
+extern ptreader read_mac_pt;
+extern ptreader read_sun_pt;
+
+char *getblock(int fd, unsigned int secnr);
+
+/*
+ * Assemble a little-endian 32-bit value from the four bytes at p.
+ *
+ * The bytes are combined as unsigned 32-bit quantities: shifting a
+ * promoted int left by 24 is undefined once the top byte is >= 0x80.
+ */
+static inline int
+four2int(unsigned char *p) {
+       uint32_t v = (uint32_t)p[0] |
+                    ((uint32_t)p[1] << 8) |
+                    ((uint32_t)p[2] << 16) |
+                    ((uint32_t)p[3] << 24);
+
+       return (int)v;
+}
+
+#endif /* _KPARTX_H */
diff --git a/kpartx/kpartx.rules b/kpartx/kpartx.rules
new file mode 100644 (file)
index 0000000..8978b73
--- /dev/null
@@ -0,0 +1,36 @@
+#
+# persistent links for device-mapper devices
+# only hardware-backed device-mapper devices (i.e. multipath, dmraid,
+# and kpartx) have meaningful persistent device names
+#
+
+KERNEL!="dm-*", GOTO="kpartx_end"
+ACTION=="remove", GOTO="kpartx_end"
+
+ENV{DM_TABLE_STATE}!="LIVE", GOTO="kpartx_end"
+
+ENV{DM_UUID}=="?*", IMPORT{program}=="kpartx_id %M %m $env{DM_UUID}"
+
+OPTIONS="link_priority=50"
+
+# Create persistent links for multipath tables
+ENV{DM_UUID}=="mpath-*", \
+       SYMLINK+="disk/by-id/$env{DM_TYPE}-$env{DM_NAME}"
+
+# Create persistent links for dmraid tables
+ENV{DM_UUID}=="dmraid-*", \
+        SYMLINK+="disk/by-id/$env{DM_TYPE}-$env{DM_NAME}"
+
+# Create persistent links for partitions
+ENV{DM_PART}=="?*", \
+        SYMLINK+="disk/by-id/$env{DM_TYPE}-$env{DM_NAME}-part$env{DM_PART}"
+
+# Create dm tables for partitions
+ENV{DM_STATE}=="ACTIVE", ENV{DM_UUID}=="mpath-*", \
+        RUN+="/sbin/kpartx -a -p -part /dev/$name"
+ENV{DM_STATE}=="ACTIVE", ENV{DM_UUID}=="dmraid-*", \
+        RUN+="/sbin/kpartx -a -p -part /dev/$name"
+
+LABEL="kpartx_end"
+
+
diff --git a/kpartx/kpartx_id b/kpartx/kpartx_id
new file mode 100644 (file)
index 0000000..81f32bf
--- /dev/null
@@ -0,0 +1,93 @@
+#!/bin/sh
+#
+# kpartx_id
+#
+# Generates ID information for device-mapper tables.
+#
+# Copyright (C) 2006 SUSE Linux Products GmbH
+# Author:
+#       Hannes Reinecke <hare@suse.de>
+#
+#
+#       This program is free software; you can redistribute it and/or modify it
+#       under the terms of the GNU General Public License as published by the
+#       Free Software Foundation version 2 of the License.
+#
+# This script generates ID information used to generate persistent symlinks.
+# It relies on the UUID strings generated by the various programs; the name
+# of the tables are of no consequence.
+#
+# Please note that dmraid does not provide the UUIDs (yet); a patch has been
+# sent upstream but has not been accepted yet.
+#
+
+DMSETUP=/sbin/dmsetup
+
+# Positional arguments: device major, device minor, device-mapper UUID.
+# MAJOR and MINOR are only checked for presence below; the script works
+# from the UUID alone.
+MAJOR=$1
+MINOR=$2
+UUID=$3
+
+# NOTE(review): the usage text omits the third (UUID) argument.
+if [ -z "$MAJOR" -o -z "$MINOR" ]; then
+    echo "usage: $0 major minor"
+    exit 1;
+fi
+
+# Device-mapper not installed; not an error
+if [ ! -x $DMSETUP ] ; then
+    exit 0
+fi
+
+
+# Table UUIDs are always '<type>-<uuid>'.
+# dmuuid: everything after the first '-'; dmtbl: everything before it.
+dmuuid=${UUID#*-}
+dmtbl=${UUID%%-*}
+dmpart=${dmtbl#part}
+# kpartx types are 'part<num>'
+# If stripping the 'part' prefix changed nothing, this is not a kpartx
+# table and there is no partition number; otherwise dmpart holds <num>.
+if [ "$dmpart" = "$dmtbl" ] ; then
+    dmpart=
+else
+    dmtbl=part
+fi
+
+# Set the name of the table. We're only interested in dmraid,
+# multipath, and kpartx tables; everything else is ignored.
+if [ "$dmtbl" = "part" ] ; then
+    # The name of the kpartx table is the name of the parent table
+    dmname=$($DMSETUP info  -c --noheadings -o name -u $dmuuid)
+    echo "DM_NAME=$dmname"
+    # We need the dependencies of the parent table to figure out
+    # the type if the parent is a multipath table
+    case "$dmuuid" in
+       mpath-*)
+           dmdeps=$($DMSETUP deps -u $dmuuid)
+           ;;
+    esac
+elif [ "$dmtbl" = "mpath" ] ; then
+    # NOTE(review): tblname is never assigned in this script, so dmname
+    # ends up empty here, and DM_NAME is not printed on this branch.
+    dmname=$tblname
+    # We need the dependencies of the table to figure out the type
+    dmdeps=$($DMSETUP deps -u $UUID)
+elif [ "$dmtbl" = "dmraid" ] ; then
+    # NOTE(review): same unset tblname as in the mpath branch.
+    dmname=$tblname
+fi
+
+# Partition number, only for kpartx tables
+[ -n "$dmpart" ] && echo "DM_PART=$dmpart"
+
+# Figure out the type of the map. For non-multipath maps it's
+# always 'raid'.
+# The patterns match "(major," fragments in the dmsetup deps output:
+# "(94," selects dasd, any other major starting with 9 selects raid.
+if [ -n "$dmdeps" ] ; then
+    case "$dmdeps" in
+       *\(94,*)
+            echo "DM_TYPE=dasd"
+           ;;
+       *\(9*)
+            echo "DM_TYPE=raid"
+           ;;
+       *)
+            echo "DM_TYPE=scsi"
+           ;;
+    esac
+else
+    echo "DM_TYPE=raid"
+fi
+
+exit 0
diff --git a/kpartx/lopart.c b/kpartx/lopart.c
new file mode 100644 (file)
index 0000000..79d8328
--- /dev/null
@@ -0,0 +1,306 @@
+/* Taken from Ted's losetup.c - Mitch <m.dsouza@mrc-apu.cam.ac.uk> */
+/* Added vfs mount options - aeb - 960223 */
+/* Removed lomount - aeb - 960224 */
+
+/* 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ * Sun Mar 21 1999 - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ * - fixed strerr(errno) in gettext calls
+ */
+
+#define PROC_DEVICES   "/proc/devices"
+
+/*
+ * losetup.c - setup and control loop devices
+ */
+
+#include "kpartx.h"
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sysmacros.h>
+
+#if defined(__hppa__) || defined(__powerpc64__) || defined (__alpha__) \
+ || defined (__x86_64__)
+typedef unsigned long __kernel_old_dev_t;
+#elif defined(__powerpc__) || defined(__ia64__) || (defined(__sparc__) && defined (__arch64__))
+typedef unsigned int __kernel_old_dev_t;
+#else
+typedef unsigned short __kernel_old_dev_t;
+#endif
+
+#define dev_t __kernel_old_dev_t
+
+#include <linux/loop.h>
+
+#include "lopart.h"
+#include "xstrncpy.h"
+
+#if !defined (__alpha__) && !defined (__ia64__) && !defined (__x86_64__) \
+        && !defined (__s390x__)
+#define int2ptr(x)     ((void *) ((int) x))
+#else
+#define int2ptr(x)     ((void *) ((long) x))
+#endif
+
+/*
+ * strdup() that never returns NULL for a non-NULL argument.
+ * A NULL argument is passed through; on allocation failure a message is
+ * written to stderr and the program exits with status 1.
+ */
+static char *
+xstrdup (const char *s)
+{
+       char *copy;
+
+       if (!s)
+               return NULL;
+
+       copy = strdup (s);
+
+       if (copy)
+               return copy;
+
+       fprintf(stderr, "not enough memory");
+       exit(1);
+}
+
+/*
+ * Return 1 when device names an existing block device whose major
+ * number is the loop major, 0 otherwise.
+ *
+ * The loop major is fixed at 7; the alternative that looked it up in
+ * PROC_DEVICES was compiled out and is not carried over.
+ */
+extern int
+is_loop_device (const char *device)
+{
+       const int loopmajor = 7;
+       struct stat statbuf;
+
+       if (stat(device, &statbuf) != 0)
+               return 0;
+
+       if (!S_ISBLK(statbuf.st_mode))
+               return 0;
+
+       return major(statbuf.st_rdev) == loopmajor;
+}
+
+#define SIZE(a) (sizeof(a)/sizeof(a[0]))
+
+/*
+ * Look for a loop device whose recorded name equals filename.
+ *
+ * Probes /dev/loopN and then /dev/loop/N for N in 0..255. Nodes that do
+ * not exist or are not block devices are skipped; the scan of one
+ * naming scheme stops at the first node that cannot be opened.
+ *
+ * Returns a newly allocated copy of the device path, or NULL when no
+ * loop device matches.
+ */
+extern char *
+find_loop_by_file (const char * filename)
+{
+       char dev[64];
+       char *loop_formats[] = { "/dev/loop%d", "/dev/loop/%d" };
+       int i, j, fd, match;
+       struct stat statbuf;
+       struct loop_info loopinfo;
+
+       for (j = 0; j < SIZE(loop_formats); j++) {
+
+               for (i = 0; i < 256; i++) {
+                       sprintf (dev, loop_formats[j], i);
+
+                       if (stat (dev, &statbuf) == 0 &&
+                           S_ISBLK(statbuf.st_mode)) {
+                               fd = open (dev, O_RDONLY);
+
+                               if (fd < 0)
+                                       break;
+
+                               /* a device without status cannot match */
+                               match = ioctl (fd, LOOP_GET_STATUS, &loopinfo) == 0 &&
+                                       strcmp(filename, loopinfo.lo_name) == 0;
+
+                               close (fd);
+
+                               if (match)
+                                       return xstrdup(dev); /*found */
+                       }
+               }
+       }
+       return NULL;
+}
+
+/*
+ * Find a loop device that is not currently in use.
+ *
+ * Probes /dev/loopN and then /dev/loop/N for N counting up from 0,
+ * stopping each scheme at the first node that is missing or not a block
+ * device (at most 256 nodes per scheme). A node whose LOOP_GET_STATUS
+ * fails with ENXIO is taken to be free.
+ *
+ * Returns a newly allocated copy of the device path. When nothing is
+ * found, a diagnostic explaining the likely reason is written to stderr
+ * and NULL is returned.
+ */
+extern char *
+find_unused_loop_device (void)
+{
+       char dev[20];
+       char *loop_formats[] = { "/dev/loop%d", "/dev/loop/%d" };
+       int i, j, fd, somedev = 0, someloop = 0, loop_known = 0;
+       struct stat statbuf;
+       struct loop_info loopinfo;
+       FILE *procdev;
+
+       for (j = 0; j < SIZE(loop_formats); j++) {
+
+               for (i = 0; i < 256; i++) {
+                       sprintf(dev, loop_formats[j], i);
+
+                       /* continue trying as long as devices exist */
+                       if (stat (dev, &statbuf) != 0 ||
+                           !S_ISBLK(statbuf.st_mode))
+                               break;
+
+                       somedev++;
+                       fd = open (dev, O_RDONLY);
+
+                       if (fd < 0)
+                               continue;
+
+                       if (ioctl (fd, LOOP_GET_STATUS, &loopinfo) == 0) {
+                               someloop++;             /* in use */
+                       } else if (errno == ENXIO) {
+                               close (fd);
+                               return xstrdup(dev);    /* probably free */
+                       }
+
+                       close (fd);
+               }
+       }
+
+       /* Nothing found. Why not? */
+       procdev = fopen(PROC_DEVICES, "r");
+
+       if (procdev != NULL) {
+               char line[100];
+
+               /* -1: the list was readable but has no loop entry */
+               loop_known = -1;
+
+               while (fgets (line, sizeof(line), procdev)) {
+
+                       if (strstr (line, " loop\n")) {
+                               loop_known = 1;
+                               break;
+                       }
+               }
+
+               fclose(procdev);
+       }
+
+       if (!somedev)
+               fprintf(stderr, "mount: could not find any device /dev/loop#");
+
+       else if (someloop)
+               fprintf(stderr, "mount: could not find any free loop device");
+
+       else if (loop_known == 1)
+               fprintf(stderr,
+                   "mount: Could not find any loop device.\n"
+                   "       Maybe /dev/loop# has a wrong major number?");
+
+       else if (loop_known == -1)
+               fprintf(stderr,
+                   "mount: Could not find any loop device, and, according to %s,\n"
+                   "       this kernel does not know about the loop device.\n"
+                   "       (If so, then recompile or `insmod loop.o'.)",
+                     PROC_DEVICES);
+
+       else
+               fprintf(stderr,
+                   "mount: Could not find any loop device. Maybe this kernel does not know\n"
+                   "       about the loop device (then recompile or `insmod loop.o'), or\n"
+                   "       maybe /dev/loop# has the wrong major number?");
+
+       return NULL;
+}
+
+/*
+ * Attach file to the loop device named device.
+ *
+ * device  path of the loop device node
+ * file    path of the backing file
+ * offset  byte offset stored in the loop status
+ * loopro  in: non-zero requests a read-only setup;
+ *         out: set to 1 when the file ended up opened read-only
+ *
+ * When a read-write open of the file fails with EROFS the open is
+ * retried read-only. Returns 0 on success, 1 on failure after reporting
+ * the error with perror().
+ */
+extern int
+set_loop (const char *device, const char *file, int offset, int *loopro)
+{
+       struct loop_info loopinfo;
+       int fd, ffd, mode;
+
+       mode = (*loopro ? O_RDONLY : O_RDWR);
+
+       if ((ffd = open (file, mode)) < 0) {
+
+               if (!*loopro && errno == EROFS)
+                       ffd = open (file, mode = O_RDONLY);
+
+               if (ffd < 0) {
+                       perror (file);
+                       return 1;
+               }
+       }
+
+       if ((fd = open (device, mode)) < 0) {
+               perror (device);
+               /* the backing file is already open: do not leak it */
+               close (ffd);
+               return 1;
+       }
+
+       *loopro = (mode == O_RDONLY);
+       memset (&loopinfo, 0, sizeof (loopinfo));
+
+       xstrncpy (loopinfo.lo_name, file, LO_NAME_SIZE);
+       loopinfo.lo_offset = offset;
+       loopinfo.lo_encrypt_type = LO_CRYPT_NONE;
+       loopinfo.lo_encrypt_key_size = 0;
+
+       if (ioctl (fd, LOOP_SET_FD, int2ptr(ffd)) < 0) {
+               perror ("ioctl: LOOP_SET_FD");
+               close (fd);
+               close (ffd);
+               return 1;
+       }
+
+       if (ioctl (fd, LOOP_SET_STATUS, &loopinfo) < 0) {
+               /* undo the attachment made just above */
+               (void) ioctl (fd, LOOP_CLR_FD, 0);
+               perror ("ioctl: LOOP_SET_STATUS");
+               close (fd);
+               close (ffd);
+               return 1;
+       }
+
+       close (fd);
+       close (ffd);
+       return 0;
+}
+
+/*
+ * Detach the backing file from the loop device named device.
+ * Returns 0 on success, 1 when the device cannot be opened or the
+ * LOOP_CLR_FD request fails; the reason is written to stderr.
+ */
+extern int 
+del_loop (const char *device)
+{
+       int rc = 0;
+       int fd = open (device, O_RDONLY);
+
+       if (fd < 0) {
+               fprintf(stderr, "loop: can't delete device %s: %s\n",
+                       device, strerror (errno));
+               return 1;
+       }
+
+       if (ioctl (fd, LOOP_CLR_FD, 0) < 0) {
+               perror ("ioctl: LOOP_CLR_FD");
+               rc = 1;
+       }
+
+       close (fd);
+       return rc;
+}
diff --git a/kpartx/lopart.h b/kpartx/lopart.h
new file mode 100644 (file)
index 0000000..a512353
--- /dev/null
@@ -0,0 +1,6 @@
+extern int verbose;
+/*
+ * Bind a file (second argument, at the given offset) to a loop device
+ * (first argument).  The int pointed to by the last argument requests
+ * read-only mode on entry and reports on return whether read-only mode
+ * was actually used.  Returns 0 on success, 1 on failure.
+ */
+extern int set_loop (const char *, const char *, int, int *);
+/* Detach the loop device; returns 0 on success, 1 on failure. */
+extern int del_loop (const char *);
+/* The definitions of the helpers below are not shown here. */
+extern int is_loop_device (const char *);
+extern char * find_unused_loop_device (void);
+extern char * find_loop_by_file (const char *);
diff --git a/kpartx/mac.c b/kpartx/mac.c
new file mode 100644 (file)
index 0000000..5432e67
--- /dev/null
@@ -0,0 +1,47 @@
+#include "kpartx.h"
+#include "byteorder.h"
+#include <stdio.h>
+#include <string.h>
+#include "mac.h"
+
+/*
+ * Read an Apple partition map from fd.
+ *
+ * Block 0 must carry the driver descriptor signature and the first
+ * map block the partition signature, otherwise -1 is returned.  Map
+ * entries are then read one per device block, starting at block 1,
+ * until the map count or ns is reached or an entry without the
+ * partition signature is met.  Start and size are converted from
+ * device blocks to 512-byte sectors and stored in sp[].
+ *
+ * Returns the number of slices stored, or -1 on a read error.
+ * The `all` argument is not used.
+ */
+int
+read_mac_pt(int fd, struct slice all, struct slice *sp, int ns) {
+       struct mac_driver_desc *desc;
+       struct mac_partition *entry;
+       unsigned secsize, sectors_per_block;
+       char *buf;
+       int blk, map_blocks;
+       int n = 0;
+
+       desc = (struct mac_driver_desc *) getblock(fd, 0);
+       if (desc == NULL ||
+           be16_to_cpu(desc->signature) != MAC_DRIVER_MAGIC)
+               return -1;
+
+       secsize = be16_to_cpu(desc->block_size);
+       sectors_per_block = secsize / 512;
+
+       /* first map entry: only used for its signature and map count */
+       buf = getblock(fd, sectors_per_block);
+       if (buf == NULL)
+               return -1;
+
+       entry = (struct mac_partition *) (buf + secsize % 512);
+       if (be16_to_cpu(entry->signature) != MAC_PARTITION_MAGIC)
+               return -1;
+
+       map_blocks = be32_to_cpu(entry->map_count);
+
+       blk = 1;
+       while (blk <= map_blocks && blk <= ns) {
+               /* byte position of map entry number blk */
+               int pos = blk * secsize;
+
+               buf = getblock(fd, pos / 512);
+               if (buf == NULL)
+                       return -1;
+
+               entry = (struct mac_partition *) (buf + pos % 512);
+               if (be16_to_cpu(entry->signature) != MAC_PARTITION_MAGIC)
+                       break;
+
+               sp[n].start = be32_to_cpu(entry->start_block) * sectors_per_block;
+               sp[n].size = be32_to_cpu(entry->block_count) * sectors_per_block;
+
+               blk++;
+               n++;
+       }
+       return n;
+}
diff --git a/kpartx/mac.h b/kpartx/mac.h
new file mode 100644 (file)
index 0000000..3c712ba
--- /dev/null
@@ -0,0 +1,30 @@
+#ifndef MAC_H
+#define MAC_H
+
+#include <stdint.h>
+
+/* value expected in mac_partition.signature */
+#define MAC_PARTITION_MAGIC     0x504d
+
+/* type field value for A/UX or other Unix partitions */
+#define APPLE_AUX_TYPE  "Apple_UNIX_SVR2"
+
+/*
+ * Leading part of a partition map entry as stored on disk.
+ * read_mac_pt() in mac.c converts the multi-byte fields with
+ * be16_to_cpu()/be32_to_cpu() before using them.
+ */
+struct mac_partition {
+        uint16_t  signature;      /* expected to be MAC_PARTITION_MAGIC */
+        uint16_t  res1;
+        uint32_t  map_count;      /* # blocks in partition map */
+        uint32_t  start_block;    /* absolute starting block # of partition */
+        uint32_t  block_count;    /* number of blocks in partition */
+        /* there is more stuff after this that we don't need */
+};
+
+/* value expected in mac_driver_desc.signature */
+#define MAC_DRIVER_MAGIC        0x4552
+
+/* Driver descriptor structure, in block 0 */
+struct mac_driver_desc {
+        uint16_t  signature;      /* expected to be MAC_DRIVER_MAGIC */
+        uint16_t  block_size;     /* device block size in bytes, as used by read_mac_pt() */
+        uint32_t  block_count;
+    /* ... more stuff */
+};
+
+#endif
diff --git a/kpartx/solaris.c b/kpartx/solaris.c
new file mode 100644 (file)
index 0000000..e3000e9
--- /dev/null
@@ -0,0 +1,71 @@
+#include "kpartx.h"
+#include <stdio.h>
+#include <sys/types.h>
+#include <time.h>              /* time_t */
+
+#define SOLARIS_X86_NUMSLICE   8
+#define SOLARIS_X86_VTOC_SANE  (0x600DDEEEUL)
+
+/*
+ * On-disk layout of the Solaris x86 VTOC.
+ *
+ * Every member is declared with a fixed-width type from <sys/types.h>
+ * (the same family sun.c uses).  Plain "long", "unsigned long" and
+ * "time_t" are 8 bytes wide on LP64 hosts, which would move every
+ * member away from its on-disk position.
+ *
+ * NOTE(review): values are used in host byte order; a big-endian host
+ * would additionally need a little-endian conversion - confirm.
+ */
+struct solaris_x86_slice {
+       u_int16_t       s_tag;          /* ID tag of partition */
+       u_int16_t       s_flag;         /* permission flags */
+       u_int32_t       s_start;        /* start sector no of partition */
+       u_int32_t       s_size;         /* # of blocks in partition */
+};
+
+struct solaris_x86_vtoc {
+       u_int32_t       v_bootinfo[3];  /* info for mboot */
+       u_int32_t       v_sanity;       /* to verify vtoc sanity */
+       u_int32_t       v_version;      /* layout version */
+       char            v_volume[8];    /* volume name */
+       u_int16_t       v_sectorsz;     /* sector size in bytes */
+       u_int16_t       v_nparts;       /* number of partitions */
+       u_int32_t       v_reserved[10]; /* free space */
+       struct solaris_x86_slice
+               v_slice[SOLARIS_X86_NUMSLICE];   /* slice headers */
+       u_int32_t       timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp */
+       char            v_asciilabel[128];      /* for compatibility */
+};
+
+/*
+ * Read the Solaris x86 VTOC found one sector into the slice `all` and
+ * store every slice with a non-zero size in sp[] (at most ns entries),
+ * with start sectors made absolute by adding all.start.
+ *
+ * Returns the number of slices stored, 0 when the VTOC version is not
+ * 1, or -1 when the sector cannot be read or the sanity value does
+ * not match.
+ */
+int
+read_solaris_pt(int fd, struct slice all, struct slice *sp, int ns) {
+       struct solaris_x86_vtoc *v;
+       struct solaris_x86_slice *s;
+       unsigned int offset = all.start;
+       int i, n;
+       char *bp;
+
+       bp = getblock(fd, offset+1);    /* 1 sector suffices */
+       if (bp == NULL)
+               return -1;
+
+       v = (struct solaris_x86_vtoc *) bp;
+       if(v->v_sanity != SOLARIS_X86_VTOC_SANE)
+               return -1;
+
+       if(v->v_version != 1) {
+               fprintf(stderr, "Cannot handle solaris version %u vtoc\n",
+                      (unsigned int) v->v_version);
+               return 0;
+       }
+
+       for(i=0, n=0; i<SOLARIS_X86_NUMSLICE; i++) {
+               s = &v->v_slice[i];
+
+               if (s->s_size == 0)
+                       continue;
+               if (n < ns) {
+                       sp[n].start = offset + s->s_start;
+                       sp[n].size = s->s_size;
+                       n++;
+               } else {
+                       fprintf(stderr,
+                               "solaris_x86_partition: too many slices\n");
+                       break;
+               }
+       }
+       return n;
+}
+
diff --git a/kpartx/sun.c b/kpartx/sun.c
new file mode 100644 (file)
index 0000000..3d88b21
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * Lifted from util-linux' partx sun.c
+ *
+ * Copyrights of the original file apply
+ * Copyright (c) 2007 Hannes Reinecke
+ */
+#include "kpartx.h"
+#include "byteorder.h"
+#include <stdio.h>
+#include <sys/types.h>
+#include <time.h>              /* time_t */
+
+#define SUN_DISK_MAGIC         0xDABE  /* Disk magic number */
+#define SUN_DISK_MAXPARTITIONS 8
+
+/*
+ * One partition table entry of the label.  read_sun_pt() converts
+ * both fields with be32_to_cpu() before using them.
+ */
+struct __attribute__ ((packed)) sun_raw_part {
+       u_int32_t       start_cylinder; /* where the part starts... */
+       u_int32_t       num_sectors;    /* ...and its length */
+};
+
+struct __attribute__ ((packed)) sun_part_info {
+       u_int8_t        spare1;
+       u_int8_t        id;             /* Partition type */
+       u_int8_t        spare2;
+       u_int8_t        flags;          /* Partition flags */
+};
+
+/*
+ * The disk label itself.  The structure is packed and its members add
+ * up to 512 bytes; sun_verify_checksum() XORs all of its 16-bit words.
+ */
+struct __attribute__ ((packed)) sun_disk_label {
+       char            info[128];      /* Informative text string */
+       u_int8_t        spare0[14];
+       struct sun_part_info infos[SUN_DISK_MAXPARTITIONS];
+       u_int8_t        spare1[246];    /* Boot information etc. */
+       u_int16_t       rspeed;         /* Disk rotational speed */
+       u_int16_t       pcylcount;      /* Physical cylinder count */
+       u_int16_t       sparecyl;       /* extra sects per cylinder */
+       u_int8_t        spare2[4];      /* More magic... */
+       u_int16_t       ilfact;         /* Interleave factor */
+       u_int16_t       ncyl;           /* Data cylinder count */
+       u_int16_t       nacyl;          /* Alt. cylinder count */
+       u_int16_t       ntrks;          /* Tracks per cylinder */
+       u_int16_t       nsect;          /* Sectors per track */
+       u_int8_t        spare3[4];      /* Even more magic... */
+       struct sun_raw_part partitions[SUN_DISK_MAXPARTITIONS];
+       u_int16_t       magic;          /* Magic number */
+       u_int16_t       csum;           /* Label xor'd checksum */
+};
+
+/*
+ * Checksum verification: XOR every 16-bit word of the label, the
+ * stored checksum included.  Returns non-zero when the result is 0,
+ * i.e. when the label is consistent.
+ */
+static int
+sun_verify_checksum (struct sun_disk_label *label)
+{
+       const u_int16_t *word = (const u_int16_t *) label;
+       const u_int16_t *limit = (const u_int16_t *) (label + 1);
+       u_int16_t acc = 0;
+
+       /* XOR is order-independent, so a forward scan is sufficient */
+       for (; word < limit; word++)
+               acc ^= *word;
+
+       return acc == 0;
+}
+
+/*
+ * Read a Sun disk label from the first sector of the slice `all`.
+ *
+ * Every partition with a non-zero sector count is stored in sp[] (at
+ * most ns entries); its start is start_cylinder * nsect * ntrks
+ * sectors past all.start.  A second pass over the stored slices then
+ * clears the size of a slice that partially overlaps a later-starting
+ * one, and records in .container the index + 1 of a slice that fully
+ * contains it.
+ *
+ * Returns the number of slices stored, or -1 when the sector cannot
+ * be read, the magic does not match or the checksum is wrong.
+ */
+int
+read_sun_pt(int fd, struct slice all, struct slice *sp, int ns) {
+       struct sun_disk_label *l;
+       struct sun_raw_part *s;
+       unsigned int offset = all.start, end;
+       int i, j, n;
+       char *bp;
+
+       bp = getblock(fd, offset);
+       if (bp == NULL)
+               return -1;
+
+       l = (struct sun_disk_label *) bp;
+       if(be16_to_cpu(l->magic) != SUN_DISK_MAGIC)
+               return -1;
+
+       if (!sun_verify_checksum(l)) {
+               fprintf(stderr, "Corrupted Sun disk label\n");
+               return -1;
+       }
+
+       for(i=0, n=0; i<SUN_DISK_MAXPARTITIONS; i++) {
+               s = &l->partitions[i];
+
+               if (s->num_sectors == 0)
+                       continue;
+               if (n < ns) {
+                       sp[n].start = offset +
+                               be32_to_cpu(s->start_cylinder) * be16_to_cpu(l->nsect) * be16_to_cpu(l->ntrks);
+                       sp[n].size = be32_to_cpu(s->num_sectors);
+                       n++;
+               } else {
+                       fprintf(stderr,
+                               "sun_disklabel: too many slices\n");
+                       break;
+               }
+       }
+       /*
+        * Convention has it that the SUN disklabel will always have
+        * the 'c' partition spanning the entire disk.
+        * So we have to check for contained slices.
+        *
+        * Only sp[0..n-1] were filled in above, so both loops stop at
+        * n: going up to SUN_DISK_MAXPARTITIONS would read entries this
+        * function never wrote and could run past the ns entries the
+        * caller provided.
+        */
+       for(i = 0; i < n; i++) {
+               if (sp[i].size == 0)
+                       continue;
+
+               end = sp[i].start + sp[i].size;
+               for(j = 0; j < n; j ++) {
+                       if ( i == j )
+                               continue;
+                       if (sp[j].size == 0)
+                               continue;
+
+                       if (sp[i].start < sp[j].start) {
+                               if (end > sp[j].start &&
+                                   end < sp[j].start + sp[j].size) {
+                                       /* Invalid slice */
+                                       fprintf(stderr,
+                                               "sun_disklabel: slice %d overlaps with %d\n", i , j);
+                                       sp[i].size = 0;
+                               }
+                       } else {
+                               if (end <= sp[j].start + sp[j].size) {
+                                       sp[i].container = j + 1;
+                               }
+                       }
+               }
+       }
+       return n;
+}
+
diff --git a/kpartx/sysmacros.h b/kpartx/sysmacros.h
new file mode 100644 (file)
index 0000000..171b33d
--- /dev/null
@@ -0,0 +1,9 @@
+/* versions to be used with > 16-bit dev_t - leave unused for now */
+/*
+ * Fallback definitions, only provided when the macros do not already
+ * exist: the minor number is the low 8 bits of dev, the major number
+ * is everything above them.
+ * NOTE(review): this split matches a 16-bit device number, which does
+ * not obviously agree with the remark above - confirm the intent.
+ */
+
+#ifndef major
+#define major(dev)     ((dev) >> 8)
+#endif
+
+#ifndef minor
+#define minor(dev)     ((dev) & 0xff)
+#endif
diff --git a/kpartx/unixware.c b/kpartx/unixware.c
new file mode 100644 (file)
index 0000000..41cc957
--- /dev/null
@@ -0,0 +1,83 @@
+#include "kpartx.h"
+#include <stdio.h>
+
+/* s_label value of a slice that read_unixware_pt() skips */
+#define UNIXWARE_FS_UNUSED     0
+#define UNIXWARE_NUMSLICE      16
+/* expected values of d_magic and vtoc.v_magic respectively */
+#define UNIXWARE_DISKMAGIC     (0xCA5E600D)
+#define UNIXWARE_DISKMAGIC2    (0x600DDEEE)
+
+/* One slice entry of the VTOC embedded in the disk label. */
+struct unixware_slice {
+       unsigned short s_label;         /* label */
+       unsigned short s_flags;         /* permission flags */
+       unsigned int   start_sect;      /* starting sector */
+       unsigned int   nr_sects;        /* number of sectors in slice */
+};
+
+/*
+ * Disk label as read by read_unixware_pt().  The trailing size remark
+ * (408 bytes) holds when short is 2 bytes and int is 4 bytes.
+ */
+struct unixware_disklabel {
+       unsigned int   d_type;          /* drive type */
+       unsigned char  d_magic[4];      /* the magic number */
+       unsigned int   d_version;       /* version number */
+       char    d_serial[12];           /* serial number of the device */
+       unsigned int   d_ncylinders;    /* # of data cylinders per device */
+       unsigned int   d_ntracks;       /* # of tracks per cylinder */
+       unsigned int   d_nsectors;      /* # of data sectors per track */
+       unsigned int   d_secsize;       /* # of bytes per sector */
+       unsigned int   d_part_start;    /* # of first sector of this partition */
+       unsigned int   d_unknown1[12];  /* ? */
+       unsigned int   d_alt_tbl;       /* byte offset of alternate table */
+       unsigned int   d_alt_len;       /* byte length of alternate table */
+       unsigned int   d_phys_cyl;      /* # of physical cylinders per device */
+       unsigned int   d_phys_trk;      /* # of physical tracks per cylinder */
+       unsigned int   d_phys_sec;      /* # of physical sectors per track */
+       unsigned int   d_phys_bytes;    /* # of physical bytes per sector */
+       unsigned int   d_unknown2;      /* ? */
+       unsigned int   d_unknown3;      /* ? */
+       unsigned int   d_pad[8];        /* pad */
+
+       struct unixware_vtoc {
+               unsigned char   v_magic[4];     /* the magic number */
+               unsigned int    v_version;      /* version number */
+               char    v_name[8];              /* volume name */
+               unsigned short  v_nslices;      /* # of slices */
+               unsigned short  v_unknown1;     /* ? */
+               unsigned int    v_reserved[10]; /* reserved */
+               struct unixware_slice
+                   v_slice[UNIXWARE_NUMSLICE]; /* slice headers */
+       } vtoc;
+
+};  /* 408 */
+
+/*
+ * Read the UnixWare disk label located 29 sectors into the slice
+ * `all` and store every used slice except slice 0 in sp[] (at most ns
+ * entries).  Start sectors are stored exactly as found in the label.
+ *
+ * Returns the number of slices stored, or -1 when the sector cannot
+ * be read or either magic number does not match.
+ */
+int
+read_unixware_pt(int fd, struct slice all, struct slice *sp, int ns) {
+       struct unixware_disklabel *label;
+       struct unixware_slice *slice;
+       unsigned int offset = all.start;
+       char *bp;
+       int idx;
+       int n = 0;
+
+       bp = getblock(fd, offset+29);   /* 1 sector suffices */
+       if (bp == NULL)
+               return -1;
+
+       label = (struct unixware_disklabel *) bp;
+       if (four2int(label->d_magic) != UNIXWARE_DISKMAGIC)
+               return -1;
+       if (four2int(label->vtoc.v_magic) != UNIXWARE_DISKMAGIC2)
+               return -1;
+
+       /* slice 0 is the whole disk, so start at slice 1 */
+       for (idx = 1; idx < UNIXWARE_NUMSLICE; idx++) {
+               slice = &label->vtoc.v_slice[idx];
+
+               if (slice->s_label == UNIXWARE_FS_UNUSED)
+                       continue;
+
+               if (n >= ns) {
+                       fprintf(stderr,
+                               "unixware_partition: too many slices\n");
+                       break;
+               }
+
+               sp[n].start = slice->start_sect;
+               sp[n].size = slice->nr_sects;
+               n++;
+       }
+       return n;
+}
diff --git a/kpartx/xstrncpy.c b/kpartx/xstrncpy.c
new file mode 100644 (file)
index 0000000..7975426
--- /dev/null
@@ -0,0 +1,10 @@
+/* NUL-terminated version of strncpy() */
+#include <string.h>
+#include "xstrncpy.h"
+
+/*
+ * Copy src into the n-byte buffer dest and always NUL-terminate it.
+ * At most n-1 bytes are taken from src; as with strncpy(), the unused
+ * part of the first n-1 bytes is zero-filled.
+ *
+ * Callers are expected to pass n > 0.  n == 0 is tolerated and leaves
+ * dest untouched; without the guard n-1 would wrap around to SIZE_MAX.
+ */
+void
+xstrncpy(char *dest, const char *src, size_t n) {
+       if (n == 0)
+               return;
+
+       strncpy(dest, src, n-1);
+       dest[n-1] = 0;
+}
diff --git a/kpartx/xstrncpy.h b/kpartx/xstrncpy.h
new file mode 100644 (file)
index 0000000..05c8fa2
--- /dev/null
@@ -0,0 +1 @@
+#ifndef XSTRNCPY_H
+#define XSTRNCPY_H
+
+#include <stddef.h>    /* size_t */
+
+/*
+ * NUL-terminated version of strncpy(): copies at most n-1 bytes of
+ * src into dest and stores a terminating NUL at dest[n-1].
+ */
+extern void xstrncpy(char *dest, const char *src, size_t n);
+
+#endif
diff --git a/multipath/01_udev b/multipath/01_udev
new file mode 100755 (executable)
index 0000000..0f68996
--- /dev/null
@@ -0,0 +1,50 @@
+#!/bin/sh
+#
+# Copy udev, a few helper binaries, the shared libraries reported for
+# them by ldd and the udev configuration into the tree rooted at
+# $INITRDDIR, then write the script that runs udevstart there.
+#
+# Expansions of $INITRDDIR and $i are quoted so that a path containing
+# whitespace or glob characters is not split or expanded.
+#
+cp /sbin/udev "$INITRDDIR/sbin/hotplug"
+cp /sbin/udevstart "$INITRDDIR/sbin/"
+cp /bin/mountpoint "$INITRDDIR/bin/"
+cp /bin/readlink "$INITRDDIR/bin/"
+
+# $PROGS is a space separated list: it must stay unquoted below
+PROGS="/sbin/udev /sbin/udevstart /bin/mountpoint /bin/readlink"
+LIBS=`ldd $PROGS | grep -v linux-gate.so | sort -u | \
+awk '{print $3}'`
+for i in $LIBS
+do
+       mkdir -p "$(dirname "$INITRDDIR/$i")"
+       cp "$i" "$INITRDDIR/$i"
+done
+
+#
+# config files
+#
+if [ -d /etc/dev.d ]
+then
+       cp -a /etc/dev.d "$INITRDDIR/etc/"
+fi
+
+if [ -d /etc/udev ]
+then
+       cp -a /etc/udev "$INITRDDIR/etc/"
+fi
+
+#
+# run udev from initrd
+#
+cat <<EOF >| "$INITRDDIR/scripts/10_udev.sh"
+
+cd /
+mount -nt proc proc proc
+mount -nt sysfs sysfs sys
+mount -nt tmpfs tmpfs dev || mount -nt ramfs ramfs dev
+mount -nt tmpfs tmpfs tmp || mount -nt ramfs ramfs tmp
+
+#modprobe dm-mod
+#modprobe dm-multipath
+/sbin/udevstart
+
+umount -n tmp
+umount -n sys
+umount -n proc
+
+sleep 2
+EOF
diff --git a/multipath/02_multipath b/multipath/02_multipath
new file mode 100755 (executable)
index 0000000..467a7cb
--- /dev/null
@@ -0,0 +1,33 @@
+#!/bin/sh
+#
+# store the multipath tool in the initrd
+# hotplug & udev will take care of calling it when appropriate
+# this tool is statically linked against klibc : no additional libs
+#
+# Expansions of $INITRDDIR and $i are quoted so that a path containing
+# whitespace or glob characters is not split or expanded.
+#
+cp /sbin/multipath "$INITRDDIR/sbin"
+cp /sbin/kpartx "$INITRDDIR/sbin"
+
+#
+# feed the dependencies too
+# scsi_id is dynamically linked, so store the libs too
+#
+cp /lib/udev/scsi_id "$INITRDDIR/lib/udev/"
+cp /bin/mountpoint "$INITRDDIR/bin"
+
+# $PROGS is a space separated list: it must stay unquoted below
+PROGS="/lib/udev/scsi_id /bin/mountpoint"
+LIBS=`ldd $PROGS | grep -v linux-gate.so | sort -u | \
+awk '{print $3}'`
+for i in $LIBS
+do
+       mkdir -p "$(dirname "$INITRDDIR/$i")"
+       cp "$i" "$INITRDDIR/$i"
+done
+
+#
+# config file ?
+#
+if [ -f /etc/multipath.conf ]
+then
+       cp /etc/multipath.conf "$INITRDDIR/etc/"
+fi
+
diff --git a/multipath/Makefile b/multipath/Makefile
new file mode 100644 (file)
index 0000000..5cbab2f
--- /dev/null
@@ -0,0 +1,38 @@
+# Makefile
+#
+# Copyright (C) 2003 Christophe Varoqui, <christophe.varoqui@opensvc.com>
+#
+# Builds the multipath binary and its compressed man pages.
+# Variables that are used but not set here are presumably provided by
+# ../Makefile.inc.
+#
+include ../Makefile.inc
+
+OBJS = main.o
+
+CFLAGS += -I$(multipathdir)
+LDFLAGS += -lpthread -ldevmapper -ldl -lmultipath -L$(multipathdir)
+
+EXEC = multipath
+
+# These targets do not produce a file of the same name; declaring them
+# phony keeps a stray file called e.g. "install" or "clean" from
+# turning them into no-ops.
+.PHONY: all install uninstall clean
+
+all: $(EXEC)
+
+# Link the binary and compress both man pages in the same step.
+$(EXEC): $(OBJS)
+       $(CC) $(CFLAGS) $(OBJS) -o $(EXEC) $(LDFLAGS)
+       $(GZIP) $(EXEC).8 > $(EXEC).8.gz
+       $(GZIP) $(EXEC).conf.5 > $(EXEC).conf.5.gz
+
+install:
+       $(INSTALL_PROGRAM) -d $(DESTDIR)$(bindir)
+       $(INSTALL_PROGRAM) -m 755 $(EXEC) $(DESTDIR)$(bindir)/
+       $(INSTALL_PROGRAM) -d $(DESTDIR)/etc/udev/rules.d
+       $(INSTALL_PROGRAM) -m 644 multipath.rules $(DESTDIR)/etc/udev/rules.d/
+       $(INSTALL_PROGRAM) -d $(DESTDIR)$(mandir)
+       $(INSTALL_PROGRAM) -m 644 $(EXEC).8.gz $(DESTDIR)$(mandir)
+       $(INSTALL_PROGRAM) -d $(DESTDIR)$(man5dir)
+       $(INSTALL_PROGRAM) -m 644 $(EXEC).conf.5.gz $(DESTDIR)$(man5dir)
+
+uninstall:
+       rm $(DESTDIR)/etc/udev/rules.d/multipath.rules
+       rm $(DESTDIR)$(bindir)/$(EXEC)
+       rm $(DESTDIR)$(mandir)/$(EXEC).8.gz
+       rm $(DESTDIR)$(man5dir)/$(EXEC).conf.5.gz
+
+clean:
+       rm -f core *.o $(EXEC) *.gz
diff --git a/multipath/dev_t.h b/multipath/dev_t.h
new file mode 100644 (file)
index 0000000..90c64f3
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * Helpers for a device number that keeps the minor number in its low
+ * MINORBITS bits and the major number in the bits above them.
+ */
+#define MINORBITS       20
+#define MINORMASK       ((1U << MINORBITS) - 1)
+
+#define MAJOR(dev)      ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev)      ((unsigned int) ((dev) & MINORMASK))
+#define MKDEV(ma,mi)    (((ma) << MINORBITS) | (mi))
+
+/* write "major:minor" followed by a newline into buffer */
+#define print_dev_t(buffer, dev) \
+       sprintf((buffer), "%u:%u\n", MAJOR(dev), MINOR(dev))
+
+/* write "major:minor" into buffer; the expression yields buffer */
+#define format_dev_t(buffer, dev) \
+       (sprintf(buffer, "%u:%u", MAJOR(dev), MINOR(dev)), buffer)
diff --git a/multipath/main.c b/multipath/main.c
new file mode 100644 (file)
index 0000000..1689efd
--- /dev/null
@@ -0,0 +1,477 @@
+/*
+ * Soft:        multipath device mapper target autoconfig
+ *
+ * Version:     $Id: main.h,v 0.0.1 2003/09/18 15:13:38 cvaroqui Exp $
+ *
+ * Author:      Christophe Varoqui
+ *
+ *              This program is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *              See the GNU General Public License for more details.
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Copyright (c) 2003, 2004, 2005 Christophe Varoqui
+ * Copyright (c) 2005 Benjamin Marzinski, Redhat
+ * Copyright (c) 2005 Kiyoshi Ueda, NEC
+ * Copyright (c) 2005 Patrick Caulfield, Redhat
+ * Copyright (c) 2005 Edward Goggin, EMC
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include <checkers.h>
+#include <prio.h>
+#include <vector.h>
+#include <memory.h>
+#include <libdevmapper.h>
+#include <devmapper.h>
+#include <util.h>
+#include <defaults.h>
+#include <structs.h>
+#include <structs_vec.h>
+#include <dmparser.h>
+#include <sysfs.h>
+#include <config.h>
+#include <blacklist.h>
+#include <discovery.h>
+#include <debug.h>
+#include <switchgroup.h>
+#include <print.h>
+#include <alias.h>
+#include <configure.h>
+#include <pgpolicies.h>
+#include <version.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* NOTE(review): not referenced by the code visible in this file -
+ * presumably read by code elsewhere; confirm. */
+int logsink;
+
+/*
+ * Drop from pathvec every path whose wwid differs from refwwid.
+ * Nothing is done when refwwid is NULL or empty.  Always returns 0.
+ */
+static int
+filter_pathvec (vector pathvec, char * refwwid)
+{
+       int slot;
+       struct path * pp;
+
+       if (refwwid == NULL || refwwid[0] == '\0')
+               return 0;
+
+       vector_foreach_slot (pathvec, pp, slot) {
+               if (!strncmp(pp->wwid, refwwid, WWID_SIZE))
+                       continue;
+
+               condlog(3, "skip path %s : out of scope", pp->dev);
+               free_path(pp);
+               vector_del_slot(pathvec, slot);
+               /* stay on the same slot index for the next iteration */
+               slot--;
+       }
+       return 0;
+}
+
+/*
+ * Print the version banner and the command line help on stderr, then
+ * terminate the process with exit status 1.  Does not return.
+ *
+ * The help text used to describe -p twice, the first time with an
+ * incomplete policy list; only the complete "-p pol" entry is kept.
+ */
+static void
+usage (char * progname)
+{
+       fprintf (stderr, VERSION_STRING);
+       fprintf (stderr, "Usage:\n");
+       fprintf (stderr, "  %s [-d] [-r] [-v lvl] [-p pol] [-b fil] [dev]\n", progname);
+       fprintf (stderr, "  %s -l|-ll|-f [-v lvl] [-b fil] [dev]\n", progname);
+       fprintf (stderr, "  %s -F [-v lvl]\n", progname);
+       fprintf (stderr, "  %s -h\n", progname);
+       fprintf (stderr,
+               "\n"
+               "Where:\n"
+               "  -h      print this usage text\n" \
+               "  -l      show multipath topology (sysfs and DM info)\n" \
+               "  -ll     show multipath topology (maximum info)\n" \
+               "  -f      flush a multipath device map\n" \
+               "  -F      flush all multipath device maps\n" \
+               "  -d      dry run, do not create or update devmaps\n" \
+               "  -r      force devmap reload\n" \
+               "  -b fil  bindings file location\n" \
+               "  -p pol  force all maps to specified path grouping policy :\n" \
+               "          . failover            one path per priority group\n" \
+               "          . multibus            all paths in one priority group\n" \
+               "          . group_by_serial     one priority group per serial\n" \
+               "          . group_by_prio       one priority group per priority lvl\n" \
+               "          . group_by_node_name  one priority group per target node\n" \
+               "  -v lvl  verbosity level\n" \
+               "          . 0 no output\n" \
+               "          . 1 print created devmap names only\n" \
+               "          . 2 default verbosity\n" \
+               "          . 3 print debug information\n" \
+               "  dev     action limited to:\n" \
+               "          . multipath named 'dev' (ex: mpath0) or\n" \
+               "          . multipath whose wwid is 'dev' (ex: 60051..)\n" \
+               "          . multipath including the path named 'dev' (ex: /dev/sda)\n" \
+               "          . multipath including the path with maj:min 'dev' (ex: 8:0)\n" \
+               );
+
+       exit(1);
+}
+
+/*
+ * Refresh the paths of every path group of mpp.
+ *
+ * A path without a device name gets its name resolved from its dev_t;
+ * when that fails the path is marked PATH_DOWN, otherwise it is bound
+ * to mpp and fully re-read with pathinfo().  A path that already has a
+ * name is bound to mpp and only gets its checker state and priority
+ * fetched when they are still unset.  Always returns 0.
+ */
+static int
+update_paths (struct multipath * mpp)
+{
+       int g, p;
+       struct pathgroup * pgp;
+       struct path * pp;
+
+       if (!mpp->pg)
+               return 0;
+
+       vector_foreach_slot (mpp->pg, pgp, g) {
+               if (!pgp->paths)
+                       continue;
+
+               vector_foreach_slot (pgp->paths, pp, p) {
+                       if (strlen(pp->dev) == 0) {
+                               if (devt2devname(pp->dev, pp->dev_t)) {
+                                       /*
+                                        * path is not in sysfs anymore
+                                        */
+                                       pp->state = PATH_DOWN;
+                               } else {
+                                       pp->mpp = mpp;
+                                       pathinfo(pp, conf->hwtable, DI_ALL);
+                               }
+                       } else {
+                               pp->mpp = mpp;
+
+                               if (pp->state == PATH_UNCHECKED ||
+                                   pp->state == PATH_WILD)
+                                       pathinfo(pp, conf->hwtable, DI_CHECKER);
+
+                               if (pp->priority == PRIO_UNDEF)
+                                       pathinfo(pp, conf->hwtable, DI_PRIO);
+                       }
+               }
+       }
+       return 0;
+}
+
+/*
+ * Fill curmp with the maps reported by dm_get_maps() and process each
+ * of them: maps whose wwid differs from refwwid (when one is given)
+ * are dropped, the others are disassembled into pathvec, optionally
+ * refreshed and printed depending on conf->list, and have their paths
+ * reinstated unless conf->dry_run is set.
+ *
+ * Returns 1 when dm_get_maps() fails, 0 otherwise.
+ */
+static int
+get_dm_mpvec (vector curmp, vector pathvec, char * refwwid)
+{
+       int i;
+       struct multipath * mpp;
+
+       if (dm_get_maps(curmp))
+               return 1;
+
+       vector_foreach_slot (curmp, mpp, i) {
+               /*
+                * discard out of scope maps
+                */
+               if (mpp->wwid && refwwid &&
+                   strncmp(mpp->wwid, refwwid, WWID_SIZE)) {
+                       condlog(3, "skip map %s: out of scope", mpp->alias);
+                       free_multipath(mpp, KEEP_PATHS);
+                       vector_del_slot(curmp, i);
+                       /* stay on the same slot index after the removal */
+                       i--;
+                       continue;
+               }
+
+               condlog(3, "params = %s", mpp->params);
+               condlog(3, "status = %s", mpp->status);
+
+               disassemble_map(pathvec, mpp->params, mpp);
+
+               /*
+                * disassemble_map() can add new paths to pathvec.
+                * If not in "fast list mode", we need to fetch information
+                * about them
+                */
+               if (conf->list != 1)
+                       update_paths(mpp);
+
+               /* only computed for a conf->list value above 1 */
+               if (conf->list > 1)
+                       mpp->bestpg = select_path_group(mpp);
+
+               disassemble_status(mpp->status, mpp);
+
+               if (conf->list)
+                       print_multipath_topology(mpp, conf->verbosity);
+
+               if (!conf->dry_run)
+                       reinstate_paths(mpp);
+       }
+       return 0;
+}
+
+
+/*
+ * One configuration pass: discover the paths, read the existing maps
+ * and, unless only listing was requested, coalesce paths into maps.
+ *
+ * Return value:
+ *  -1: Retry
+ *   0: Success
+ *   1: Failure
+ */
+static int
+configure (void)
+{
+       struct vectors vecs;
+       /* core vectors storing the multipaths and the paths */
+       vector curmp = vector_alloc();
+       vector pathvec = vector_alloc();
+       char * refwwid = NULL;
+       char * dev = NULL;
+       int di_flag = 0;
+       int r = 1;
+
+       if (!curmp || !pathvec) {
+               condlog(0, "can not allocate memory");
+               goto out;
+       }
+       vecs.pathvec = pathvec;
+       vecs.mpvec = curmp;
+
+       /*
+        * dev is conf->dev without a leading "/dev/", unless nothing
+        * would be left after stripping it
+        */
+       if (conf->dev) {
+               dev = conf->dev;
+               if (strlen(dev) > 5 && !strncmp(dev, "/dev/", 5))
+                       dev += 5;
+       }
+
+       /*
+        * a blacklisted device parameter ends the pass early
+        */
+       if (dev &&
+           filter_devnode(conf->blist_devnode, conf->elist_devnode, dev) > 0)
+               goto out;
+
+       /*
+        * the scope limit has to be translated into a wwid;
+        * a failed translation is fatal (by policy)
+        */
+       if (conf->dev) {
+               refwwid = get_refwwid(conf->dev, conf->dev_type, pathvec);
+               if (!refwwid) {
+                       condlog(3, "scope is nul");
+                       goto out;
+               }
+
+               condlog(3, "scope limited to %s", refwwid);
+               if (filter_wwid(conf->blist_wwid, conf->elist_wwid,
+                               refwwid) > 0)
+                       goto out;
+       }
+
+       /*
+        * choose how much path information to collect
+        */
+       if (!conf->list) {
+               /* maximum info */
+               di_flag = DI_ALL;
+       } else {
+               di_flag = conf->dev ? DI_WWID : 0;
+               /* minimum path info '-l' */
+               di_flag |= DI_SYSFS;
+               /* extended path info '-ll' */
+               if (conf->list > 1)
+                       di_flag |= DI_CHECKER;
+       }
+
+       if (path_discovery(pathvec, conf, di_flag))
+               goto out;
+
+       if (conf->verbosity > 2)
+               print_all_paths(pathvec, 1);
+
+       get_path_layout(pathvec, 0);
+
+       if (get_dm_mpvec(curmp, pathvec, refwwid))
+               goto out;
+
+       filter_pathvec(pathvec, refwwid);
+
+       /*
+        * listing stops here with success; otherwise enter the core logic
+        */
+       r = conf->list ? 0
+                      : coalesce_paths(&vecs, NULL, NULL, conf->force_reload);
+
+out:
+       if (refwwid)
+               FREE(refwwid);
+
+       free_multipathvec(curmp, KEEP_PATHS);
+       free_pathvec(pathvec, FREE_PATHS);
+
+       return r;
+}
+
+/*
+ * Entry point of the multipath command.
+ *
+ * Requires uid 0, loads the configuration, initializes the checkers,
+ * the prioritizers and sysfs, parses the command line, then either
+ * flushes one map (-f), flushes all maps (-F) or runs configure()
+ * until it stops asking for a retry.
+ *
+ * Returns the status of the flush or of the last configure() call;
+ * set-up failures exit with status 1.
+ */
+int
+main (int argc, char *argv[])
+{
+       int arg;
+       extern char *optarg;
+       extern int optind;
+       extern int optopt;
+       int i, r = 1;
+
+       if (getuid() != 0) {
+               fprintf(stderr, "need to be root\n");
+               exit(1);
+       }
+
+       if (dm_prereq())
+               exit(1);
+
+       if (load_config(DEFAULT_CONFIGFILE))
+               exit(1);
+
+       if (init_checkers()) {
+               condlog(0, "failed to initialize checkers");
+               exit(1);
+       }
+       if (init_prio()) {
+               condlog(0, "failed to initialize prioritizers");
+               exit(1);
+       }
+       if (sysfs_init(conf->sysfs_dir, FILE_NAME_SIZE)) {
+               condlog(0, "multipath tools need sysfs mounted");
+               exit(1);
+       }
+       while ((arg = getopt(argc, argv, ":dhl::FfM:v:p:b:r")) != EOF ) {
+               switch(arg) {
+               case 1: printf("optarg : %s\n",optarg);
+                       break;
+               case 'v':
+                       /*
+                        * The level must start with a digit.  The cast
+                        * keeps a negative char value out of isdigit().
+                        */
+                       if (!isdigit((unsigned char) optarg[0]))
+                               usage (argv[0]);
+
+                       conf->verbosity = atoi(optarg);
+                       break;
+               case 'b':
+                       conf->bindings_file = optarg;
+                       break;
+               case 'd':
+                       conf->dry_run = 1;
+                       break;
+               case 'f':
+                       conf->remove = FLUSH_ONE;
+                       break;
+               case 'F':
+                       conf->remove = FLUSH_ALL;
+                       break;
+               case 'l':
+                       conf->list = 1;
+                       conf->dry_run = 1;
+
+                       /* "-ll": the second 'l' is the optional argument */
+                       if (optarg && !strncmp(optarg, "l", 1))
+                               conf->list++;
+
+                       break;
+               case 'M':
+#if _DEBUG_
+                       debug = atoi(optarg);
+#endif
+                       break;
+               case 'p':
+                       conf->pgpolicy_flag = get_pgpolicy_id(optarg);
+                       if (conf->pgpolicy_flag == -1) {
+                               printf("'%s' is not a valid policy\n", optarg);
+                               usage(argv[0]);
+                       }
+                       break;
+               case 'r':
+                       conf->force_reload = 1;
+                       break;
+               /* usage() exits, so the cases below never fall through */
+               case 'h':
+                       usage(argv[0]);
+               case ':':
+                       fprintf(stderr, "Missing option argument\n");
+                       usage(argv[0]);
+               case '?':
+                       /*
+                        * getopt() reports the offending character in
+                        * optopt; optarg is not set for an unknown
+                        * option and must not be printed here.
+                        */
+                       fprintf(stderr, "Unknown switch: -%c\n", optopt);
+                       usage(argv[0]);
+               default:
+                       usage(argv[0]);
+               }
+       }
+       if (optind < argc) {
+               conf->dev = MALLOC(FILE_NAME_SIZE);
+
+               if (!conf->dev)
+                       goto out;
+
+               /* strncpy() alone does not terminate an overlong name */
+               strncpy(conf->dev, argv[optind], FILE_NAME_SIZE - 1);
+               conf->dev[FILE_NAME_SIZE - 1] = '\0';
+
+               /* classify the argument: device node, maj:min or map name */
+               if (filepresent(conf->dev))
+                       conf->dev_type = DEV_DEVNODE;
+               else if (sscanf(conf->dev, "%d:%d", &i, &i) == 2)
+                       conf->dev_type = DEV_DEVT;
+               else
+                       conf->dev_type = DEV_DEVMAP;
+
+       }
+       conf->daemon = 0;
+
+       if (conf->max_fds) {
+               struct rlimit fd_limit;
+
+               fd_limit.rlim_cur = conf->max_fds;
+               fd_limit.rlim_max = conf->max_fds;
+               if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0)
+                       condlog(0, "can't set open fds limit to %d : %s\n",
+                               conf->max_fds, strerror(errno));
+       }
+
+       dm_init();
+
+       if (conf->remove == FLUSH_ONE) {
+               if (conf->dev_type == DEV_DEVMAP)
+                       r = dm_flush_map(conf->dev);
+               else
+                       condlog(0, "must provide a map name to remove");
+
+               goto out;
+       }
+       else if (conf->remove == FLUSH_ALL) {
+               r = dm_flush_maps();
+               goto out;
+       }
+       /* a negative result from configure() asks for another pass */
+       while ((r = configure()) < 0)
+               condlog(3, "restart multipath configuration process");
+
+out:
+       dm_udev_wait(conf->cookie);
+
+       sysfs_cleanup();
+       dm_lib_release();
+       dm_lib_exit();
+
+       cleanup_prio();
+       cleanup_checkers();
+       /*
+        * Freeing config must be done after dm_lib_exit(), because
+        * the logging function (dm_write_log()), which is called there,
+        * references the config.
+        */
+       free_config(conf);
+       conf = NULL;
+
+#ifdef _DEBUG_
+       dbg_free_final(NULL);
+#endif
+       return r;
+}
diff --git a/multipath/multipath.8 b/multipath/multipath.8
new file mode 100644 (file)
index 0000000..7ba7126
--- /dev/null
@@ -0,0 +1,84 @@
+.TH MULTIPATH 8 "July 2006" "" "Linux Administrator's Manual"
+.SH NAME
+multipath \- Device mapper target autoconfig
+.SH SYNOPSIS
+.B multipath
+.RB [\| \-v\ \c
+.IR verbosity \|]
+.RB [\| \-d \|]
+.RB [\| \-h | \-l | \-ll | \-f | \-F \|]
+.RB [\| \-p\ \c
+.BR failover | multibus | group_by_serial | group_by_prio | group_by_node_name \|]
+.RB [\| device \|]
+.SH DESCRIPTION
+.B multipath
+is used to detect multiple paths to devices for fail-over or performance reasons and to coalesce them.
+.SH OPTIONS
+.TP
+.B \-v " level"
+verbosity, print all paths and multipaths
+.RS 1.2i
+.TP 1.2i
+.B 0
+no output
+.TP
+.B 1
+print the created or updated multipath names only, for use to feed other tools like kpartx
+.TP
+.B 2 +
+print all info : detected paths, coalesced paths (ie multipaths) and device maps
+.RE
+.TP
+.B \-h
+print usage text
+.TP
+.B \-d
+dry run, do not create or update devmaps
+.TP
+.B \-l
+show the current multipath topology from information fetched in sysfs and the device mapper
+.TP
+.B \-ll
+show the current multipath topology from all available information (sysfs, the device mapper, path checkers ...)
+.TP
+.B \-f
+flush a multipath device map specified as parameter, if unused
+.TP
+.B \-F
+flush all unused multipath device maps
+.TP
+.BI \-p " policy"
+force maps to specified policy:
+.RS 1.2i
+.TP 1.2i
+.B failover
+1 path per priority group
+.TP
+.B multibus
+all paths in 1 priority group
+.TP
+.B group_by_serial
+1 priority group per serial
+.TP
+.B group_by_prio
+1 priority group per priority value. Priorities are determined by callout programs specified as a global, per-controller or per-multipath option in the configuration file
+.TP
+.B group_by_node_name
+1 priority group per target node name. Target node names are fetched in /sys/class/fc_transport/target*/node_name.
+.RE
+.TP
+.BI device
+update only the devmap the path pointed to by
+.I device
+is in. 
+.I device
+is in the /dev/sdb (as shown by udev in the $DEVNAME variable) or major:minor format.
+.I device
+may alternatively be a multipath mapname
+.SH "SEE ALSO"
+.BR udev (8),
+.BR dmsetup (8),
+.BR hotplug (8)
+.SH AUTHORS
+.B multipath
+was developed by Christophe Varoqui, <christophe.varoqui@opensvc.com> and others.
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
new file mode 100644 (file)
index 0000000..165982e
--- /dev/null
@@ -0,0 +1,434 @@
+.TH MULTIPATH.CONF 5 "30 November 2006"
+.SH NAME
+multipath.conf \- multipath daemon configuration file
+.SH DESCRIPTION
+.B "multipath.conf"
+is the configuration file for the multipath daemon. It is used to
+overwrite the built-in configuration table of \fBmultipathd\fP.
+Any line whose first non-white-space character is a '#' is considered
+a comment line. Empty lines are ignored.
+.SH SYNTAX
+The configuration file contains entries of the form:
+.RS
+.nf
+.ft B
+.sp
+<section> {
+.RS
+.ft B
+<attribute> <value>
+.I "..."
+.ft B
+<subsection> {
+.RS
+.ft B
+<attribute> <value>
+.I "..."
+.RE
+}
+.RE
+}
+.ft R
+.fi
+.RE
+.LP
+Each \fIsection\fP contains one or more attributes or subsections. The
+recognized keywords for attributes or subsections depend on the
+section in which they occur.
+.LP
+The following \fIsection\fP keywords are recognized:
+.TP 17
+.B defaults
+This section defines default values for attributes which are used
+whenever no specific setting is given.
+.TP
+.B blacklist
+This section defines which devices should be excluded from the
+multipath topology discovery.
+.TP
+.B blacklist_exceptions
+This section defines which devices should be included in the
+multipath topology discovery, despite being listed in the
+.I blacklist
+section.
+.TP
+.B multipaths
+This section defines the multipath topologies. They are indexed by a
+\fIWorld Wide Identifier\fR(wwid), which is the result of the
+\fIgetuid_callout\fR program.
+.TP
+.B devices
+This section defines the device-specific settings.
+.RE
+.LP
+.SH "defaults section"
+The
+.B defaults
+section recognizes the following keywords:
+.TP 17
+.B polling_interval
+interval between two path checks in seconds. For properly functioning paths,
+the interval between checks will gradually increase to (4 * polling_interval);
+default is
+.I 5
+.TP
+.B udev_dir
+directory where udev creates its device nodes; default is
+.I /dev
+.TP
+.B verbosity
+default verbosity. Higher values increase the verbosity level. Valid
+levels are between 0 and 6; default is
+.I 2
+.TP
+.B selector
+The default path selector algorithm to use; they are offered by the
+kernel multipath target. The only currently implemented is
+.I "round-robin 0"
+.TP
+.B path_grouping_policy
+The default path grouping policy to apply to unspecified
+multipaths. Possible values are
+.RS
+.TP 12
+.B failover
+1 path per priority group
+.TP
+.B multibus
+all paths in 1 priority group
+.TP
+.B group_by_serial
+1 priority group per serial number
+.TP
+.B group_by_prio
+1 priority group per priority value. Priorities are determined by
+callout programs specified as a global, per-controller or
+per-multipath option in the configuration file.
+.TP
+.B group_by_node_name
+1 priority group per target node name. Target node names are fetched
+in /sys/class/fc_transport/target*/node_name.
+.TP
+Default value is \fImultibus\fR.
+.RE
+.TP
+.B getuid_callout
+The default program and args to callout to obtain a unique path
+identifier. Should be specified with an absolute path. Default value
+is
+.I /lib/udev/scsi_id --whitelisted --device=/dev/%n
+.TP
+.B prio_callout
+The default program and args to callout to obtain a path priority
+value. The specified program will be executed and should return a
+numeric value specifying the relative priority of this path. Higher
+numbers have a higher priority. A '%n' in the command line will be expanded
+to the device name, a '%b' will be expanded to the device number in
+.I major:minor
+format.
+.I "none"
+is a valid value. Currently the following path priority programs are
+implemented:
+.RS
+.TP 12
+.B mpath_prio_emc /dev/%n
+Generate the path priority for EMC arrays
+.TP
+.B mpath_prio_alua /dev/%n
+Generate the path priority based on the SCSI-3 ALUA settings.
+.TP
+.B mpath_prio_netapp /dev/%n
+Generate the path priority for NetApp arrays.
+.TP
+.B mpath_prio_rdac /dev/%n
+Generate the path priority for LSI/Engenio RDAC controller.
+.TP
+.B mpath_prio_hp_sw /dev/%n
+Generate the path priority for Compaq/HP controller in
+active/standby mode.
+.TP
+.B mpath_prio_hds_modular %b
+Generate the path priority for Hitachi HDS Modular storage arrays.
+.TP
+Default value is \fBnone\fR.
+.RE
+.TP
+.B features
+Specify any device-mapper features to be used. The most common of
+these features is
+.I "1 queue_if_no_path" 
+Note that this can also be set via the
+.I no_path_retry
+keyword.
+.TP
+.B path_checker
+The default method used to determine the paths' state. Possible values
+are
+.RS
+.TP 12
+.B readsector0
+Read the first sector of the device
+.TP
+.B tur
+Issue a
+.I TEST UNIT READY
+command to the device.
+.TP
+.B emc_clariion
+Query the EMC Clariion specific EVPD page 0xC0 to determine the path
+state.
+.TP
+.B hp_sw
+Check the path state for HP storage arrays with Active/Standby firmware.
+.TP
+.B rdac
+Check the path state for LSI/Engenio RDAC storage controller.
+.TP
+.B directio
+Read the first sector with direct I/O.
+.TP
+Default value is \fIreadsector0\fR.
+.RE
+.TP
+.B failback
+Tell the daemon to manage path group failback, or not to. 0 or
+.I immediate
+means immediate failback, values >0 means deferred failback (in
+seconds).
+.I manual
+means no failback. Default value is
+.I manual
+.TP
+.B  rr_min_io
+The number of IO to route to a path before switching to the next in
+the same path group. Default is
+.I 1000
+.TP
+.B rr_weight
+If set to \fIpriorities\fR the multipath configurator will assign
+path weights as "path prio * rr_min_io". Possible values are
+.I priorities
+or
+.IR uniform .
+Default is
+.IR uniform .
+.TP
+.B no_path_retry
+Specify the number of retries until queueing is disabled, or
+.I fail
+for immediate failure (no queueing),
+.I queue
+to never stop queueing. Default is 0.
+.TP
+.B user_friendly_names
+If set to 
+.I yes
+, using the bindings file
+.I /etc/multipath/bindings
+to assign a persistent and unique alias to the multipath, in the form of mpath<n>.
+If set to 
+.I no
+use the WWID as the alias. In either case this will
+be overridden by any specific aliases in the \fImultipaths\fR section.
+Default is
+.I no
+.TP
+.B max_fds
+Specify the maximum number of file descriptors that can be opened by multipath
+and multipathd.  This is equivalent to ulimit -n. A value of \fImax\fR will set
+this to the system limit from /proc/sys/fs/nr_open. If this is not set, the
+maximum number of open fds is taken from the calling process. It is usually
+1024. To be safe, this should be set to the maximum number of paths plus 32,
+if that number is greater than 1024.
+.TP
+.B fast_io_fail_tmo
+Specify the number of seconds the scsi layer will wait after a problem has been
+detected on a FC remote port before failing IO to devices on that remote port.
+This should be smaller than dev_loss_tmo. Setting this to
+.I off
+will disable the timeout.
+.TP
+.B dev_loss_tmo
+Specify the number of seconds the scsi layer will wait after a problem has
+been detected on a FC remote port before removing it from the system.
+.TP
+.B queue_without_daemon
+If set to
+.I no
+, when multipathd stops, queueing will be turned off for all devices.
+This is useful for devices that set no_path_retry.  If a machine is
+shut down while all paths to a device are down, it is possible to hang waiting
+for IO to return from the device after multipathd has been stopped. Without
+multipathd running, access to the paths cannot be restored, and the kernel
+cannot be told to stop queueing IO. Setting queue_without_daemon to
+.I no
+, avoids this problem. Default is
+.I yes
+.
+.SH "blacklist section"
+The
+.I blacklist
+section is used to exclude specific devices from inclusion in the
+multipath topology. It is most commonly used to exclude local disks or
+LUNs for the array controller.
+.LP
+The following keywords are recognized:
+.TP 17
+.B wwid
+The \fIWorld Wide Identification\fR of a device.
+.TP
+.B devnode
+Regular expression of the device nodes to be excluded.
+.TP
+.B device
+Subsection for the device description. This subsection recognizes the
+.I vendor
+and
+.I product
+keywords. For a full description of these keywords please see the
+.I devices
+section description.
+.SH "blacklist_exceptions section"
+The
+.I blacklist_exceptions
+section is used to revert the actions of the
+.I blacklist
+section, i.e. to include specific devices in the
+multipath topology. This allows to selectively include devices which
+would normally be excluded via the
+.I blacklist
+section.
+.LP
+The following keywords are recognized:
+.TP 17
+.B wwid
+The \fIWorld Wide Identification\fR of a device.
+.TP
+.B devnode
+Regular expression of the device nodes to be included.
+.TP
+.B device
+Subsection for the device description. This subsection recognizes the
+.I vendor
+and
+.I product
+keywords. For a full description of these keywords please see the
+.I devices
+section description.
+.SH "multipaths section"
+The only recognized attribute for the
+.B multipaths
+section is the
+.I multipath
+subsection.
+.LP
+The
+.B multipath
+subsection recognizes the following attributes:
+.TP 17
+.B wwid
+Index of the container. Mandatory for this subsection.
+.TP
+.B alias
+(Optional) symbolic name for the multipath map.
+.LP
+The following attributes are optional; if not set the default values
+are taken from the
+.I defaults
+section:
+.sp 1
+.PD .1v
+.RS
+.TP 18
+.B path_grouping_policy
+.TP
+.B path_selector
+.TP
+.B failback
+.TP
+.B no_path_retry
+.TP
+.B rr_min_io
+.RE
+.PD
+.LP
+.SH "devices section"
+The only recognized attribute for the
+.B devices
+section is the
+.I device
+subsection.
+.LP
+The
+.I device
+subsection recognizes the following attributes:
+.TP 17
+.B vendor
+(Mandatory) Vendor identifier
+.TP
+.B product
+(Mandatory) Product identifier
+.TP
+.B product_blacklist
+Product strings to blacklist for this vendor
+.TP
+.B hardware_handler
+(Optional) The hardware handler to use for this device type.
+The following hardware handlers are implemented:
+.RS
+.TP 12
+.B 1 emc
+Hardware handler for EMC storage arrays.
+.RE
+.LP
+The following attributes are optional; if not set the default values
+are taken from the
+.I defaults
+section:
+.sp 1
+.PD .1v
+.RS
+.TP 18
+.B path_grouping_policy
+.TP
+.B getuid_callout
+.TP
+.B path_selector
+.TP
+.B path_checker
+.TP
+.B features
+.TP
+.B prio_callout
+.TP
+.B failback
+.TP
+.B rr_weight
+.TP
+.B no_path_retry
+.TP
+.B rr_min_io
+.TP
+.B fast_io_fail_tmo
+.TP
+.B dev_loss_tmo
+.RE
+.PD
+.LP
+.SH "KNOWN ISSUES"
+The usage of
+.B queue_if_no_path
+option can lead to
+.B D state
+processes being hung and not killable in situations where all the paths to the LUN go offline.
+It is advisable to use the
+.B no_path_retry
+option instead.
+.SH "SEE ALSO"
+.BR udev (8),
+.BR dmsetup (8),
+.BR multipath (8),
+.BR multipathd (8)
+.SH AUTHORS
+.B multipath
+was developed by Christophe Varoqui, <christophe.varoqui@opensvc.com> and others.
diff --git a/multipath/multipath.rules b/multipath/multipath.rules
new file mode 100644 (file)
index 0000000..ac97749
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# udev rules for multipathing.
+# The persistent symlinks are created with the kpartx rules
+#
+
+# socket for uevents
+SUBSYSTEM=="block", RUN+="socket:/org/kernel/dm/multipath_event"
diff --git a/multipathd/Makefile b/multipathd/Makefile
new file mode 100644 (file)
index 0000000..32d9ef5
--- /dev/null
@@ -0,0 +1,48 @@
+EXEC = multipathd
+
+include ../Makefile.inc
+
+#
+# basic flags setting
+#
+CFLAGS += -I$(multipathdir)
+LDFLAGS += -lpthread -ldevmapper -lreadline -lncurses -ldl \
+          -lmultipath -L$(multipathdir)
+
+#
+# debugging stuff
+#
+#CFLAGS += -DLCKDBG
+#CFLAGS += -D_DEBUG_
+#CFLAGS += -DLOGDBG
+
+#
+# object files
+#
+OBJS = main.o pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o
+
+
+#
+# directives
+#
+all : $(EXEC)
+
+$(EXEC): $(OBJS)
+       $(CC) $(CFLAGS) $(LDFLAGS) -o $(EXEC) $(OBJS)
+       $(GZIP) $(EXEC).8 > $(EXEC).8.gz
+
+install:
+       $(INSTALL_PROGRAM) -d $(DESTDIR)$(bindir)
+       $(INSTALL_PROGRAM) -m 755 $(EXEC) $(DESTDIR)$(bindir)
+       $(INSTALL_PROGRAM) -d $(DESTDIR)$(rcdir)
+       $(INSTALL_PROGRAM) -d $(DESTDIR)$(mandir)
+       $(INSTALL_PROGRAM) -m 644 $(EXEC).8.gz $(DESTDIR)$(mandir)
+
+uninstall:
+       rm -f $(DESTDIR)$(bindir)/$(EXEC)
+       rm -f $(DESTDIR)$(rcdir)/$(EXEC)
+       rm -f $(DESTDIR)$(mandir)/$(EXEC).8.gz
+
+clean:
+       rm -f core *.o $(EXEC) *.gz
+
diff --git a/multipathd/cli.c b/multipathd/cli.c
new file mode 100644 (file)
index 0000000..208a0ad
--- /dev/null
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2005 Christophe Varoqui
+ */
+#include <memory.h>
+#include <vector.h>
+#include <parser.h>
+#include <util.h>
+#include <version.h>
+#include <readline/readline.h>
+
+#include "cli.h"
+
+/*
+ * Obtain storage for one struct key through the MALLOC wrapper.
+ * Callers treat a NULL result as an allocation failure.
+ */
+static struct key *
+alloc_key (void)
+{
+       struct key * fresh = (struct key *)MALLOC(sizeof(*fresh));
+
+       return fresh;
+}
+
+/*
+ * Obtain storage for one struct handler through the MALLOC wrapper.
+ * Callers treat a NULL result as an allocation failure.
+ */
+static struct handler *
+alloc_handler (void)
+{
+       struct handler * fresh = (struct handler *)MALLOC(sizeof(*fresh));
+
+       return fresh;
+}
+
+/*
+ * Create a keyword entry and append it to vec.
+ *
+ * str is duplicated, so the caller keeps ownership of its own copy.
+ * code and has_param are stored verbatim in the new entry.
+ *
+ * Returns 0 on success, 1 when any allocation fails; on failure nothing
+ * is left behind in vec.
+ */
+static int
+add_key (vector vec, char * str, int code, int has_param)
+{
+       struct key * entry = alloc_key();
+
+       if (!entry)
+               return 1;
+
+       entry->code = code;
+       entry->has_param = has_param;
+       entry->str = STRDUP(str);
+
+       if (entry->str && vector_alloc_slot(vec)) {
+               vector_set_slot(vec, entry);
+               return 0;
+       }
+
+       /* undo the partial construction; the string may not exist yet */
+       if (entry->str)
+               FREE(entry->str);
+       FREE(entry);
+       return 1;
+}
+
+/*
+ * Register a command handler in the global handlers vector.
+ *
+ * fp is the command fingerprint the handler answers to, fn the callback
+ * (may be NULL and filled in later with set_handler_callback()).
+ *
+ * Returns 0 on success, 1 when an allocation fails.
+ */
+int
+add_handler (int fp, int (*fn)(void *, char **, int *, void *))
+{
+       struct handler * entry = alloc_handler();
+
+       if (!entry)
+               return 1;
+
+       entry->fingerprint = fp;
+       entry->fn = fn;
+
+       if (vector_alloc_slot(handlers)) {
+               vector_set_slot(handlers, entry);
+               return 0;
+       }
+
+       FREE(entry);
+       return 1;
+}
+
+/*
+ * Look up the registered handler whose fingerprint equals fp.
+ * Returns the first match in registration order, or NULL.
+ */
+static struct handler *
+find_handler (int fp)
+{
+       struct handler * cur;
+       int idx;
+
+       vector_foreach_slot (handlers, cur, idx) {
+               if (cur->fingerprint == fp)
+                       return cur;
+       }
+
+       return NULL;
+}
+
+/*
+ * Attach callback fn to the already registered handler identified by fp.
+ * Returns 0 on success, 1 when no handler carries that fingerprint.
+ */
+int
+set_handler_callback (int fp, int (*fn)(void *, char **, int *, void *))
+{
+       struct handler * target = find_handler(fp);
+
+       if (target) {
+               target->fn = fn;
+               return 0;
+       }
+       return 1;
+}
+
+/*
+ * Release a keyword entry together with the strings it owns.
+ * Either string may be absent (NULL) and is then skipped.
+ */
+static void
+free_key (struct key * kw)
+{
+       if (kw->str) {
+               FREE(kw->str);
+       }
+       if (kw->param) {
+               FREE(kw->param);
+       }
+       FREE(kw);
+}
+
+/*
+ * Release every keyword entry stored in vec, then the vector itself.
+ */
+void
+free_keys (vector vec)
+{
+       struct key * entry;
+       int idx;
+
+       vector_foreach_slot (vec, entry, idx) {
+               free_key(entry);
+       }
+
+       vector_free(vec);
+}
+
+/*
+ * Release every handler stored in vec, then the vector itself.
+ */
+void
+free_handlers (vector vec)
+{
+       struct handler * h;
+       int idx;
+
+       vector_foreach_slot (vec, h, idx) {
+               FREE(h);
+       }
+
+       vector_free(vec);
+}
+
+/*
+ * Build the global keyword vector "keys".
+ *
+ * Several words may share one code (aliases such as "list"/"show").
+ * The order of the table is the order in which the keywords are added.
+ *
+ * Returns 0 on success. On any failure the partially filled vector is
+ * released, keys is reset to NULL and 1 is returned.
+ */
+int
+load_keys (void)
+{
+       static const struct {
+               char * word;
+               int code;
+               int has_param;
+       } table[] = {
+               { "list",            LIST,        0 },
+               { "show",            LIST,        0 },
+               { "add",             ADD,         0 },
+               { "remove",          DEL,         0 },
+               { "del",             DEL,         0 },
+               { "switch",          SWITCH,      0 },
+               { "switchgroup",     SWITCH,      0 },
+               { "suspend",         SUSPEND,     0 },
+               { "resume",          RESUME,      0 },
+               { "reinstate",       REINSTATE,   0 },
+               { "fail",            FAIL,        0 },
+               { "resize",          RESIZE,      0 },
+               { "disablequeueing", DISABLEQ,    0 },
+               { "restorequeueing", RESTOREQ,    0 },
+               { "paths",           PATHS,       0 },
+               { "maps",            MAPS,        0 },
+               { "multipaths",      MAPS,        0 },
+               { "path",            PATH,        1 },
+               { "map",             MAP,         1 },
+               { "multipath",       MAP,         1 },
+               { "group",           GROUP,       1 },
+               { "reconfigure",     RECONFIGURE, 0 },
+               { "status",          STATUS,      0 },
+               { "stats",           STATS,       0 },
+               { "topology",        TOPOLOGY,    0 },
+               { "config",          CONFIG,      0 },
+               { "blacklist",       BLACKLIST,   0 },
+               { "devices",         DEVICES,     0 },
+               { "format",          FMT,         1 },
+               { "wildcards",       WILDCARDS,   0 },
+               { "quit",            QUIT,        0 },
+               { "exit",            QUIT,        0 },
+       };
+       unsigned int n;
+       int failures = 0;
+
+       keys = vector_alloc();
+       if (!keys)
+               return 1;
+
+       /* keep going after a failure; the count is only tested at the end */
+       for (n = 0; n < sizeof(table) / sizeof(table[0]); n++)
+               failures += add_key(keys, table[n].word, table[n].code,
+                                   table[n].has_param);
+
+       if (!failures)
+               return 0;
+
+       free_keys(keys);
+       keys = NULL;
+       return 1;
+}
+
+/*
+ * Resolve a word typed by the user to a keyword.
+ *
+ * An exact match wins as soon as it is met. Otherwise str is accepted
+ * as an abbreviation when it is the prefix of exactly one keyword seen
+ * so far; a second prefix match makes the word ambiguous.
+ *
+ * Returns the keyword, or NULL when nothing matches or the word is
+ * ambiguous.
+ */
+static struct key *
+find_key (const char * str)
+{
+       struct key * cand;
+       struct key * partial = NULL;
+       int idx;
+       int want = strlen(str);
+
+       vector_foreach_slot (keys, cand, idx) {
+               int have;
+
+               if (strncmp(cand->str, str, want) != 0)
+                       continue;
+
+               have = strlen(cand->str);
+               if (have == want)
+                       return cand; /* exact match */
+               if (want > have)
+                       continue;
+               if (partial)
+                       return NULL; /* ambiguous word */
+               partial = cand; /* shortcut match */
+       }
+       return partial;
+}
+
+#define E_SYNTAX       1
+#define E_NOPARM       2
+#define E_NOMEM                3
+
+/*
+ * Turn a command line into a vector of keyword entries.
+ *
+ * cmd is split with alloc_strvec(); every word is resolved with
+ * find_key() and copied into a fresh struct key. A word that follows a
+ * keyword flagged has_param is stored as that keyword's param instead
+ * of being looked up. Tokens starting with a double quote are skipped.
+ *
+ * On success 0 is returned and *v receives the new vector, which the
+ * caller releases with free_keys(). On failure *v is always NULL and
+ * the return value is one of:
+ *   E_SYNTAX  a word is not a known (or is an ambiguous) keyword
+ *   E_NOPARM  the last keyword expects a parameter that is missing
+ *   E_NOMEM   an allocation failed, or alloc_strvec() returned nothing
+ *             (NOTE(review): presumably also the case for an empty
+ *             line - confirm against alloc_strvec())
+ */
+static int
+get_cmdvec (char * cmd, vector *v)
+{
+       int i;
+       int r = 0;
+       int get_param = 0;
+       char * buff;
+       struct key * kw = NULL;
+       struct key * cmdkw = NULL;
+       vector cmdvec, strvec;
+
+       /* callers test *v on every path, so never leave it unset */
+       *v = NULL;
+
+       strvec = alloc_strvec(cmd);
+       if (!strvec)
+               return E_NOMEM;
+
+       cmdvec = vector_alloc();
+
+       if (!cmdvec) {
+               free_strvec(strvec);
+               return E_NOMEM;
+       }
+
+       vector_foreach_slot(strvec, buff, i) {
+               if (*buff == '"')
+                       continue;
+               if (get_param) {
+                       /* this word is the value of the previous keyword */
+                       get_param = 0;
+                       /* STRDUP pairs with the FREE done by free_key() */
+                       cmdkw->param = STRDUP(buff);
+                       if (!cmdkw->param) {
+                               r = E_NOMEM;
+                               goto out;
+                       }
+                       continue;
+               }
+               kw = find_key(buff);
+               if (!kw) {
+                       r = E_SYNTAX;
+                       goto out;
+               }
+               cmdkw = alloc_key();
+               if (!cmdkw) {
+                       r = E_NOMEM;
+                       goto out;
+               }
+               /* free_key() inspects both strings: make them well defined */
+               cmdkw->str = NULL;
+               cmdkw->param = NULL;
+               if (!vector_alloc_slot(cmdvec)) {
+                       FREE(cmdkw);
+                       r = E_NOMEM;
+                       goto out;
+               }
+               vector_set_slot(cmdvec, cmdkw);
+               cmdkw->code = kw->code;
+               cmdkw->has_param = kw->has_param;
+               if (kw->has_param)
+                       get_param = 1;
+       }
+       if (get_param) {
+               r = E_NOPARM;
+               goto out;
+       }
+       free_strvec(strvec);
+       *v = cmdvec;
+       return 0;
+
+out:
+       free_strvec(strvec);
+       free_keys(cmdvec);
+       return r;
+}
+
+/*
+ * Compute the fingerprint of a parsed command: the sum of the codes of
+ * all its keywords. A NULL vector yields 0.
+ */
+static int
+fingerprint(vector vec)
+{
+       struct key * kw;
+       int idx;
+       int sum = 0;
+
+       if (vec) {
+               vector_foreach_slot(vec, kw, idx) {
+                       sum += kw->code;
+               }
+       }
+       return sum;
+}
+
+/*
+ * Create the global handlers vector.
+ * Returns 0 on success, 1 when the vector could not be allocated.
+ */
+int
+alloc_handlers (void)
+{
+       handlers = vector_alloc();
+
+       return handlers ? 0 : 1;
+}
+
+/*
+ * Append "<pre><str>" at offset len of buf, whose capacity is size.
+ *
+ * Nothing is written once len has reached size. The return value is
+ * always the number of characters the text needs, so the caller can
+ * keep counting past the end of a buffer that turned out too small.
+ */
+static int
+genhelp_append (char * buf, int size, int len, const char * pre,
+               const char * str)
+{
+       if (len < size)
+               return snprintf(buf + len, size - len, "%s%s", pre, str);
+
+       return strlen(pre) + strlen(str);
+}
+
+/*
+ * Append "|<alias>" for every keyword sharing refkw's code, refkw
+ * itself excluded. Each alias is placed after the previous one.
+ *
+ * Returns the number of characters the aliases need.
+ */
+static int
+genhelp_sprint_aliases (char * buf, int size, int len, vector keys,
+                       struct key * refkw)
+{
+       int i, fwd = 0;
+       struct key * kw;
+
+       vector_foreach_slot (keys, kw, i)
+               if (kw->code == refkw->code && kw != refkw)
+                       fwd += genhelp_append(buf, size, len + fwd,
+                                             "|", kw->str);
+
+       return fwd;
+}
+
+/*
+ * Format the command reference into buf (capacity size): the version
+ * banner, a title, then one line per registered handler listing the
+ * keywords of its fingerprint, their aliases, and a "$keyword"
+ * placeholder for keywords that take a parameter.
+ *
+ * Returns the length the complete text needs (terminator excluded);
+ * a value >= size means the text was truncated.
+ */
+static int
+genhelp_fill (char * buf, int size)
+{
+       int i, j;
+       int fp;
+       int len;
+       struct handler * h;
+       struct key * kw;
+
+       len = snprintf(buf, size, VERSION_STRING);
+       len += genhelp_append(buf, size, len, "CLI commands reference:", "\n");
+
+       vector_foreach_slot (handlers, h, i) {
+               fp = h->fingerprint;
+               vector_foreach_slot (keys, kw, j) {
+                       if (!(kw->code & fp))
+                               continue;
+                       /* clear the bit so aliases are not listed twice */
+                       fp -= kw->code;
+                       len += genhelp_append(buf, size, len, " ", kw->str);
+                       len += genhelp_sprint_aliases(buf, size, len, keys, kw);
+
+                       if (kw->has_param)
+                               len += genhelp_append(buf, size, len,
+                                                     " $", kw->str);
+               }
+               len += genhelp_append(buf, size, len, "\n", "");
+       }
+
+       return len;
+}
+
+/*
+ * Build the help text returned for unknown or malformed commands.
+ *
+ * A first attempt uses INITIAL_REPLY_LEN bytes. If the text does not
+ * fit, the buffer is replaced by one of exactly the required size and
+ * the text is formatted again, so the reply is never truncated and the
+ * buffer is never overrun.
+ *
+ * Returns a NUL-terminated string owned by the caller, or NULL when
+ * memory cannot be allocated.
+ */
+static char *
+genhelp_handler (void)
+{
+       int size = INITIAL_REPLY_LEN;
+       int need;
+       char * reply;
+
+       reply = MALLOC(size);
+
+       if (!reply)
+               return NULL;
+
+       need = genhelp_fill(reply, size);
+       if (need < size)
+               return reply;
+
+       /* too small: the first pass measured what is really needed */
+       FREE(reply);
+       size = need + 1;
+       reply = MALLOC(size);
+
+       if (!reply)
+               return NULL;
+
+       genhelp_fill(reply, size);
+       return reply;
+}
+
+/*
+ * Parse and execute one CLI command.
+ *
+ * cmd is tokenised with get_cmdvec(); the fingerprint of the resulting
+ * keywords selects a handler, whose callback is run with the keyword
+ * vector, reply, len and data.
+ *
+ * When the command cannot be parsed, no handler matches, or the
+ * matching handler has no callback attached yet, the help text is
+ * returned in *reply instead (with *len covering the terminator) and
+ * the result is 0. If even the help text cannot be allocated, *reply is
+ * NULL, *len is 0 and 1 is returned.
+ *
+ * Otherwise the return value is the callback's return value.
+ */
+int
+parse_cmd (char * cmd, char ** reply, int * len, void * data)
+{
+       int r;
+       struct handler * h = NULL;
+       vector cmdvec = NULL;
+
+       r = get_cmdvec(cmd, &cmdvec);
+
+       if (!r && cmdvec)
+               h = find_handler(fingerprint(cmdvec));
+
+       /* handlers are registered with a NULL callback by cli_init() */
+       if (!h || !h->fn) {
+               if (cmdvec)
+                       free_keys(cmdvec);
+
+               *reply = genhelp_handler();
+               if (!*reply) {
+                       *len = 0;
+                       return 1;
+               }
+               *len = strlen(*reply) + 1;
+               return 0;
+       }
+
+       /*
+        * execute handler
+        */
+       r = h->fn(cmdvec, reply, len, data);
+       free_keys(cmdvec);
+
+       return r;
+}
+
+/*
+ * Return the parameter attached to the first keyword of v whose code
+ * equals code, or NULL when no such keyword is present. The string
+ * remains owned by the keyword entry.
+ */
+char *
+get_keyparam (vector v, int code)
+{
+       struct key * cur;
+       int idx;
+
+       vector_foreach_slot(v, cur, idx) {
+               if (cur->code != code)
+                       continue;
+               return cur->param;
+       }
+
+       return NULL;
+}
+
+/*
+ * Initialise the CLI: load the keyword table, create the handlers
+ * vector and register one handler per supported command fingerprint.
+ * The callbacks are left NULL here and are expected to be attached
+ * later with set_handler_callback().
+ *
+ * Returns 0 on success, 1 when the keywords or the handlers vector
+ * cannot be set up. Failures of individual add_handler() calls are not
+ * reported.
+ */
+int
+cli_init (void) {
+       static const int fingerprints[] = {
+               LIST+PATHS,
+               LIST+PATHS+FMT,
+               LIST+STATUS,
+               LIST+MAPS,
+               LIST+MAPS+STATUS,
+               LIST+MAPS+STATS,
+               LIST+MAPS+FMT,
+               LIST+MAPS+TOPOLOGY,
+               LIST+TOPOLOGY,
+               LIST+MAP+TOPOLOGY,
+               LIST+CONFIG,
+               LIST+BLACKLIST,
+               LIST+DEVICES,
+               LIST+WILDCARDS,
+               ADD+PATH,
+               DEL+PATH,
+               ADD+MAP,
+               DEL+MAP,
+               SWITCH+MAP+GROUP,
+               RECONFIGURE,
+               SUSPEND+MAP,
+               RESUME+MAP,
+               RESIZE+MAP,
+               DISABLEQ+MAP,
+               RESTOREQ+MAP,
+               DISABLEQ+MAPS,
+               RESTOREQ+MAPS,
+               REINSTATE+PATH,
+               FAIL+PATH,
+               QUIT,
+       };
+       unsigned int n;
+
+       if (load_keys() || alloc_handlers())
+               return 1;
+
+       for (n = 0; n < sizeof(fingerprints) / sizeof(fingerprints[0]); n++)
+               add_handler(fingerprints[n], NULL);
+
+       return 0;
+}
+
+/*
+ * Tell whether every bit of kw's code is set in fingerprint fp.
+ * An empty fingerprint (0) never matches.
+ */
+static int
+key_match_fingerprint (struct key * kw, int fp)
+{
+       return fp && ((fp & kw->code) == kw->code);
+}
+
+/*
+ * This is the readline completion handler
+ *
+ * readline calls it repeatedly for the word str being completed: first
+ * with state == 0, then with a non-zero state until NULL is returned.
+ * Each call returns one candidate as a freshly strdup()ed string.
+ *
+ * On the first call the whole input line (rl_line_buffer) is parsed to
+ * compute the fingerprint of the keywords already typed; the progress
+ * of the scan over the keyword table is kept in static variables
+ * between calls.
+ */
+char *
+key_generator (const char * str, int state)
+{
+       static int index, len, rlfp, has_param;
+       struct key * kw;
+       int i;
+       struct handler *h;
+       /* get_cmdvec() may return without storing a vector */
+       vector v = NULL;
+
+       if (!state) {
+               index = 0;
+               has_param = 0;
+               rlfp = 0;
+               len = strlen(str);
+               int r = get_cmdvec(rl_line_buffer, &v);
+               /*
+                * If a word completion is in progress, we don't want
+                * to take an exact keyword match in the fingerprint.
+                * For ex "show map[tab]" would validate "map" and discard
+                * "maps" as a valid candidate.
+                *
+                * NOTE(review): vector_del_slot() presumably only drops
+                * the slot; the struct key stored there does not appear
+                * to be released - confirm.
+                */
+               if (v && len)
+                       vector_del_slot(v, VECTOR_SIZE(v) - 1);
+               /*
+                * Clean up the mess if we dropped the last slot of a 1-slot
+                * vector
+                */
+               if (v && !VECTOR_SIZE(v)) {
+                       vector_free(v);
+                       v = NULL;
+               }
+               /*
+                * If last keyword takes a param, don't even try to guess
+                */
+               if (r == E_NOPARM) {
+                       has_param = 1;
+                       return (strdup("(value)"));
+               }
+               /*
+                * Compute a command fingerprint to find out possible completions.
+                * Once done, the vector is useless. Free it.
+                */
+               if (v) {
+                       rlfp = fingerprint(v);
+                       free_keys(v);
+               }
+       }
+       /*
+        * No more completions for parameter placeholder.
+        * Brave souls might try to add parameter completion by walking paths and
+        * multipaths vectors.
+        */
+       if (has_param)
+               return ((char *)NULL);
+       /*
+        * Loop through keywords for completion candidates
+        */
+       vector_foreach_slot_after (keys, kw, index) {
+               if (!strncmp(kw->str, str, len)) {
+                       /*
+                        * Discard keywords already in the command line
+                        */
+                       if (key_match_fingerprint(kw, rlfp)) {
+                               struct key * curkw = find_key(str);
+                               if (!curkw || (curkw != kw))
+                                       continue;
+                       }
+                       /*
+                        * Discard keywords making syntax errors.
+                        *
+                        * nfp is the candidate fingerprint we try to
+                        * validate against all known command fingerprints.
+                        */
+                       int nfp = rlfp | kw->code;
+                       vector_foreach_slot(handlers, h, i) {
+                               if (!rlfp || ((h->fingerprint & nfp) == nfp)) {
+                                       /*
+                                        * At least one full command is
+                                        * possible with this keyword :
+                                        * Consider it validated
+                                        */
+                                       index++;
+                                       return (strdup(kw->str));
+                               }
+                       }
+               }
+       }
+       /*
+        * No more candidates
+        */
+       return ((char *)NULL);
+}
+
diff --git a/multipathd/cli.h b/multipathd/cli.h
new file mode 100644 (file)
index 0000000..1a19e4c
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Bit positions of the CLI keyword codes. Each enumerator is turned
+ * into a single-bit mask by the macro of the same name (without the
+ * leading underscores) below.
+ */
+enum {
+       __LIST,
+       __ADD,
+       __DEL,
+       __SWITCH,
+       __SUSPEND,
+       __RESUME,
+       __REINSTATE,
+       __FAIL,
+       __RESIZE,
+       __DISABLEQ,
+       __RESTOREQ,
+       __PATHS,
+       __MAPS,
+       __PATH,
+       __MAP,
+       __GROUP,
+       __RECONFIGURE,
+       __STATUS,
+       __STATS,
+       __TOPOLOGY,
+       __CONFIG,
+       __BLACKLIST,
+       __DEVICES,
+       __FMT,
+       __WILDCARDS,
+       __QUIT,
+};
+
+/*
+ * Keyword codes: one bit per keyword. A command is identified by the
+ * combination of the codes of its keywords (its fingerprint).
+ */
+#define LIST           (1 << __LIST)
+#define ADD            (1 << __ADD)
+#define DEL            (1 << __DEL)
+#define SWITCH         (1 << __SWITCH)
+#define SUSPEND                (1 << __SUSPEND)
+#define RESUME         (1 << __RESUME)
+#define REINSTATE      (1 << __REINSTATE)
+#define FAIL           (1 << __FAIL)
+#define RESIZE         (1 << __RESIZE)
+#define DISABLEQ       (1 << __DISABLEQ)
+#define RESTOREQ       (1 << __RESTOREQ)
+#define PATHS          (1 << __PATHS)
+#define MAPS           (1 << __MAPS)
+#define PATH           (1 << __PATH)
+#define MAP            (1 << __MAP)
+#define GROUP          (1 << __GROUP)
+#define RECONFIGURE    (1 << __RECONFIGURE)
+#define STATUS         (1 << __STATUS)
+#define STATS          (1 << __STATS)
+#define TOPOLOGY       (1 << __TOPOLOGY)
+#define CONFIG         (1 << __CONFIG)
+#define BLACKLIST      (1 << __BLACKLIST)
+#define DEVICES        (1 << __DEVICES)
+#define FMT            (1 << __FMT)
+/* NOTE(review): __COUNT is not declared in the enum above, so any use
+ * of COUNT would not compile - confirm whether it can be dropped. */
+#define COUNT          (1 << __COUNT)
+#define WILDCARDS      (1 << __WILDCARDS)
+#define QUIT           (1 << __QUIT)
+
+/* Size in bytes of the buffer first allocated for a CLI reply */
+#define INITIAL_REPLY_LEN 1000
+
+/* One keyword, either of the keyword table or of a parsed command */
+struct key {
+       char * str;     /* keyword text (set for keyword table entries) */
+       char * param;   /* value typed after the keyword, if any */
+       int code;       /* keyword code, one of the bit masks above */
+       int has_param;  /* non-zero when the keyword expects a value */
+};
+
+/* One CLI command: its fingerprint and the callback implementing it */
+struct handler {
+       unsigned long fingerprint; /* combined codes of the command keywords */
+       int (*fn)(void *, char **, int *, void *); /* called by parse_cmd() as
+                                                     fn(cmdvec, reply, len, data) */
+};
+
+/*
+ * Keyword table and registered handlers.
+ * NOTE(review): these are definitions, not extern declarations, in a
+ * header included by several source files; toolchains that do not merge
+ * common symbols will presumably report duplicate definitions - confirm
+ * and move the definitions to a single .c file if so.
+ */
+vector keys;
+vector handlers;
+
+int alloc_handlers (void);
+int add_handler (int fp, int (*fn)(void *, char **, int *, void *));
+int set_handler_callback (int fp, int (*fn)(void *, char **, int *, void *));
+int parse_cmd (char * cmd, char ** reply, int * len, void *);
+int load_keys (void);
+char * get_keyparam (vector v, int code);
+void free_keys (vector vec);
+void free_handlers (vector vec);
+int cli_init (void);
+char * key_generator (const char * str, int state);
diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c
new file mode 100644 (file)
index 0000000..71a73ff
--- /dev/null
@@ -0,0 +1,768 @@
+/*
+ * Copyright (c) 2005 Christophe Varoqui
+ */
+#include <checkers.h>
+#include <memory.h>
+#include <vector.h>
+#include <structs.h>
+#include <structs_vec.h>
+#include <libdevmapper.h>
+#include <devmapper.h>
+#include <config.h>
+#include <configure.h>
+#include <blacklist.h>
+#include <debug.h>
+#include <print.h>
+#include <sysfs.h>
+#include <errno.h>
+
+#include "main.h"
+#include "cli.h"
+
+/*
+ * Format the path header (when the path vector is not empty) followed by one
+ * entry per path of vecs->pathvec, using the format string "style".
+ *
+ * The text is written into a buffer of INITIAL_REPLY_LEN bytes that is doubled
+ * and refilled for as long as the output length equals maxlen - 1, which this
+ * code treats as the sign of a truncated result.
+ *
+ * Returns 0 and stores the buffer in *r and the output length plus one in
+ * *len.  Returns 1 if the buffer cannot be allocated or grown; nothing is
+ * leaked and *r / *len are left untouched in that case.
+ */
+int
+show_paths (char ** r, int * len, struct vectors * vecs, char * style)
+{
+       int i;
+       struct path * pp;
+       char * c;
+       char * reply;
+       char * grown;
+       unsigned int maxlen = INITIAL_REPLY_LEN;
+       int again = 1;
+
+       get_path_layout(vecs->pathvec, 1);
+       reply = MALLOC(maxlen);
+
+       if (!reply)
+               return 1;
+
+       while (again) {
+               c = reply;
+
+               if (VECTOR_SIZE(vecs->pathvec) > 0)
+                       c += snprint_path_header(c, reply + maxlen - c,
+                                                style);
+
+               vector_foreach_slot(vecs->pathvec, pp, i)
+                       c += snprint_path(c, reply + maxlen - c,
+                                         style, pp);
+
+               again = ((c - reply) == (maxlen - 1));
+
+               if (again) {
+                       /*
+                        * Grow through a temporary: assigning the REALLOC
+                        * result straight to reply would lose the only
+                        * pointer to the old buffer on failure.
+                        */
+                       grown = REALLOC(reply, maxlen * 2);
+                       if (!grown) {
+                               FREE(reply);
+                               return 1;
+                       }
+                       reply = grown;
+                       maxlen *= 2;
+               }
+       }
+       *r = reply;
+       *len = (int)(c - reply + 1);
+       return 0;
+}
+
+/*
+ * Format the topology of a single multipath map into a newly allocated
+ * buffer (snprint_multipath_topology() is called with 2 as last argument).
+ *
+ * The buffer starts at INITIAL_REPLY_LEN bytes and is doubled and refilled
+ * while the output length equals maxlen - 1.
+ *
+ * Returns 0 with the buffer in *r and the output length plus one in *len, or
+ * 1 when the buffer cannot be allocated or grown (nothing is leaked).
+ */
+int
+show_map_topology (char ** r, int * len, struct multipath * mpp)
+{
+       char * c;
+       char * reply;
+       char * grown;
+       unsigned int maxlen = INITIAL_REPLY_LEN;
+       int again = 1;
+
+       reply = MALLOC(maxlen);
+
+       if (!reply)
+               return 1;
+
+       while (again) {
+               c = reply;
+
+               c += snprint_multipath_topology(c, reply + maxlen - c, mpp, 2);
+               again = ((c - reply) == (maxlen - 1));
+
+               if (again) {
+                       /* keep the old buffer reachable until REALLOC succeeded */
+                       grown = REALLOC(reply, maxlen * 2);
+                       if (!grown) {
+                               FREE(reply);
+                               return 1;
+                       }
+                       reply = grown;
+                       maxlen *= 2;
+               }
+       }
+       *r = reply;
+       *len = (int)(c - reply + 1);
+       return 0;
+}
+
+/*
+ * Format the topology of every map in vecs->mpvec, one after the other, into
+ * a newly allocated buffer.
+ *
+ * The buffer starts at INITIAL_REPLY_LEN bytes and is doubled and refilled
+ * while the output length equals maxlen - 1.
+ *
+ * Returns 0 with the buffer in *r and the output length plus one in *len, or
+ * 1 when the buffer cannot be allocated or grown (nothing is leaked).
+ */
+int
+show_maps_topology (char ** r, int * len, struct vectors * vecs)
+{
+       int i;
+       struct multipath * mpp;
+       char * c;
+       char * reply;
+       char * grown;
+       unsigned int maxlen = INITIAL_REPLY_LEN;
+       int again = 1;
+
+       get_path_layout(vecs->pathvec, 0);
+       reply = MALLOC(maxlen);
+
+       if (!reply)
+               return 1;
+
+       while (again) {
+               c = reply;
+
+               vector_foreach_slot(vecs->mpvec, mpp, i)
+                       c += snprint_multipath_topology(c, reply + maxlen - c,
+                                                       mpp, 2);
+
+               again = ((c - reply) == (maxlen - 1));
+
+               if (again) {
+                       /* keep the old buffer reachable until REALLOC succeeded */
+                       grown = REALLOC(reply, maxlen * 2);
+                       if (!grown) {
+                               FREE(reply);
+                               return 1;
+                       }
+                       reply = grown;
+                       maxlen *= 2;
+               }
+       }
+       *r = reply;
+       *len = (int)(c - reply + 1);
+       return 0;
+}
+
+/*
+ * Double the reply buffer used by show_config().
+ * Returns 0 on success.  On failure the old buffer is released, *reply is
+ * set to NULL and 1 is returned.
+ */
+static int
+show_config_grow (char ** reply, unsigned int * maxlen)
+{
+       char * grown = REALLOC(*reply, *maxlen * 2);
+
+       if (!grown) {
+               FREE(*reply);
+               *reply = NULL;
+               return 1;
+       }
+       *reply = grown;
+       *maxlen *= 2;
+       return 0;
+}
+
+/*
+ * Dump the configuration into a newly allocated buffer, in this order:
+ * defaults, blacklist, blacklist exceptions, hardware table (conf->hwtable)
+ * and multipath table (conf->mptable).
+ *
+ * Whenever one section leaves the output length equal to maxlen, the buffer
+ * is doubled and the whole dump is restarted from the first section.
+ *
+ * Returns 0 with the buffer in *r and the output length plus one in *len, or
+ * 1 when the buffer cannot be allocated or grown (nothing is leaked).
+ */
+int
+show_config (char ** r, int * len)
+{
+       char * c;
+       char * reply;
+       unsigned int maxlen = INITIAL_REPLY_LEN;
+       int again = 1;
+
+       reply = MALLOC(maxlen);
+
+       if (!reply)
+               return 1;
+
+       while (again) {
+               c = reply;
+               c += snprint_defaults(c, reply + maxlen - c);
+               again = ((c - reply) == maxlen);
+
+               /* each later section only runs while the buffer still has room */
+               if (!again) {
+                       c += snprint_blacklist(c, reply + maxlen - c);
+                       again = ((c - reply) == maxlen);
+               }
+               if (!again) {
+                       c += snprint_blacklist_except(c, reply + maxlen - c);
+                       again = ((c - reply) == maxlen);
+               }
+               if (!again) {
+                       c += snprint_hwtable(c, reply + maxlen - c,
+                                            conf->hwtable);
+                       again = ((c - reply) == maxlen);
+               }
+               if (!again) {
+                       c += snprint_mptable(c, reply + maxlen - c,
+                                            conf->mptable);
+                       again = ((c - reply) == maxlen);
+               }
+
+               if (again && show_config_grow(&reply, &maxlen))
+                       return 1;
+       }
+       *r = reply;
+       *len = (int)(c - reply + 1);
+       return 0;
+}
+
+/* CLI handler: log the request and return whatever show_config() produces. */
+int
+cli_list_config (void * v, char ** reply, int * len, void * data)
+{
+       int rc;
+
+       condlog(3, "list config (operator)");
+       rc = show_config(reply, len);
+
+       return rc;
+}
+
+/*
+ * CLI handler: list all paths with the PRINT_PATH_CHECKER format.
+ * "data" is used as the struct vectors holding the path vector.
+ */
+int
+cli_list_paths (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       int rc;
+
+       condlog(3, "list paths (operator)");
+       rc = show_paths(reply, len, all, PRINT_PATH_CHECKER);
+
+       return rc;
+}
+
+/*
+ * CLI handler: list all paths using the format string supplied as the
+ * parameter of the FMT key in "v".
+ */
+int
+cli_list_paths_fmt (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       char * layout = get_keyparam(v, FMT);
+       int rc;
+
+       condlog(3, "list paths (operator)");
+       rc = show_paths(reply, len, all, layout);
+
+       return rc;
+}
+
+/*
+ * CLI handler: show the topology of the map named by the MAP key parameter.
+ * Returns 1 when find_mp_by_str() does not find the map, otherwise the
+ * result of show_map_topology().
+ */
+int
+cli_list_map_topology (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       char * mapname = get_keyparam(v, MAP);
+       struct multipath * found;
+
+       get_path_layout(all->pathvec, 0);
+
+       found = find_mp_by_str(all->mpvec, mapname);
+       if (found) {
+               condlog(3, "list multipath %s (operator)", mapname);
+               return show_map_topology(reply, len, found);
+       }
+       return 1;
+}
+
+/* CLI handler: show the topology of every registered map. */
+int
+cli_list_maps_topology (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       int rc;
+
+       condlog(3, "list multipaths (operator)");
+       rc = show_maps_topology(reply, len, all);
+
+       return rc;
+}
+
+/*
+ * CLI handler: print the wildcard list into a buffer of INITIAL_REPLY_LEN
+ * bytes.  *len is always set to the full buffer size, not to the length of
+ * the text.  Returns 1 if the buffer cannot be allocated, 0 otherwise.
+ */
+int
+cli_list_wildcards (void * v, char ** reply, int * len, void * data)
+{
+       char * buf = MALLOC(INITIAL_REPLY_LEN);
+
+       *reply = buf;
+       if (!buf)
+               return 1;
+
+       snprint_wildcards(buf, INITIAL_REPLY_LEN);
+
+       *len = INITIAL_REPLY_LEN;
+       return 0;
+}
+
+/*
+ * Print the daemon status into a buffer of INITIAL_REPLY_LEN bytes.  Unlike
+ * the other show_* helpers this one never grows the buffer.
+ * Returns 0 with the buffer in *r and the output length plus one in *len, or
+ * 1 if the allocation fails.
+ */
+int
+show_status (char ** r, int *len, struct vectors * vecs)
+{
+       const unsigned int bufsize = INITIAL_REPLY_LEN;
+       char * buf = MALLOC(bufsize);
+       char * end;
+
+       if (!buf)
+               return 1;
+
+       end = buf;
+       end += snprint_status(buf, bufsize, vecs);
+
+       *r = buf;
+       *len = (int)(end - buf + 1);
+       return 0;
+}
+
+/*
+ * Format the map header (when the map vector is not empty) followed by one
+ * entry per map of vecs->mpvec, using the format string "style".
+ *
+ * The buffer starts at INITIAL_REPLY_LEN bytes and is doubled and refilled
+ * while the output length equals maxlen - 1.
+ *
+ * Returns 0 with the buffer in *r and the output length plus one in *len, or
+ * 1 when the buffer cannot be allocated or grown (nothing is leaked).
+ */
+int
+show_maps (char ** r, int *len, struct vectors * vecs, char * style)
+{
+       int i;
+       struct multipath * mpp;
+       char * c;
+       char * reply;
+       char * grown;
+       unsigned int maxlen = INITIAL_REPLY_LEN;
+       int again = 1;
+
+       get_multipath_layout(vecs->mpvec, 1);
+       reply = MALLOC(maxlen);
+
+       if (!reply)
+               return 1;
+
+       while (again) {
+               c = reply;
+               if (VECTOR_SIZE(vecs->mpvec) > 0)
+                       c += snprint_multipath_header(c, reply + maxlen - c,
+                                                     style);
+
+               vector_foreach_slot(vecs->mpvec, mpp, i)
+                       c += snprint_multipath(c, reply + maxlen - c,
+                                              style, mpp);
+
+               again = ((c - reply) == (maxlen - 1));
+
+               if (again) {
+                       /* keep the old buffer reachable until REALLOC succeeded */
+                       grown = REALLOC(reply, maxlen * 2);
+                       if (!grown) {
+                               FREE(reply);
+                               return 1;
+                       }
+                       reply = grown;
+                       maxlen *= 2;
+               }
+       }
+       *r = reply;
+       *len = (int)(c - reply + 1);
+       return 0;
+}
+
+/*
+ * CLI handler: list all maps using the format string supplied as the
+ * parameter of the FMT key in "v".
+ */
+int
+cli_list_maps_fmt (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       char * layout = get_keyparam(v, FMT);
+       int rc;
+
+       condlog(3, "list maps (operator)");
+       rc = show_maps(reply, len, all, layout);
+
+       return rc;
+}
+
+/* CLI handler: list all maps with the PRINT_MAP_NAMES format. */
+int
+cli_list_maps (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       int rc;
+
+       condlog(3, "list maps (operator)");
+       rc = show_maps(reply, len, all, PRINT_MAP_NAMES);
+
+       return rc;
+}
+
+/* CLI handler: report the daemon status through show_status(). */
+int
+cli_list_status (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       int rc;
+
+       condlog(3, "list status (operator)");
+       rc = show_status(reply, len, all);
+
+       return rc;
+}
+
+/* CLI handler: list all maps with the PRINT_MAP_STATUS format. */
+int
+cli_list_maps_status (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       int rc;
+
+       condlog(3, "list maps status (operator)");
+       rc = show_maps(reply, len, all, PRINT_MAP_STATUS);
+
+       return rc;
+}
+
+/* CLI handler: list all maps with the PRINT_MAP_STATS format. */
+int
+cli_list_maps_stats (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       int rc;
+
+       condlog(3, "list maps stats (operator)");
+       rc = show_maps(reply, len, all, PRINT_MAP_STATS);
+
+       return rc;
+}
+
+/*
+ * CLI handler: add the path named by the PATH key parameter.
+ *
+ * If the device node is filtered out by the devnode blacklist, or
+ * ev_add_path() returns 2, the reply is set to "blacklisted\n" and 0 is
+ * returned.  Otherwise the result of ev_add_path() is returned and no reply
+ * is produced.  Returns 1 if the reply string cannot be allocated.
+ */
+int
+cli_add_path (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * vecs = (struct vectors *)data;
+       char * param = get_keyparam(v, PATH);
+       int r;
+
+       condlog(2, "%s: add path (operator)", param);
+
+       /* r is only assigned when the devnode filter lets the path through */
+       if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
+           param) > 0 || (r = ev_add_path(param, vecs)) == 2) {
+               condlog(2, "%s: path blacklisted", param);
+               *reply = strdup("blacklisted\n");
+               /* strdup() may fail; strlen(NULL) would crash the daemon */
+               if (!*reply)
+                       return 1;
+               *len = strlen(*reply) + 1;
+               return 0;
+       }
+       return r;
+}
+
+/* CLI handler: remove the path named by the PATH key parameter. */
+int
+cli_del_path (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       char * devname = get_keyparam(v, PATH);
+       int rc;
+
+       condlog(2, "%s: remove path (operator)", devname);
+       rc = ev_remove_path(devname, all);
+
+       return rc;
+}
+
+/*
+ * CLI handler: register the device-mapper map named by the MAP key parameter.
+ *
+ * Returns 0 with the reply "blacklisted\n" when the name is rejected by the
+ * wwid filter.  Also returns 0, without a reply, when the name has no
+ * device-mapper minor or the dm-<minor> device is not found in sysfs; both
+ * cases are only logged.  Otherwise returns the result of ev_add_map().
+ * Returns 1 if the reply string cannot be allocated.
+ */
+int
+cli_add_map (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * vecs = (struct vectors *)data;
+       char * param = get_keyparam(v, MAP);
+       int minor;
+       char dev_path[PATH_SIZE];
+       struct sysfs_device *sysdev;
+
+       condlog(2, "%s: add map (operator)", param);
+
+       if (filter_wwid(conf->blist_wwid, conf->elist_wwid, param) > 0) {
+               condlog(2, "%s: map blacklisted", param);
+               *reply = strdup("blacklisted\n");
+               /* strdup() may fail; strlen(NULL) would crash the daemon */
+               if (!*reply)
+                       return 1;
+               *len = strlen(*reply) + 1;
+               return 0;
+       }
+       minor = dm_get_minor(param);
+       if (minor < 0) {
+               condlog(2, "%s: not a device mapper table", param);
+               return 0;
+       }
+       /* bounded write: never run past the end of dev_path */
+       snprintf(dev_path, sizeof(dev_path), "/block/dm-%d", minor);
+       sysdev = sysfs_device_get(dev_path);
+       if (!sysdev) {
+               condlog(2, "%s: not found in sysfs", param);
+               return 0;
+       }
+       return ev_add_map(sysdev, vecs);
+}
+
+/* CLI handler: remove the map named by the MAP key parameter. */
+int
+cli_del_map (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       char * mapname = get_keyparam(v, MAP);
+       int rc;
+
+       condlog(2, "%s: remove map (operator)", mapname);
+       rc = ev_remove_map(mapname, all);
+
+       return rc;
+}
+
+/*
+ * Record the new size in the map, refresh its path list, rebuild the map
+ * and push it with the ACT_RESIZE action.
+ * Returns 0 when domap() reports a positive result, 1 (after logging the
+ * errno text) otherwise.
+ */
+int resize_map(struct multipath *mpp, unsigned long long size,
+              struct vectors * vecs)
+{
+       mpp->size = size;
+       update_mpp_paths(mpp, vecs->pathvec);
+       setup_map(mpp);
+       mpp->action = ACT_RESIZE;
+
+       if (domap(mpp) > 0)
+               return 0;
+
+       condlog(0, "%s: failed to resize map : %s", mpp->alias,
+               strerror(errno));
+       return 1;
+}
+
+/*
+ * CLI handler: resize the map named by the MAP key parameter to the size
+ * sysfs reports for the first path of its first path group.
+ *
+ * The map is looked up by minor number when the name has the form "dm-<n>",
+ * by alias otherwise.
+ *
+ * Returns 0 when the map was resized or already has that size.  Returns 1
+ * when the map is unknown, has no path group or no path, the size cannot be
+ * read, or resize_map() fails.
+ */
+int
+cli_resize(void *v, char **reply, int *len, void *data)
+{
+       struct vectors * vecs = (struct vectors *)data;
+       char * mapname = get_keyparam(v, MAP);
+       struct multipath *mpp;
+       int minor;
+       unsigned long long size;
+       struct pathgroup *pgp;
+       struct path *pp;
+
+       condlog(2, "%s: resize map (operator)", mapname);
+       if (sscanf(mapname, "dm-%d", &minor) == 1)
+               mpp = find_mp_by_minor(vecs->mpvec, minor);
+       else
+               mpp = find_mp_by_alias(vecs->mpvec, mapname);
+
+       if (!mpp) {
+               condlog(0, "%s: invalid map name. cannot resize", mapname);
+               return 1;
+       }
+
+       /*
+        * A map may have no path group vector (sync_map_state() guards
+        * against that too) or an empty one; slot 0 must not be read then.
+        */
+       if (!mpp->pg || VECTOR_SIZE(mpp->pg) == 0) {
+               condlog(0, "%s: couldn't get path group. cannot resize",
+                       mapname);
+               return 1;
+       }
+       pgp = VECTOR_SLOT(mpp->pg, 0);
+       if (!pgp || !pgp->paths || VECTOR_SIZE(pgp->paths) == 0) {
+               condlog(0, "%s: couldn't get path. cannot resize", mapname);
+               return 1;
+       }
+       pp = VECTOR_SLOT(pgp->paths, 0);
+       if (!pp) {
+               condlog(0, "%s: couldn't get path. cannot resize", mapname);
+               return 1;
+       }
+
+       if (sysfs_get_size(pp->sysdev, &size)) {
+               condlog(0, "%s: couldn't get size for sysfs. cannot resize",
+                       mapname);
+               return 1;
+       }
+       if (size == mpp->size) {
+               condlog(0, "%s: map is still the same size (%llu)", mapname,
+                       mpp->size);
+               return 0;
+       }
+       condlog(3, "%s old size is %llu, new size is %llu", mapname, mpp->size,
+               size);
+
+       if (resize_map(mpp, size, vecs) != 0)
+               return 1;
+
+       dm_lib_release();
+       setup_multipath(vecs, mpp);
+       sync_map_state(mpp);
+
+       return 0;
+}
+
+/*
+ * CLI handler: turn queue_if_no_path back on for the map named by the MAP
+ * key parameter ("dm-<n>" selects by minor number, anything else by alias).
+ *
+ * Nothing is changed when no_path_retry is NO_PATH_RETRY_UNDEF or
+ * NO_PATH_RETRY_FAIL.  Otherwise retry_tick is cleared when the map has
+ * active paths and set to no_path_retry * conf->checkint when it has none.
+ * Returns 1 for an unknown map, 0 otherwise.
+ */
+int
+cli_restore_queueing(void *v, char **reply, int *len, void *data)
+{
+       struct vectors * all = data;
+       char * mapname = get_keyparam(v, MAP);
+       struct multipath *map;
+       int minor;
+
+       condlog(2, "%s: restore map queueing (operator)", mapname);
+
+       map = (sscanf(mapname, "dm-%d", &minor) == 1) ?
+               find_mp_by_minor(all->mpvec, minor) :
+               find_mp_by_alias(all->mpvec, mapname);
+       if (!map) {
+               condlog(0, "%s: invalid map name, cannot restore queueing", mapname);
+               return 1;
+       }
+
+       if (map->no_path_retry == NO_PATH_RETRY_UNDEF ||
+           map->no_path_retry == NO_PATH_RETRY_FAIL)
+               return 0;
+
+       dm_queue_if_no_path(map->alias, 1);
+       if (map->nr_active > 0)
+               map->retry_tick = 0;
+       else
+               map->retry_tick = map->no_path_retry * conf->checkint;
+
+       return 0;
+}
+
+/*
+ * CLI handler: apply the restore-queueing logic to every registered map.
+ * Maps whose no_path_retry is NO_PATH_RETRY_UNDEF or NO_PATH_RETRY_FAIL are
+ * left alone.  Always returns 0.
+ */
+int
+cli_restore_all_queueing(void *v, char **reply, int *len, void *data)
+{
+       struct vectors * all = data;
+       struct multipath *map;
+       int idx;
+
+       condlog(2, "restore queueing (operator)");
+       vector_foreach_slot(all->mpvec, map, idx) {
+               if (map->no_path_retry == NO_PATH_RETRY_UNDEF ||
+                   map->no_path_retry == NO_PATH_RETRY_FAIL)
+                       continue;
+
+               dm_queue_if_no_path(map->alias, 1);
+               if (map->nr_active > 0)
+                       map->retry_tick = 0;
+               else
+                       map->retry_tick = map->no_path_retry * conf->checkint;
+       }
+       return 0;
+}
+
+/*
+ * CLI handler: clear retry_tick and switch queue_if_no_path off for the map
+ * named by the MAP key parameter ("dm-<n>" selects by minor number, anything
+ * else by alias).  Returns 1 for an unknown map, 0 otherwise.
+ */
+int
+cli_disable_queueing(void *v, char **reply, int *len, void *data)
+{
+       struct vectors * all = data;
+       char * mapname = get_keyparam(v, MAP);
+       struct multipath *map;
+       int minor;
+
+       condlog(2, "%s: disable map queueing (operator)", mapname);
+
+       map = (sscanf(mapname, "dm-%d", &minor) == 1) ?
+               find_mp_by_minor(all->mpvec, minor) :
+               find_mp_by_alias(all->mpvec, mapname);
+       if (!map) {
+               condlog(0, "%s: invalid map name, cannot disable queueing", mapname);
+               return 1;
+       }
+
+       map->retry_tick = 0;
+       dm_queue_if_no_path(map->alias, 0);
+       return 0;
+}
+
+/*
+ * CLI handler: clear retry_tick and switch queue_if_no_path off for every
+ * registered map.  Always returns 0.
+ */
+int
+cli_disable_all_queueing(void *v, char **reply, int *len, void *data)
+{
+       struct vectors * all = data;
+       struct multipath *map;
+       int idx;
+
+       condlog(2, "disable queueing (operator)");
+
+       vector_foreach_slot(all->mpvec, map, idx) {
+               map->retry_tick = 0;
+               dm_queue_if_no_path(map->alias, 0);
+       }
+
+       return 0;
+}
+
+/*
+ * CLI handler: switch the map named by the MAP key parameter to the path
+ * group whose number is given, as decimal text, by the GROUP key parameter.
+ * Returns the result of dm_switchgroup().
+ */
+int
+cli_switch_group(void * v, char ** reply, int * len, void * data)
+{
+       char * mapname = get_keyparam(v, MAP);
+       char * grouptext = get_keyparam(v, GROUP);
+       int groupnum = atoi(grouptext);
+       int rc;
+
+       condlog(2, "%s: switch to path group #%i (operator)", mapname, groupnum);
+       rc = dm_switchgroup(mapname, groupnum);
+
+       return rc;
+}
+
+/* CLI handler: log the request and return the result of reconfigure(). */
+int
+cli_reconfigure(void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       int rc;
+
+       condlog(2, "reconfigure (operator)");
+       rc = reconfigure(all);
+
+       return rc;
+}
+
+/*
+ * CLI handler: suspend the map named by the MAP key parameter.
+ * The suspend command is issued first and the request is logged afterwards.
+ * Returns 1 when dm_simplecmd_noflush() returns 0 or the alias is not a
+ * registered map; otherwise the map's dmi is refreshed and 0 is returned.
+ */
+int
+cli_suspend(void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       char * mapname = get_keyparam(v, MAP);
+       struct multipath * map;
+       int ok;
+
+       ok = dm_simplecmd_noflush(DM_DEVICE_SUSPEND, mapname);
+       condlog(2, "%s: suspend (operator)", mapname);
+
+       if (ok) {
+               map = find_mp_by_alias(all->mpvec, mapname);
+               if (map) {
+                       dm_get_info(mapname, &map->dmi);
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+/*
+ * CLI handler: resume the map named by the MAP key parameter.
+ * The resume command is issued first and the request is logged afterwards.
+ * Returns 1 when dm_simplecmd_noflush() returns 0 or the alias is not a
+ * registered map; otherwise the map's dmi is refreshed and 0 is returned.
+ */
+int
+cli_resume(void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       char * mapname = get_keyparam(v, MAP);
+       struct multipath * map;
+       int ok;
+
+       ok = dm_simplecmd_noflush(DM_DEVICE_RESUME, mapname);
+       condlog(2, "%s: resume (operator)", mapname);
+
+       if (ok) {
+               map = find_mp_by_alias(all->mpvec, mapname);
+               if (map) {
+                       dm_get_info(mapname, &map->dmi);
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+/*
+ * CLI handler: reinstate the path named by the PATH key parameter.
+ * The path is searched by device name first and by dev_t second.
+ * Returns 1 when the path is unknown or not attached to a map with an
+ * alias; otherwise the checker is enabled and the result of
+ * dm_reinstate_path() is returned.
+ */
+int
+cli_reinstate(void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       char * devname = get_keyparam(v, PATH);
+       struct path * target = find_path_by_dev(all->pathvec, devname);
+
+       if (target == NULL)
+               target = find_path_by_devt(all->pathvec, devname);
+
+       if (target && target->mpp && target->mpp->alias) {
+               condlog(2, "%s: reinstate path %s (operator)",
+                       target->mpp->alias, target->dev_t);
+
+               checker_enable(&target->checker);
+               return dm_reinstate_path(target->mpp->alias, target->dev_t);
+       }
+       return 1;
+}
+
+/*
+ * CLI handler: fail the path named by the PATH key parameter.
+ * The path is searched by device name first and by dev_t second.
+ * Returns 1 when the path is unknown or not attached to a map with an
+ * alias; otherwise returns the result of dm_fail_path().
+ */
+int
+cli_fail(void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       char * devname = get_keyparam(v, PATH);
+       struct path * target = find_path_by_dev(all->pathvec, devname);
+       int rc;
+
+       if (target == NULL)
+               target = find_path_by_devt(all->pathvec, devname);
+
+       if (!(target && target->mpp && target->mpp->alias))
+               return 1;
+
+       condlog(2, "%s: fail path %s (operator)",
+               target->mpp->alias, target->dev_t);
+
+       rc = dm_fail_path(target->mpp->alias, target->dev_t);
+
+       /* when the call returned 0, stop checking so the path is not auto-reinstated */
+       if (rc == 0)
+               checker_disable(&target->checker);
+
+       return rc;
+}
+
+/*
+ * Print the blacklist report into a newly allocated buffer.
+ * Starting with INITIAL_REPLY_LEN bytes, the buffer is thrown away and
+ * re-allocated with twice the size for as long as the output length equals
+ * the buffer size.
+ * Returns 0 with the buffer in *r and the output length plus one in *len, or
+ * 1 if an allocation fails.
+ */
+int
+show_blacklist (char ** r, int * len)
+{
+       unsigned int size = INITIAL_REPLY_LEN;
+       char *buf;
+       char *end;
+
+       for (;;) {
+               buf = MALLOC(size);
+               if (!buf)
+                       return 1;
+
+               end = buf;
+               end += snprint_blacklist_report(buf, size);
+               if ((end - buf) != size)
+                       break;
+
+               /* buffer completely used: start over with a bigger one */
+               size *= 2;
+               FREE(buf);
+       }
+
+       *r = buf;
+       *len = (int)(end - buf + 1);
+
+       return 0;
+}
+
+/* CLI handler: log the request and return whatever show_blacklist() produces. */
+int
+cli_list_blacklist (void * v, char ** reply, int * len, void * data)
+{
+       int rc;
+
+       condlog(3, "list blacklist (operator)");
+       rc = show_blacklist(reply, len);
+
+       return rc;
+}
+
+/*
+ * Print the device list into a newly allocated buffer.
+ * Starting with INITIAL_REPLY_LEN bytes, the buffer is thrown away and
+ * re-allocated with twice the size for as long as the output length equals
+ * the buffer size.
+ * Returns 0 with the buffer in *r and the output length plus one in *len, or
+ * 1 if an allocation fails.
+ */
+int
+show_devices (char ** r, int * len, struct vectors *vecs)
+{
+       unsigned int size = INITIAL_REPLY_LEN;
+       char *buf;
+       char *end;
+
+       for (;;) {
+               buf = MALLOC(size);
+               if (!buf)
+                       return 1;
+
+               end = buf;
+               end += snprint_devices(buf, size, vecs);
+               if ((end - buf) != size)
+                       break;
+
+               /* buffer completely used: start over with a bigger one */
+               size *= 2;
+               FREE(buf);
+       }
+
+       *r = buf;
+       *len = (int)(end - buf + 1);
+
+       return 0;
+}
+
+/* CLI handler: log the request and return whatever show_devices() produces. */
+int
+cli_list_devices (void * v, char ** reply, int * len, void * data)
+{
+       struct vectors * all = data;
+       int rc;
+
+       condlog(3, "list devices (operator)");
+       rc = show_devices(reply, len, all);
+
+       return rc;
+}
+
+/* CLI handler for the quit keyword: touches none of its arguments, returns 0. */
+int
+cli_quit (void * v, char ** reply, int * len, void * data)
+{
+       const int rc = 0;
+
+       return rc;
+}
diff --git a/multipathd/cli_handlers.h b/multipathd/cli_handlers.h
new file mode 100644 (file)
index 0000000..b3ad377
--- /dev/null
@@ -0,0 +1,29 @@
+int cli_list_paths (void * v, char ** reply, int * len, void * data);
+int cli_list_paths_fmt (void * v, char ** reply, int * len, void * data);
+int cli_list_status (void * v, char ** reply, int * len, void * data);
+int cli_list_maps (void * v, char ** reply, int * len, void * data);
+int cli_list_maps_fmt (void * v, char ** reply, int * len, void * data);
+int cli_list_maps_status (void * v, char ** reply, int * len, void * data);
+int cli_list_maps_stats (void * v, char ** reply, int * len, void * data);
+int cli_list_map_topology (void * v, char ** reply, int * len, void * data);
+int cli_list_maps_topology (void * v, char ** reply, int * len, void * data);
+int cli_list_config (void * v, char ** reply, int * len, void * data);
+int cli_list_blacklist (void * v, char ** reply, int * len, void * data);
+int cli_list_devices (void * v, char ** reply, int * len, void * data);
+int cli_list_wildcards (void * v, char ** reply, int * len, void * data);
+int cli_add_path (void * v, char ** reply, int * len, void * data);
+int cli_del_path (void * v, char ** reply, int * len, void * data);
+int cli_add_map (void * v, char ** reply, int * len, void * data);
+int cli_del_map (void * v, char ** reply, int * len, void * data);
+int cli_switch_group(void * v, char ** reply, int * len, void * data);
+int cli_reconfigure(void * v, char ** reply, int * len, void * data);
+int cli_resize(void * v, char ** reply, int * len, void * data);
+int cli_disable_queueing(void * v, char ** reply, int * len, void * data);
+int cli_disable_all_queueing(void * v, char ** reply, int * len, void * data);
+int cli_restore_queueing(void * v, char ** reply, int * len, void * data);
+int cli_restore_all_queueing(void * v, char ** reply, int * len, void * data);
+int cli_suspend(void * v, char ** reply, int * len, void * data);
+int cli_resume(void * v, char ** reply, int * len, void * data);
+int cli_reinstate(void * v, char ** reply, int * len, void * data);
+int cli_fail(void * v, char ** reply, int * len, void * data);
+int cli_quit(void * v, char ** reply, int * len, void * data);
diff --git a/multipathd/main.c b/multipathd/main.c
new file mode 100644 (file)
index 0000000..6ee7903
--- /dev/null
@@ -0,0 +1,1620 @@
+/*
+ * Copyright (c) 2004, 2005 Christophe Varoqui
+ * Copyright (c) 2005 Kiyoshi Ueda, NEC
+ * Copyright (c) 2005 Benjamin Marzinski, Redhat
+ * Copyright (c) 2005 Edward Goggin, EMC
+ */
+#include <unistd.h>
+#include <sys/stat.h>
+#include <libdevmapper.h>
+#include <wait.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <limits.h>
+
+/*
+ * libcheckers
+ */
+#include <checkers.h>
+
+/*
+ * libmultipath
+ */
+#include <parser.h>
+#include <vector.h>
+#include <memory.h>
+#include <config.h>
+#include <util.h>
+#include <hwtable.h>
+#include <defaults.h>
+#include <structs.h>
+#include <callout.h>
+#include <blacklist.h>
+#include <structs_vec.h>
+#include <dmparser.h>
+#include <devmapper.h>
+#include <sysfs.h>
+#include <dict.h>
+#include <discovery.h>
+#include <debug.h>
+#include <propsel.h>
+#include <uevent.h>
+#include <switchgroup.h>
+#include <print.h>
+#include <configure.h>
+#include <prio.h>
+
+#include "main.h"
+#include "pidfile.h"
+#include "uxlsnr.h"
+#include "uxclnt.h"
+#include "cli.h"
+#include "cli_handlers.h"
+#include "lock.h"
+#include "waiter.h"
+
+#define FILE_NAME_SIZE 256
+#define CMDSIZE 160
+
+/*
+ * Log message b at verbosity a, prefixed with the map alias and device name,
+ * unless b is the empty string.
+ *
+ * The expansion relies on a variable named "pp" being in scope at the call
+ * site, evaluates b twice, and is a bare "if" statement that already ends
+ * with a semicolon.
+ * NOTE(review): because of the bare "if", using this macro as the body of an
+ * unbraced if/else would mis-bind the else -- confirm no caller does that
+ * before reshaping it into do { } while (0).
+ */
+#define LOG_MSG(a,b) \
+       if (strlen(b)) condlog(a, "%s: %s - %s", pp->mpp->alias, pp->dev, b);
+
+pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int logsink;
+
+/*
+ * global copy of vecs for use in sig handlers
+ */
+struct vectors * gvecs;
+
+/*
+ * Decide whether the map should move to another path group.
+ *
+ * Returns 0 without touching anything when mpp is NULL or its pgfailback is
+ * -FAILBACK_MANUAL.  Otherwise, when "refresh" is set, pathinfo() is run with
+ * DI_PRIO on every path of every path group; then mpp->bestpg is recomputed
+ * with select_path_group() and 1 is returned if it differs from mpp->nextpg.
+ */
+static int
+need_switch_pathgroup (struct multipath * mpp, int refresh)
+{
+       struct pathgroup * group;
+       struct path * member;
+       unsigned int g, p;
+
+       if (!mpp)
+               return 0;
+       if (mpp->pgfailback == -FAILBACK_MANUAL)
+               return 0;
+
+       if (refresh) {
+               vector_foreach_slot (mpp->pg, group, g) {
+                       vector_foreach_slot (group->paths, member, p) {
+                               pathinfo(member, conf->hwtable, DI_PRIO);
+                       }
+               }
+       }
+
+       mpp->bestpg = select_path_group(mpp);
+
+       return (mpp->bestpg != mpp->nextpg) ? 1 : 0;
+}
+
+/*
+ * Count the switch in mpp->stat_switchgroup, ask device-mapper to switch the
+ * map to mpp->bestpg, and log the switch.
+ */
+static void
+switch_pathgroup (struct multipath * mpp)
+{
+       mpp->stat_switchgroup += 1;
+
+       dm_switchgroup(mpp->alias, mpp->bestpg);
+
+       condlog(2, "%s: switch to path group #%i",
+                mpp->alias, mpp->bestpg);
+}
+
+/*
+ * Walk the currently registered maps (vecs->mpvec) and try to flush every
+ * map whose wwid is not present in the new map vector nmpv.
+ *
+ * A map that cannot be flushed is appended to nmpv, passed to
+ * setup_multipath(), and removed from the old vector.
+ *
+ * Returns 1 when no slot can be allocated in nmpv, 0 otherwise.
+ */
+static int
+coalesce_maps(struct vectors *vecs, vector nmpv)
+{
+       struct multipath * ompp;
+       vector ompv = vecs->mpvec;
+       unsigned int i;
+       int j;
+
+       vector_foreach_slot (ompv, ompp, i) {
+               if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
+                       /*
+                        * remove all current maps not allowed by the
+                        * current configuration
+                        */
+                       if (dm_flush_map(ompp->alias)) {
+                               condlog(0, "%s: unable to flush devmap",
+                                       ompp->alias);
+                               /*
+                                * may be just because the device is open
+                                */
+                               if (!vector_alloc_slot(nmpv))
+                                       return 1;
+
+                               vector_set_slot(nmpv, ompp);
+                               setup_multipath(vecs, ompp);
+
+                               /*
+                                * NOTE(review): this deletes from the vector
+                                * being iterated.  If vector_del_slot() shifts
+                                * the following entries down, the entry that
+                                * moves into slot i is skipped -- confirm.
+                                */
+                               if ((j = find_slot(ompv, (void *)ompp)) != -1)
+                                       vector_del_slot(ompv, j);
+
+                               continue;
+                       }
+                       else {
+                               dm_lib_release();
+                               condlog(2, "%s devmap removed", ompp->alias);
+                       }
+               }
+       }
+       return 0;
+}
+
+/*
+ * Bring the device-mapper state of every path of the map in line with the
+ * state recorded in pp->state.
+ *
+ * Paths in state PATH_UNCHECKED or PATH_WILD are skipped.  A path that is
+ * PATH_UP or PATH_GHOST while its dmstate is PSTATE_FAILED or PSTATE_UNDEF
+ * is reinstated; a path that is PATH_DOWN or PATH_SHAKY while its dmstate
+ * is PSTATE_ACTIVE or PSTATE_UNDEF is failed.  Does nothing when the map
+ * has no path group vector.
+ */
+void
+sync_map_state(struct multipath *mpp)
+{
+       struct pathgroup *group;
+       struct path *member;
+       unsigned int g, p;
+
+       if (!mpp->pg)
+               return;
+
+       vector_foreach_slot (mpp->pg, group, g) {
+               vector_foreach_slot (group->paths, member, p) {
+                       int usable, unusable, dm_undef;
+
+                       if (member->state == PATH_UNCHECKED ||
+                           member->state == PATH_WILD)
+                               continue;
+
+                       usable = (member->state == PATH_UP ||
+                                 member->state == PATH_GHOST);
+                       unusable = (member->state == PATH_DOWN ||
+                                   member->state == PATH_SHAKY);
+                       dm_undef = (member->dmstate == PSTATE_UNDEF);
+
+                       if (usable &&
+                           (member->dmstate == PSTATE_FAILED || dm_undef))
+                               dm_reinstate_path(mpp->alias, member->dev_t);
+                       else if (unusable &&
+                                (member->dmstate == PSTATE_ACTIVE || dm_undef))
+                               dm_fail_path(mpp->alias, member->dev_t);
+               }
+       }
+}
+
+/* Run sync_map_state() on every map stored in mpvec. */
+static void
+sync_maps_state(vector mpvec)
+{
+       struct multipath *map;
+       unsigned int idx;
+
+       vector_foreach_slot (mpvec, map, idx) {
+               sync_map_state(map);
+       }
+}
+
+/*
+ * Flush the device-mapper map, then orphan its paths and remove the map
+ * together with its waiter.
+ *
+ * Returns 1, leaving the daemon's bookkeeping untouched, when
+ * dm_flush_map() returns non-zero; returns 0 otherwise.
+ */
+static int
+flush_map(struct multipath * mpp, struct vectors * vecs)
+{
+       if (dm_flush_map(mpp->alias)) {
+               /*
+                * Not necessarily a real error: the map may already have
+                * been removed from device-mapper, e.g. by dmsetup(8).
+                */
+               condlog(0, "%s: can't flush", mpp->alias);
+               return 1;
+       }
+
+       dm_lib_release();
+       condlog(2, "%s: devmap removed", mpp->alias);
+
+       orphan_paths(vecs->pathvec, mpp);
+       remove_map_and_stop_waiter(mpp, vecs, 1);
+
+       return 0;
+}
+
+/* uevent entry point for a map addition: log it and defer to ev_add_map(). */
+static int
+uev_add_map (struct sysfs_device * dev, struct vectors * vecs)
+{
+       int rc;
+
+       condlog(2, "%s: add map (uevent)", dev->kernel);
+       rc = ev_add_map(dev, vecs);
+
+       return rc;
+}
+
+/*
+ * Register the device-mapper map behind the sysfs device "dev".
+ *
+ * The major:minor pair is read from the device's "dev" sysfs attribute and
+ * translated to a map name with dm_mapname().
+ *
+ * Returns 1 when the attribute cannot be read or parsed, or no map name is
+ * found.  Returns 0 when the map exists but is not a multipath target, when
+ * it is already registered, or when it was added through
+ * add_map_without_path().  Otherwise returns 0 or 1 depending on the result
+ * of coalesce_paths() for the reference wwid (1 if no reference wwid could
+ * be obtained).
+ */
+int
+ev_add_map (struct sysfs_device * dev, struct vectors * vecs)
+{
+       char * alias;
+       char *dev_t;
+       int major, minor;
+       char * refwwid;
+       struct multipath * mpp;
+       int map_present;
+       int r = 1;
+
+       dev_t = sysfs_attr_get_value(dev->devpath, "dev");
+
+       if (!dev_t || sscanf(dev_t, "%d:%d", &major, &minor) != 2)
+               return 1;
+
+       alias = dm_mapname(major, minor);
+
+       if (!alias)
+               return 1;
+
+       map_present = dm_map_present(alias);
+
+       if (map_present && dm_type(alias, TGT_MPATH) <= 0) {
+               condlog(4, "%s: not a multipath map", alias);
+               FREE(alias);
+               return 0;
+       }
+
+       mpp = find_mp_by_alias(vecs->mpvec, alias);
+
+       if (mpp) {
+               /*
+                * Not really an error -- we generate our own uevent
+                * if we create a multipath mapped device as a result
+                * of uev_add_path
+                */
+               condlog(0, "%s: devmap already registered",
+                       dev->kernel);
+               FREE(alias);
+               return 0;
+       }
+
+       /*
+        * now we can register the map
+        */
+       /*
+        * NOTE(review): alias is not freed on this success path, so
+        * add_map_without_path() presumably takes ownership of it.  If it
+        * fails, alias is still used and freed below -- confirm that the
+        * callee does not release it on failure.
+        */
+       if (map_present && (mpp = add_map_without_path(vecs, minor, alias))) {
+               sync_map_state(mpp);
+               condlog(2, "%s: devmap %s added", alias, dev->kernel);
+               return 0;
+       }
+       refwwid = get_refwwid(dev->kernel, DEV_DEVMAP, vecs->pathvec);
+
+       if (refwwid) {
+               r = coalesce_paths(vecs, NULL, refwwid, 0);
+               dm_lib_release();
+       }
+
+       if (!r)
+               condlog(2, "%s: devmap %s added", alias, dev->kernel);
+       else
+               condlog(0, "%s: uev_add_map %s failed", alias, dev->kernel);
+
+       /* NOTE(review): refwwid may be NULL here -- confirm FREE() accepts NULL */
+       FREE(refwwid);
+       FREE(alias);
+       return r;
+}
+
+/* uevent entry point for a map removal: log it and defer to ev_remove_map(). */
+static int
+uev_remove_map (struct sysfs_device * dev, struct vectors * vecs)
+{
+       int rc;
+
+       condlog(2, "%s: remove map (uevent)", dev->kernel);
+       rc = ev_remove_map(dev->kernel, vecs);
+
+       return rc;
+}
+
+/*
+ * Flush the registered map matching devname, or log that no such map is
+ * registered.  The result of flush_map() is ignored: 0 is always returned.
+ */
+int
+ev_remove_map (char * devname, struct vectors * vecs)
+{
+       struct multipath * map = find_mp_by_str(vecs->mpvec, devname);
+
+       if (map)
+               flush_map(map, vecs);
+       else
+               condlog(2, "%s: devmap not registered, can't remove",
+                       devname);
+
+       return 0;
+}
+
+/*
+ * uevent entry point for an unmounted map: refresh and verify the path list
+ * of the matching registered map and flush the map if no path is left.
+ * Always returns 0, also when no registered map matches.
+ */
+static int
+uev_umount_map (struct sysfs_device * dev, struct vectors * vecs)
+{
+       struct multipath * map;
+
+       condlog(2, "%s: umount map (uevent)", dev->kernel);
+
+       map = find_mp_by_str(vecs->mpvec, dev->kernel);
+       if (map) {
+               update_mpp_paths(map, vecs->pathvec);
+               verify_paths(map, vecs, NULL);
+
+               if (VECTOR_SIZE(map->paths) == 0)
+                       flush_map(map, vecs);
+       }
+
+       return 0;
+}
+
+/*
+ * uevent handler for the addition of a path device. Reduces the result
+ * of ev_add_path() to 1 when it reports an error (1) and to 0 for any
+ * other value.
+ */
+static int
+uev_add_path (struct sysfs_device * dev, struct vectors * vecs)
+{
+       int rc;
+
+       condlog(2, "%s: add path (uevent)", dev->kernel);
+       rc = ev_add_path(dev->kernel, vecs);
+
+       return (rc == 1) ? 1 : 0;
+}
+
+
+/*
+ * Handle the appearance of a path device: record it in vecs->pathvec,
+ * then either reload the existing multipath map that shares its WWID or
+ * create a new map for it.
+ *
+ * A failed reload is retried after a one second sleep, but only a
+ * bounded number of times, so a persistently failing domap() cannot
+ * keep this function looping forever.
+ *
+ * devname: kernel name of the path device
+ * vecs:    the daemon's path and map vectors
+ *
+ * returns:
+ * 0: added (also when the path is already known and attached to a map)
+ * 1: error
+ * 2: blacklisted
+ */
+int
+ev_add_path (char * devname, struct vectors * vecs)
+{
+       struct multipath * mpp;
+       struct path * pp;
+       char empty_buff[WWID_SIZE] = {0};
+       int retries = 3; /* upper bound on the reload retries below */
+       int i;
+
+       if (strstr(devname, "..") != NULL) {
+               /*
+                * Don't allow relative device names in the pathvec
+                */
+               condlog(0, "%s: path name is invalid", devname);
+               return 1;
+       }
+
+       pp = find_path_by_dev(vecs->pathvec, devname);
+
+       if (pp) {
+               condlog(0, "%s: spurious uevent, path already in pathvec",
+                       devname);
+               if (pp->mpp)
+                       return 0;
+       }
+       else {
+               /*
+                * get path vital state
+                */
+               if (!(pp = store_pathinfo(vecs->pathvec, conf->hwtable,
+                     devname, DI_ALL))) {
+                       condlog(0, "%s: failed to store path info", devname);
+                       return 1;
+               }
+               pp->checkint = conf->checkint;
+       }
+
+       /*
+        * need path UID to go any further
+        */
+       if (memcmp(empty_buff, pp->wwid, WWID_SIZE) == 0) {
+               condlog(0, "%s: failed to get path uid", devname);
+               return 1; /* leave path added to pathvec */
+       }
+       if (filter_path(conf, pp) > 0){
+               /* filtered out: drop the path from the pathvec again */
+               i = find_slot(vecs->pathvec, (void *)pp);
+               if (i != -1)
+                       vector_del_slot(vecs->pathvec, i);
+               free_path(pp);
+               return 2;
+       }
+       mpp = pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
+rescan:
+       if (mpp) {
+               /*
+                * a map with this WWID exists: the new path must have a
+                * non-zero size equal to the map size
+                */
+               if ((!pp->size) || (mpp->size != pp->size)) {
+                       if (!pp->size) {
+                               condlog(0, "%s: failed to add new path %s, "
+                                       "device size is 0",
+                                       devname, pp->dev);
+                       } else {
+                               condlog(0, "%s: failed to add new path %s, "
+                                       "device size mismatch",
+                                       devname, pp->dev);
+                       }
+                       i = find_slot(vecs->pathvec, (void *)pp);
+                       if (i != -1)
+                               vector_del_slot(vecs->pathvec, i);
+                       free_path(pp);
+                       return 1;
+               }
+
+               condlog(4,"%s: adopting all paths for path %s",
+                       mpp->alias, pp->dev);
+               if (adopt_paths(vecs->pathvec, mpp))
+                       return 1; /* leave path added to pathvec */
+
+               verify_paths(mpp, vecs, NULL);
+               mpp->flush_on_last_del = FLUSH_UNDEF;
+               mpp->action = ACT_RELOAD;
+       }
+       else {
+               if (!pp->size) {
+                       condlog(0, "%s: failed to create new map,"
+                               " %s device size is 0 ", devname, pp->dev);
+                       i = find_slot(vecs->pathvec, (void *)pp);
+                       if (i != -1)
+                               vector_del_slot(vecs->pathvec, i);
+                       free_path(pp);
+                       return 1;
+               }
+
+               condlog(4,"%s: creating new map", pp->dev);
+               if ((mpp = add_map_with_path(vecs, pp, 1)))
+                       mpp->action = ACT_CREATE;
+               else
+                       return 1; /* leave path added to pathvec */
+       }
+
+       /*
+        * push the map to the device-mapper
+        */
+       if (setup_map(mpp)) {
+               condlog(0, "%s: failed to setup map for addition of new "
+                       "path %s", mpp->alias, devname);
+               goto out;
+       }
+       /*
+        * reload the map for the multipath mapped device
+        */
+       if (domap(mpp) <= 0) {
+               condlog(0, "%s: failed in domap for addition of new "
+                       "path %s", mpp->alias, devname);
+               /*
+                * deal with asynchronous uevents :((
+                * Retry a reload only while the retry budget lasts;
+                * afterwards give up and remove the map like any other
+                * failure.
+                */
+               if (mpp->action == ACT_RELOAD && retries-- > 0) {
+                       condlog(0, "%s: uev_add_path sleep", mpp->alias);
+                       sleep(1);
+                       update_mpp_paths(mpp, vecs->pathvec);
+                       goto rescan;
+               }
+               else
+                       goto out;
+       }
+       dm_lib_release();
+
+       /*
+        * update our state from kernel regardless of create or reload
+        */
+       if (setup_multipath(vecs, mpp))
+               goto out;
+
+       sync_map_state(mpp);
+
+       /* only a freshly created map gets a waiter thread started here */
+       if (mpp->action == ACT_CREATE &&
+           start_waiter_thread(mpp, vecs))
+                       goto out;
+
+       condlog(2, "%s path added to devmap %s", devname, mpp->alias);
+       return 0;
+
+out:
+       remove_map(mpp, vecs, 1);
+       return 1;
+}
+
+/*
+ * uevent handler for the removal of a path device. Delegates to
+ * ev_remove_path() and calls sysfs_device_put() on the device only when
+ * that call returned 0. Returns the result of ev_remove_path().
+ */
+static int
+uev_remove_path (struct sysfs_device * dev, struct vectors * vecs)
+{
+       int rc;
+
+       condlog(2, "%s: remove path (uevent)", dev->kernel);
+
+       rc = ev_remove_path(dev->kernel, vecs);
+       if (rc == 0)
+               sysfs_device_put(dev);
+
+       return rc;
+}
+
+/*
+ * Handle the disappearance of a path device: detach the path from its
+ * multipath map (flushing the map when this was its last path, otherwise
+ * reloading it), then drop the path from vecs->pathvec and free it.
+ *
+ * devname: kernel name of the path device
+ * vecs:    the daemon's path and map vectors
+ *
+ * Returns 0 when the path was removed or was not known in the first
+ * place, 1 on failure. On the "fail" exit the whole map is removed via
+ * remove_map_and_stop_waiter() and the path is not freed here.
+ */
+int
+ev_remove_path (char * devname, struct vectors * vecs)
+{
+       struct multipath * mpp;
+       struct path * pp;
+       int i, retval = 0;
+
+       pp = find_path_by_dev(vecs->pathvec, devname);
+
+       if (!pp) {
+               /* Not an error; path might have been purged earlier */
+               condlog(0, "%s: path already removed", devname);
+               return 0;
+       }
+
+       /*
+        * avoid referring to the map of an orphaned path
+        */
+       if ((mpp = pp->mpp)) {
+               /*
+                * transform the mp->pg vector of vectors of paths
+                * into a mp->params string to feed the device-mapper
+                */
+               if (update_mpp_paths(mpp, vecs->pathvec)) {
+                       condlog(0, "%s: failed to update paths",
+                               mpp->alias);
+                       goto fail;
+               }
+               if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
+                       vector_del_slot(mpp->paths, i);
+
+               /*
+                * remove the map IFF removing the last path
+                */
+               if (VECTOR_SIZE(mpp->paths) == 0) {
+                       char alias[WWID_SIZE];
+
+                       /*
+                        * flush_map will fail if the device is open
+                        *
+                        * Keep a private, always NUL-terminated copy of
+                        * the alias for the log message that follows a
+                        * successful flush (strncpy alone does not
+                        * terminate the copy when the source fills the
+                        * buffer).
+                        */
+                       strncpy(alias, mpp->alias, WWID_SIZE - 1);
+                       alias[WWID_SIZE - 1] = '\0';
+                       if (mpp->flush_on_last_del == FLUSH_ENABLED) {
+                               condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
+                               mpp->retry_tick = 0;
+                               mpp->no_path_retry = NO_PATH_RETRY_FAIL;
+                               mpp->flush_on_last_del = FLUSH_IN_PROGRESS;
+                               dm_queue_if_no_path(mpp->alias, 0);
+                       }
+                       if (!flush_map(mpp, vecs)) {
+                               condlog(2, "%s: removed map after"
+                                       " removing all paths",
+                                       alias);
+                               retval = 0;
+                               goto out;
+                       }
+                       /*
+                        * Not an error, continue
+                        */
+               }
+
+               if (setup_map(mpp)) {
+                       condlog(0, "%s: failed to setup map for"
+                               " removal of path %s", mpp->alias,
+                               devname);
+                       goto fail;
+               }
+               /*
+                * reload the map
+                */
+               mpp->action = ACT_RELOAD;
+               if (domap(mpp) <= 0) {
+                       condlog(0, "%s: failed in domap for "
+                               "removal of path %s",
+                               mpp->alias, devname);
+                       retval = 1;
+               } else {
+                       /*
+                        * update our state from kernel
+                        */
+                       if (setup_multipath(vecs, mpp)) {
+                               goto fail;
+                       }
+                       sync_map_state(mpp);
+
+                       condlog(2, "%s: path removed from map %s",
+                               devname, mpp->alias);
+               }
+       }
+
+out:
+       /* forget the path itself, whatever happened to the map reload */
+       if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
+               vector_del_slot(vecs->pathvec, i);
+
+       free_path(pp);
+
+       return retval;
+
+fail:
+       remove_map_and_stop_waiter(mpp, vecs, 1);
+       return 1;
+}
+
+/*
+ * Fill vecs->mpvec through dm_get_maps() and run setup_multipath() on
+ * every map found. Returns 1 as soon as one of these calls fails,
+ * 0 otherwise.
+ */
+static int
+map_discovery (struct vectors * vecs)
+{
+       struct multipath * map;
+       unsigned int slot;
+
+       if (dm_get_maps(vecs->mpvec) != 0)
+               return 1;
+
+       vector_foreach_slot (vecs->mpvec, map, slot) {
+               if (setup_multipath(vecs, map) != 0)
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Callback handed to uxsock_listen(): runs one command string under the
+ * vecs lock and builds the reply.
+ *
+ * str:          command text, passed to parse_cmd()
+ * reply, len:   out parameters; reset to NULL/0 first, then filled either
+ *               by parse_cmd() or with a generic "fail\n" / "ok\n" string
+ * trigger_data: the struct vectors * of the daemon
+ *
+ * Returns 1 when parse_cmd() returned a positive value or a generic
+ * reply could not be allocated, otherwise the parse_cmd() result.
+ */
+int
+uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
+{
+       struct vectors * vecs;
+       int r;
+
+       *reply = NULL;
+       *len = 0;
+       vecs = (struct vectors *)trigger_data;
+
+       pthread_cleanup_push(cleanup_lock, &vecs->lock);
+       lock(vecs->lock);
+
+       r = parse_cmd(str, reply, len, vecs);
+
+       if (r > 0) {
+               *reply = STRDUP("fail\n");
+               /* an allocation failure leaves an empty reply behind */
+               *len = *reply ? strlen(*reply) + 1 : 0;
+               r = 1;
+       }
+       else if (!r && *len == 0) {
+               *reply = STRDUP("ok\n");
+               if (*reply)
+                       *len = strlen(*reply) + 1;
+               else
+                       r = 1; /* could not even allocate the "ok" reply */
+       }
+       /* else if (r < 0) leave *reply alone */
+
+       lock_cleanup_pop(vecs->lock);
+       return r;
+}
+
+/*
+ * Decide whether a uevent should be ignored based on its devpath.
+ * Returns 1 (discard) when the path has no "/block/" component, when no
+ * name can be scanned after it, or when a second component follows the
+ * device name; returns 0 (keep) otherwise.
+ */
+static int
+uev_discard(char * devpath)
+{
+       char disk[11], part[11];
+       char * blk;
+
+       /*
+        * keep only block devices, discard partitions
+        */
+       blk = strstr(devpath, "/block/");
+       if (!blk) {
+               condlog(4, "no /block/ in '%s'", devpath);
+               return 1;
+       }
+
+       /* keep: a device name is present and nothing follows it */
+       if (sscanf(blk, "/block/%10s", disk) == 1 &&
+           sscanf(blk, "/block/%10[^/]/%10s", disk, part) != 2)
+               return 0;
+
+       condlog(4, "discard event on %s", devpath);
+       return 1;
+}
+
+/*
+ * Callback handed to uevent_listen(): dispatches one uevent to the map
+ * or path handler while holding the vecs lock.
+ *
+ * Events rejected by uev_discard(), or for which no sysfs device can be
+ * obtained, return 0 without taking the lock. Otherwise the result of
+ * the selected handler is returned (0 when no handler applies).
+ */
+int
+uev_trigger (struct uevent * uev, void * trigger_data)
+{
+       struct vectors * vecs = (struct vectors *)trigger_data;
+       struct sysfs_device *sysdev;
+       int r = 0;
+
+       if (uev_discard(uev->devpath))
+               return 0;
+
+       sysdev = sysfs_device_get(uev->devpath);
+       if (!sysdev)
+               return 0;
+
+       lock(vecs->lock);
+
+       if (!strncmp(sysdev->kernel, "dm-", 3)) {
+               /*
+                * device map event
+                * Add events are ignored here as the tables
+                * are not fully initialised then.
+                */
+               if (!strncmp(uev->action, "change", 6))
+                       r = uev_add_map(sysdev, vecs);
+               else if (!strncmp(uev->action, "remove", 6))
+                       r = uev_remove_map(sysdev, vecs);
+               else if (!strncmp(uev->action, "umount", 6))
+                       r = uev_umount_map(sysdev, vecs);
+       }
+       else if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
+                               sysdev->kernel) <= 0) {
+               /*
+                * path add/remove event, device node not filtered out
+                */
+               if (!strncmp(uev->action, "add", 3))
+                       r = uev_add_path(sysdev, vecs);
+               else if (!strncmp(uev->action, "remove", 6))
+                       r = uev_remove_path(sysdev, vecs);
+       }
+
+       unlock(vecs->lock);
+       return r;
+}
+
+/*
+ * Thread body of the uevent listener: blocks SIGUSR1 and SIGHUP in this
+ * thread, then runs uevent_listen() with uev_trigger as callback and ap
+ * as its data. Always returns NULL.
+ */
+static void *
+ueventloop (void * ap)
+{
+       block_signal(SIGUSR1, NULL);
+       block_signal(SIGHUP, NULL);
+
+       if (uevent_listen(&uev_trigger, ap) != 0) {
+               fprintf(stderr, "error starting uevent listener");
+       }
+
+       return NULL;
+}
+
+/*
+ * Thread body of the unix socket listener: blocks SIGUSR1 and SIGHUP in
+ * this thread, initialises the CLI, registers one callback per command
+ * key combination and then runs uxsock_listen() with uxsock_trigger as
+ * callback and ap as its data. Returns NULL, early if cli_init() fails.
+ */
+static void *
+uxlsnrloop (void * ap)
+{
+       block_signal(SIGUSR1, NULL);
+       block_signal(SIGHUP, NULL);
+
+       if (cli_init())
+               return NULL;
+
+       /* listing commands */
+       set_handler_callback(LIST+PATHS, cli_list_paths);
+       set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
+       set_handler_callback(LIST+MAPS, cli_list_maps);
+       set_handler_callback(LIST+STATUS, cli_list_status);
+       set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
+       set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
+       set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
+       set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
+       set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
+       set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
+       set_handler_callback(LIST+CONFIG, cli_list_config);
+       set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
+       set_handler_callback(LIST+DEVICES, cli_list_devices);
+       set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
+       /* commands acting on paths and maps */
+       set_handler_callback(ADD+PATH, cli_add_path);
+       set_handler_callback(DEL+PATH, cli_del_path);
+       set_handler_callback(ADD+MAP, cli_add_map);
+       set_handler_callback(DEL+MAP, cli_del_map);
+       set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
+       set_handler_callback(RECONFIGURE, cli_reconfigure);
+       set_handler_callback(SUSPEND+MAP, cli_suspend);
+       set_handler_callback(RESUME+MAP, cli_resume);
+       set_handler_callback(RESIZE+MAP, cli_resize);
+       set_handler_callback(REINSTATE+PATH, cli_reinstate);
+       set_handler_callback(FAIL+PATH, cli_fail);
+       set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
+       set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
+       set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
+       set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
+       set_handler_callback(QUIT, cli_quit);
+
+       /* restrict the file mode creation mask before listening */
+       umask(077);
+       uxsock_listen(&uxsock_trigger, ap);
+
+       return NULL;
+}
+
+/*
+ * Start the daemon shutdown: complains on stderr for a non-zero status,
+ * unlinks the pid file and signals exit_cond under exit_mutex.
+ * Returns status unchanged.
+ */
+static int
+exit_daemon (int status)
+{
+       if (status) {
+               fprintf(stderr, "bad exit status. see daemon.log\n");
+       }
+
+       condlog(3, "unlink pidfile");
+       unlink(DEFAULT_PIDFILE);
+
+       /* signal whoever waits on exit_cond */
+       pthread_mutex_lock(&exit_mutex);
+       pthread_cond_signal(&exit_cond);
+       pthread_mutex_unlock(&exit_mutex);
+
+       return status;
+}
+
+/*
+ * Mark a path as failed in the device-mapper via dm_fail_path().
+ * Does nothing for a path that belongs to no map. When del_active is
+ * non-zero, update_queue_mode_del_path() is called on the map as well.
+ */
+static void
+fail_path (struct path * pp, int del_active)
+{
+       struct multipath * map = pp->mpp;
+
+       if (map == NULL)
+               return;
+
+       condlog(2, "checker failed path %s in map %s",
+                pp->dev_t, map->alias);
+
+       dm_fail_path(map->alias, pp->dev_t);
+
+       if (del_active != 0)
+               update_queue_mode_del_path(map);
+}
+
+/*
+ * Reinstate a path in the device-mapper via dm_reinstate_path().
+ * Does nothing for a path that belongs to no map. On success, and when
+ * add_active is non-zero, update_queue_mode_add_path() is called on the
+ * map.
+ *
+ * caller must have locked the path list before calling that function
+ */
+static void
+reinstate_path (struct path * pp, int add_active)
+{
+       if (pp->mpp == NULL)
+               return;
+
+       if (dm_reinstate_path(pp->mpp->alias, pp->dev_t) != 0) {
+               condlog(0, "%s: reinstate failed", pp->dev_t);
+               return;
+       }
+
+       condlog(2, "%s: reinstated", pp->dev_t);
+       if (add_active != 0)
+               update_queue_mode_add_path(pp->mpp);
+}
+
+/*
+ * Re-enable the path group that pp belongs to when that group is in
+ * the PGSTATE_DISABLED state.
+ */
+static void
+enable_group(struct path * pp)
+{
+       struct pathgroup * group;
+
+       /*
+        * A path added through uev_add_path may not have its pgindex set
+        * yet; the next update_strings() sets it upon the map reload
+        * event.
+        *
+        * Returning here is safe, because all path groups are enabled
+        * upon map reload.
+        */
+       if (!pp->mpp->pg || !pp->pgindex)
+               return;
+
+       /* pgindex counts from 1, vector slots from 0 */
+       group = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
+
+       if (group->status != PGSTATE_DISABLED)
+               return;
+
+       condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
+       dm_enablegroup(pp->mpp->alias, pp->pgindex);
+}
+
+/*
+ * Walk vecs->mpvec and remove every map whose alias dm_map_present()
+ * no longer reports, stopping its waiter along the way.
+ */
+static void
+mpvec_garbage_collector (struct vectors * vecs)
+{
+       struct multipath * map;
+       unsigned int slot;
+
+       if (!vecs->mpvec)
+               return;
+
+       vector_foreach_slot (vecs->mpvec, map, slot) {
+               if (!map || !map->alias || dm_map_present(map->alias))
+                       continue;
+
+               condlog(2, "%s: remove dead map", map->alias);
+               remove_map_and_stop_waiter(map, vecs, 1);
+               /* step back so the same slot is looked at again */
+               slot--;
+       }
+}
+
+/*
+ * Advance the deferred failback countdown of every map by one tick and
+ * switch path group on the maps whose countdown just reached zero, if
+ * need_switch_pathgroup() agrees.
+ */
+static void
+defered_failback_tick (vector mpvec)
+{
+       struct multipath * map;
+       unsigned int slot;
+
+       vector_foreach_slot (mpvec, map, slot) {
+               /* only maps with a failback delay and a running countdown */
+               if (map->pgfailback <= 0 || map->failback_tick <= 0)
+                       continue;
+
+               /*
+                * deferred failback getting sooner
+                */
+               map->failback_tick--;
+               if (map->failback_tick)
+                       continue;
+
+               if (need_switch_pathgroup(map, 1))
+                       switch_pathgroup(map);
+       }
+}
+
+/*
+ * Advance the retry countdown of every map that has one running, add
+ * one tick to its queueing time statistic, and turn queueing off via
+ * dm_queue_if_no_path() when the countdown reaches zero.
+ */
+static void
+retry_count_tick(vector mpvec)
+{
+       struct multipath *map;
+       unsigned int slot;
+
+       vector_foreach_slot (mpvec, map, slot) {
+               if (!map->retry_tick)
+                       continue;
+
+               map->stat_total_queueing_time++;
+               condlog(4, "%s: Retrying.. No active path", map->alias);
+
+               map->retry_tick--;
+               if (map->retry_tick != 0)
+                       continue;
+
+               dm_queue_if_no_path(map->alias, 0);
+               condlog(2, "%s: Disable queueing", map->alias);
+       }
+}
+
+/*
+ * Run one checker tick for a single path.
+ *
+ * Paths without a map are skipped. pp->tick is a countdown: the path is
+ * only checked when it is zero or reaches zero on this call. The new
+ * state comes from get_state(); on a state change the path is failed or
+ * reinstated in the device-mapper and failback is (re)scheduled, on an
+ * unchanged up/ghost state the check interval is doubled up to
+ * conf->max_checkint. Finally the path priority is refreshed and the
+ * path group failback policy is applied.
+ *
+ * vecs: the daemon's path and map vectors
+ * pp:   the path to check
+ */
+void
+check_path (struct vectors * vecs, struct path * pp)
+{
+       int newstate;
+
+       if (!pp->mpp)
+               return;
+
+       if (pp->tick && --pp->tick)
+               return; /* don't check this path yet */
+
+       /*
+        * provision a next check soonest,
+        * in case we exit abnormally from here
+        */
+       pp->tick = conf->checkint;
+
+       newstate = get_state(pp, 1);
+
+       if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
+               condlog(2, "%s: unusable path", pp->dev);
+               pathinfo(pp, conf->hwtable, 0);
+               return;
+       }
+       /*
+        * Async IO in flight. Keep the previous path state
+        * and reschedule as soon as possible
+        */
+       if (newstate == PATH_PENDING) {
+               pp->tick = 1;
+               return;
+       }
+       if (newstate != pp->state) {
+               int oldstate = pp->state;
+               pp->state = newstate;
+               LOG_MSG(1, checker_message(&pp->checker));
+
+               /*
+                * upon state change, reset the checkint
+                * to the shortest delay
+                */
+               pp->checkint = conf->checkint;
+
+               if (newstate == PATH_DOWN || newstate == PATH_SHAKY ||
+                   update_multipath_strings(pp->mpp, vecs->pathvec)) {
+                       /*
+                        * proactively fail path in the DM
+                        * (the second argument tells fail_path whether
+                        * the path was up or ghost before)
+                        */
+                       if (oldstate == PATH_UP ||
+                           oldstate == PATH_GHOST)
+                               fail_path(pp, 1);
+                       else
+                               fail_path(pp, 0);
+
+                       /*
+                        * cancel scheduled failback
+                        */
+                       pp->mpp->failback_tick = 0;
+
+                       pp->mpp->stat_path_failures++;
+                       return;
+               }
+
+               /*
+                * reinstate this path
+                * (the second argument tells reinstate_path whether the
+                * path was neither up nor ghost before)
+                */
+               if (oldstate != PATH_UP &&
+                   oldstate != PATH_GHOST)
+                       reinstate_path(pp, 1);
+               else
+                       reinstate_path(pp, 0);
+
+               /*
+                * schedule [deferred] failback
+                */
+               if (pp->mpp->pgfailback > 0)
+                       pp->mpp->failback_tick =
+                               pp->mpp->pgfailback + 1;
+               else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
+                   need_switch_pathgroup(pp->mpp, 1))
+                       switch_pathgroup(pp->mpp);
+
+               /*
+                * if at least one path is up in a group, and
+                * the group is disabled, re-enable it
+                */
+               if (newstate == PATH_UP)
+                       enable_group(pp);
+       }
+       else if (newstate == PATH_UP || newstate == PATH_GHOST) {
+               LOG_MSG(4, checker_message(&pp->checker));
+               /*
+                * double the next check delay.
+                * max at conf->max_checkint
+                */
+               if (pp->checkint < (conf->max_checkint / 2))
+                       pp->checkint = 2 * pp->checkint;
+               else
+                       pp->checkint = conf->max_checkint;
+
+               pp->tick = pp->checkint;
+               condlog(4, "%s: delay next check %is",
+                               pp->dev_t, pp->tick);
+       }
+       else if (newstate == PATH_DOWN)
+               LOG_MSG(2, checker_message(&pp->checker));
+
+       pp->state = newstate;
+
+       /*
+        * path prio refreshing
+        */
+       condlog(4, "path prio refresh");
+       pathinfo(pp, conf->hwtable, DI_PRIO);
+
+       /*
+        * pathgroup failback policy:
+        * start the deferred failback countdown if none is running,
+        * or switch right away for immediate failback
+        */
+       if (need_switch_pathgroup(pp->mpp, 0)) {
+               if (pp->mpp->pgfailback > 0 &&
+                   pp->mpp->failback_tick <= 0)
+                       pp->mpp->failback_tick =
+                               pp->mpp->pgfailback + 1;
+               else if (pp->mpp->pgfailback ==
+                               -FAILBACK_IMMEDIATE)
+                       switch_pathgroup(pp->mpp);
+       }
+}
+
+/*
+ * Thread body of the path checker. After initialising the check
+ * interval of every known path, loops forever; each iteration blocks
+ * SIGHUP, takes the vecs lock, checks every path, advances the failback
+ * and retry countdowns of the maps, runs the map garbage collector once
+ * the "count" countdown has expired, then releases the lock, restores
+ * the signal mask and sleeps for one second.
+ *
+ * ap: the struct vectors * of the daemon
+ */
+static void *
+checkerloop (void *ap)
+{
+       struct vectors *vecs;
+       struct path *pp;
+       int count = 0;
+       unsigned int i;
+       sigset_t old;
+
+       mlockall(MCL_CURRENT | MCL_FUTURE);
+       vecs = (struct vectors *)ap;
+       condlog(2, "path checkers start up");
+
+       /*
+        * init the path check interval
+        */
+       vector_foreach_slot (vecs->pathvec, pp, i) {
+               pp->checkint = conf->checkint;
+       }
+
+       while (1) {
+               /* SIGHUP stays blocked while the lock is held below */
+               block_signal(SIGHUP, &old);
+               pthread_cleanup_push(cleanup_lock, &vecs->lock);
+               lock(vecs->lock);
+               condlog(4, "tick");
+
+               if (vecs->pathvec) {
+                       vector_foreach_slot (vecs->pathvec, pp, i) {
+                               check_path(vecs, pp);
+                       }
+               }
+               if (vecs->mpvec) {
+                       defered_failback_tick(vecs->mpvec);
+                       retry_count_tick(vecs->mpvec);
+               }
+               /* garbage collect when count hits 0, then rearm it */
+               if (count)
+                       count--;
+               else {
+                       condlog(4, "map garbage collection");
+                       mpvec_garbage_collector(vecs);
+                       count = MAPGCINT;
+               }
+
+               lock_cleanup_pop(vecs->lock);
+               /* restore the signal mask saved at the top of the loop */
+               pthread_sigmask(SIG_SETMASK, &old, NULL);
+               sleep(1);
+       }
+       return NULL;
+}
+
+/*
+ * Build the daemon's view of paths and maps from scratch: discover the
+ * paths, drop the filtered ones, discover the existing maps, coalesce
+ * paths into a new map vector, replace vecs->mpvec with it and set up
+ * every resulting map.
+ *
+ * vecs:          the daemon's path and map vectors; missing vectors are
+ *                allocated here
+ * start_waiters: when non-zero, start_waiter_thread() is called for
+ *                every map
+ *
+ * Returns 0 on success, 1 as soon as one step fails.
+ */
+int
+configure (struct vectors * vecs, int start_waiters)
+{
+       struct multipath * mpp;
+       struct path * pp;
+       vector mpvec;
+       int i;
+
+       if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
+               return 1;
+
+       if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
+               return 1;
+
+       /* mpvec collects the new set of maps built below */
+       if (!(mpvec = vector_alloc()))
+               return 1;
+
+       /*
+        * probe for current path (from sysfs) and map (from dm) sets
+        */
+       path_discovery(vecs->pathvec, conf, DI_ALL);
+
+       vector_foreach_slot (vecs->pathvec, pp, i){
+               if (filter_path(conf, pp) > 0){
+                       vector_del_slot(vecs->pathvec, i);
+                       free_path(pp);
+                       /* step back so the same slot is looked at again */
+                       i--;
+               }
+               else
+                       pp->checkint = conf->checkint;
+       }
+       if (map_discovery(vecs))
+               return 1;
+
+       /*
+        * create new set of maps & push changed ones into dm
+        */
+       if (coalesce_paths(vecs, mpvec, NULL, 0))
+               return 1;
+
+       /*
+        * may need to remove some maps which are no longer relevant
+        * e.g., due to blacklist changes in conf file
+        */
+       if (coalesce_maps(vecs, mpvec))
+               return 1;
+
+       dm_lib_release();
+
+       sync_maps_state(mpvec);
+
+       /*
+        * purge dm of old maps
+        */
+       remove_maps(vecs);
+
+       /*
+        * save new set of maps formed by considering current path state
+        */
+       vector_free(vecs->mpvec);
+       vecs->mpvec = mpvec;
+
+       /*
+        * start dm event waiter threads for these new maps
+        */
+       vector_foreach_slot(vecs->mpvec, mpp, i) {
+               if (setup_multipath(vecs, mpp))
+                       return 1;
+               if (start_waiters)
+                       if (start_waiter_thread(mpp, vecs))
+                               return 1;
+       }
+       return 0;
+}
+
+/*
+ * Reload the configuration file and rebuild all paths and maps.
+ *
+ * The current maps and paths are dropped first, then a new global conf
+ * is loaded from DEFAULT_CONFIGFILE, inheriting the verbosity of the
+ * previous one, and configure() is run with waiter threads enabled.
+ *
+ * Returns 0 on success and 1 when the configuration file could not be
+ * loaded; in that case the previous configuration stays installed as
+ * the global conf. The result of configure() is not reported.
+ */
+int
+reconfigure (struct vectors * vecs)
+{
+       struct config * old = conf;
+
+       /*
+        * free old map and path vectors ... they use old conf state
+        */
+       if (VECTOR_SIZE(vecs->mpvec))
+               remove_maps_and_stop_waiters(vecs);
+
+       if (VECTOR_SIZE(vecs->pathvec))
+               free_pathvec(vecs->pathvec, FREE_PATHS);
+
+       vecs->pathvec = NULL;
+       /* cleared before load_config() fills in the new configuration */
+       conf = NULL;
+
+       if (load_config(DEFAULT_CONFIGFILE)) {
+               /*
+                * Put the previous configuration back: conf was cleared
+                * above and the rest of the daemon dereferences it, and
+                * 'old' would otherwise be lost.
+                * NOTE(review): whatever load_config() leaves in conf on
+                * failure is not visible from here -- confirm nothing
+                * needs releasing.
+                */
+               conf = old;
+               return 1;
+       }
+
+       conf->verbosity = old->verbosity;
+
+       /*
+        * fill the voids left in the config file
+        */
+       if (!conf->checkint) {
+               conf->checkint = DEFAULT_CHECKINT;
+               conf->max_checkint = MAX_CHECKINT(conf->checkint);
+       }
+       conf->daemon = 1;
+       configure(vecs, 1);
+       free_config(old);
+       return 0;
+}
+
+/*
+ * Allocate the daemon's struct vectors together with the mutex of its
+ * lock, initialise the mutex and reset the lock depth to 0.
+ * Returns the new structure, or NULL when an allocation fails.
+ */
+static struct vectors *
+init_vecs (void)
+{
+       struct vectors * v;
+
+       v = (struct vectors *)MALLOC(sizeof(struct vectors));
+       if (!v)
+               return NULL;
+
+       v->lock.mutex =
+               (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
+
+       if (v->lock.mutex) {
+               pthread_mutex_init(v->lock.mutex, NULL);
+               v->lock.depth = 0;
+               return v;
+       }
+
+       /* no mutex: give the container back */
+       FREE(v);
+       condlog(0, "failed to init paths");
+       return NULL;
+}
+
+/*
+ * Install func as handler of signal signo through sigaction(), with an
+ * empty signal mask and no flags. Returns the previous handler, or
+ * SIG_ERR when sigaction() fails.
+ */
+static void *
+signal_set(int signo, void (*func) (int))
+{
+       struct sigaction act;
+       struct sigaction prev;
+
+       act.sa_handler = func;
+       sigemptyset(&act.sa_mask);
+       act.sa_flags = 0;
+
+       if (sigaction(signo, &act, &prev) < 0)
+               return (SIG_ERR);
+
+       return (prev.sa_handler);
+}
+
+/*
+ * SIGHUP handler: reruns reconfigure() on the global vectors while
+ * holding their lock. The signal number is not used.
+ */
+static void
+sighup (int sig)
+{
+       (void)sig;
+
+       condlog(2, "reconfigure (SIGHUP)");
+
+       lock(gvecs->lock);
+       reconfigure(gvecs);
+       unlock(gvecs->lock);
+
+#ifdef _DEBUG_
+       dbg_free_final(NULL);
+#endif
+}
+
+/*
+ * Termination signal handler: starts the daemon shutdown with exit
+ * status 0. The signal number is not used.
+ */
+static void
+sigend (int sig)
+{
+       (void)sig;
+       exit_daemon(0);
+}
+
+/*
+ * SIGUSR1 handler: only logs that the signal arrived. The signal number
+ * is not used.
+ */
+static void
+sigusr1 (int sig)
+{
+       (void)sig;
+       condlog(3, "SIGUSR1 received");
+}
+
+/*
+ * Install the daemon's signal handlers: SIGHUP, SIGUSR1, SIGINT and
+ * SIGTERM get their handler through signal_set(), SIGPIPE is ignored.
+ */
+static void
+signal_init(void)
+{
+       static const struct {
+               int signo;
+               void (*handler) (int);
+       } sigtab[] = {
+               { SIGHUP,  sighup  },
+               { SIGUSR1, sigusr1 },
+               { SIGINT,  sigend  },
+               { SIGTERM, sigend  },
+       };
+       unsigned int n;
+
+       for (n = 0; n < sizeof(sigtab) / sizeof(sigtab[0]); n++)
+               signal_set(sigtab[n].signo, sigtab[n].handler);
+
+       signal(SIGPIPE, SIG_IGN);
+}
+
+/*
+ * Ask for the SCHED_RR scheduling policy at priority 99 for the calling
+ * process. A failure is only logged.
+ */
+static void
+setscheduler (void)
+{
+       static struct sched_param rr_param = {
+               .sched_priority = 99
+       };
+
+       if (sched_setscheduler (0, SCHED_RR, &rr_param) == -1)
+               condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
+}
+
+/*
+ * Write val to /proc/self/oom_adj. Silently does nothing when the file
+ * cannot be opened for writing.
+ */
+static void
+set_oom_adj (int val)
+{
+       FILE *oom = fopen("/proc/self/oom_adj", "w");
+
+       if (oom) {
+               fprintf(oom, "%i", val);
+               fclose(oom);
+       }
+}
+
+/*
+ * Initialise a thread attribute object.
+ *
+ * attr:      attribute object to initialise
+ * stacksize: requested stack size in bytes, raised to PTHREAD_STACK_MIN
+ *            when smaller
+ * detached:  when non-zero the attribute creates detached threads
+ *
+ * Any failure is reported on stderr and ends the process with exit(1).
+ * The pthread_attr_* functions report errors through their return value
+ * and not through errno, so the message is built from that value.
+ */
+void
+setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached)
+{
+       int rc;
+
+       rc = pthread_attr_init(attr);
+       if (rc) {
+               fprintf(stderr, "can't initialize thread attr: %s\n",
+                       strerror(rc));
+               exit(1);
+       }
+       if (stacksize < PTHREAD_STACK_MIN)
+               stacksize = PTHREAD_STACK_MIN;
+
+       rc = pthread_attr_setstacksize(attr, stacksize);
+       if (rc) {
+               fprintf(stderr, "can't set thread stack size to %lu: %s\n",
+                       (unsigned long)stacksize, strerror(rc));
+               exit(1);
+       }
+       if (detached) {
+               rc = pthread_attr_setdetachstate(attr,
+                                                PTHREAD_CREATE_DETACHED);
+               if (rc) {
+                       fprintf(stderr, "can't set thread to detached: %s\n",
+                               strerror(rc));
+                       exit(1);
+               }
+       }
+}
+
+static int
+child (void * param)
+{
+       pthread_t check_thr, uevent_thr, uxlsnr_thr;
+       pthread_attr_t log_attr, misc_attr;
+       struct vectors * vecs;
+       struct multipath * mpp;
+       int i;
+
+       mlockall(MCL_CURRENT | MCL_FUTURE);
+
+       setup_thread_attr(&misc_attr, 64 * 1024, 1);
+       setup_thread_attr(&waiter_attr, 32 * 1024, 1);
+
+       if (logsink) {
+               setup_thread_attr(&log_attr, 64 * 1024, 0);
+               log_thread_start(&log_attr);
+               pthread_attr_destroy(&log_attr);
+       }
+
+       condlog(2, "--------start up--------");
+       condlog(2, "read " DEFAULT_CONFIGFILE);
+
+       if (load_config(DEFAULT_CONFIGFILE))
+               exit(1);
+
+       if (init_checkers()) {
+               condlog(0, "failed to initialize checkers");
+               exit(1);
+       }
+       if (init_prio()) {
+               condlog(0, "failed to initialize prioritizers");
+               exit(1);
+       }
+
+       setlogmask(LOG_UPTO(conf->verbosity + 3));
+
+       /*
+        * fill the voids left in the config file
+        */
+       if (!conf->checkint) {
+               conf->checkint = DEFAULT_CHECKINT;
+               conf->max_checkint = MAX_CHECKINT(conf->checkint);
+       }
+
+       if (conf->max_fds) {
+               struct rlimit fd_limit;
+
+               fd_limit.rlim_cur = conf->max_fds;
+               fd_limit.rlim_max = conf->max_fds;
+               if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0)
+                       condlog(0, "can't set open fds limit to %d : %s\n",
+                               conf->max_fds, strerror(errno));
+       }
+
+       if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
+               if (logsink)
+                       log_thread_stop();
+
+               exit(1);
+       }
+       signal_init();
+       setscheduler();
+       set_oom_adj(-16);
+       vecs = gvecs = init_vecs();
+
+       if (!vecs)
+               exit(1);
+
+       if (sysfs_init(conf->sysfs_dir, FILE_NAME_SIZE)) {
+               condlog(0, "can not find sysfs mount point");
+               exit(1);
+       }
+       conf->daemon = 1;
+       dm_udev_set_sync_support(0);
+       /*
+        * fetch and configure both paths and multipaths
+        */
+       if (configure(vecs, 1)) {
+               condlog(0, "failure during configuration");
+               exit(1);
+       }
+       /*
+        * start threads
+        */
+       pthread_create(&check_thr, &misc_attr, checkerloop, vecs);
+       pthread_create(&uevent_thr, &misc_attr, ueventloop, vecs);
+       pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs);
+       pthread_attr_destroy(&misc_attr);
+
+       pthread_mutex_lock(&exit_mutex);
+       pthread_cond_wait(&exit_cond, &exit_mutex);
+
+       /*
+        * exit path
+        */
+       block_signal(SIGHUP, NULL);
+       lock(vecs->lock);
+       if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
+               vector_foreach_slot(vecs->mpvec, mpp, i)
+                       dm_queue_if_no_path(mpp->alias, 0);
+       remove_maps_and_stop_waiters(vecs);
+       free_pathvec(vecs->pathvec, FREE_PATHS);
+
+       pthread_cancel(check_thr);
+       pthread_cancel(uevent_thr);
+       pthread_cancel(uxlsnr_thr);
+
+       sysfs_cleanup();
+
+       free_keys(keys);
+       keys = NULL;
+       free_handlers(handlers);
+       handlers = NULL;
+       free_polls();
+
+       unlock(vecs->lock);
+       /* Now all the waitevent threads will start rushing in. */
+       while (vecs->lock.depth > 0) {
+               sleep (1); /* This is weak. */
+               condlog(3,"Have %d wait event checkers threads to de-alloc, waiting..\n", vecs->lock.depth);
+       }
+       pthread_mutex_destroy(vecs->lock.mutex);
+       FREE(vecs->lock.mutex);
+       vecs->lock.depth = 0;
+       vecs->lock.mutex = NULL;
+       FREE(vecs);
+       vecs = NULL;
+
+       condlog(2, "--------shut down-------");
+
+       if (logsink)
+               log_thread_stop();
+
+       dm_lib_release();
+       dm_lib_exit();
+
+       cleanup_prio();
+       cleanup_checkers();
+       /*
+        * Freeing config must be done after condlog() and dm_lib_exit(),
+        * because logging functions like dlog() and dm_write_log()
+        * reference the config.
+        */
+       free_config(conf);
+       conf = NULL;
+
+#ifdef _DEBUG_
+       dbg_free_final(NULL);
+#endif
+
+       exit(0);
+}
+
+/*
+ * daemonize - detach from the caller using two fork() calls and setsid().
+ *
+ * Returns -1 when the first fork fails, the pid of the first child (> 0)
+ * in the original process, and 0 in the detached process, whose stdin is
+ * /dev/null and whose stdout/stderr are /dev/console.
+ *
+ * A process that cannot set up its standard streams terminates with a
+ * non-zero status instead of returning.
+ */
+static int
+daemonize(void)
+{
+       int pid;
+       int in_fd, out_fd;
+
+       if( (pid = fork()) < 0){
+               fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
+               return -1;
+       }
+       else if (pid != 0)
+               return pid;
+
+       setsid();
+
+       /* a failed second fork is only reported; this process carries on */
+       if ( (pid = fork()) < 0)
+               fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
+       else if (pid != 0)
+               _exit(0);
+
+       in_fd = open("/dev/null", O_RDONLY);
+       if (in_fd < 0){
+               fprintf(stderr, "cannot open /dev/null for input : %s\n",
+                       strerror(errno));
+               _exit(1);
+       }
+       out_fd = open("/dev/console", O_WRONLY);
+       if (out_fd < 0){
+               fprintf(stderr, "cannot open /dev/console for output : %s\n",
+                       strerror(errno));
+               _exit(1);
+       }
+
+       /*
+        * dup2() replaces the target atomically and is a no-op when source
+        * and target are the same descriptor.  close() followed by dup()
+        * went wrong when the daemon was started with one of fds 0-2
+        * closed, because open() above then returned that very descriptor.
+        */
+       if (dup2(in_fd, STDIN_FILENO) < 0 ||
+           dup2(out_fd, STDOUT_FILENO) < 0 ||
+           dup2(out_fd, STDERR_FILENO) < 0) {
+               fprintf(stderr, "cannot redirect standard streams : %s\n",
+                       strerror(errno));
+               _exit(1);
+       }
+
+       /* never close a descriptor that now is one of the standard streams */
+       if (in_fd > STDERR_FILENO)
+               close(in_fd);
+       if (out_fd > STDERR_FILENO)
+               close(out_fd);
+       if (chdir("/") < 0)
+               fprintf(stderr, "cannot chdir to '/', continuing\n");
+
+       return 0;
+}
+
+/*
+ * main - multipathd entry point.
+ *
+ * Options:
+ *   -d        do not daemonize (logsink is cleared)
+ *   -v level  set conf->verbosity; level must start with a digit
+ *   -k[cmd]   hand cmd (or NULL) to uxclnt() and exit
+ * Any remaining arguments are joined into one command line, sent through
+ * uxclnt() and the process exits.  Otherwise the daemon is started, in the
+ * background unless -d was given.
+ *
+ * Exits with 1 on error (not root, no config, bad -v argument, command
+ * line too long, failed daemonize) and 0 in the parent after a successful
+ * daemonize.
+ */
+int
+main (int argc, char *argv[])
+{
+       extern char *optarg;
+       extern int optind;
+       int arg;
+       int err;
+
+       logsink = 1;
+       dm_init();
+
+       if (getuid() != 0) {
+               fprintf(stderr, "need to be root\n");
+               exit(1);
+       }
+
+       /* make sure we don't lock any path */
+       if (chdir("/") < 0)
+               fprintf(stderr, "cannot chdir to '/', continuing\n");
+       umask(umask(077) | 022);
+
+       conf = alloc_config();
+
+       if (!conf)
+               exit(1);
+
+       /* getopt() signals the end of the options with -1 */
+       while ((arg = getopt(argc, argv, ":dv:k::")) != -1) {
+               switch(arg) {
+               case 'd':
+                       logsink = 0;
+                       break;
+               case 'v':
+                       /*
+                        * <ctype.h> functions need a value representable as
+                        * unsigned char, hence the cast.
+                        */
+                       if (!isdigit((unsigned char)optarg[0]))
+                               exit(1);
+
+                       conf->verbosity = atoi(optarg);
+                       break;
+               case 'k':
+                       uxclnt(optarg);
+                       exit(0);
+               default:
+                       break;
+               }
+       }
+       if (optind < argc) {
+               char cmd[CMDSIZE];
+               size_t used = 0;
+               int n;
+
+               /*
+                * snprintf() returns the length it would have written, so
+                * the result must be checked before advancing: blindly
+                * adding it moves past the end of cmd after a truncation
+                * and turns the remaining size into a huge unsigned value.
+                */
+               while (optind < argc) {
+                       if (strchr(argv[optind], ' '))
+                               n = snprintf(cmd + used, sizeof(cmd) - used,
+                                            "\"%s\" ", argv[optind]);
+                       else
+                               n = snprintf(cmd + used, sizeof(cmd) - used,
+                                            "%s ", argv[optind]);
+                       if (n < 0 || (size_t)n >= sizeof(cmd) - used) {
+                               fprintf(stderr, "command too long\n");
+                               exit(1);
+                       }
+                       used += (size_t)n;
+                       optind++;
+               }
+               n = snprintf(cmd + used, sizeof(cmd) - used, "\n");
+               if (n < 0 || (size_t)n >= sizeof(cmd) - used) {
+                       fprintf(stderr, "command too long\n");
+                       exit(1);
+               }
+               uxclnt(cmd);
+               exit(0);
+       }
+
+       if (!logsink)
+               err = 0;
+       else
+               err = daemonize();
+
+       if (err < 0)
+               /* error */
+               exit(1);
+       else if (err > 0)
+               /* parent dies */
+               exit(0);
+       else
+               /* child lives */
+               return (child(NULL));
+}
+
diff --git a/multipathd/main.h b/multipathd/main.h
new file mode 100644 (file)
index 0000000..136b7e5
--- /dev/null
@@ -0,0 +1,13 @@
+/*
+ * Declarations exported by the multipathd main module.
+ * The header does not include the definitions of struct vectors,
+ * struct sysfs_device or struct multipath; includers must provide them.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+/* NOTE(review): presumably a map garbage-collection interval; the unit
+ * and the users are not visible in this header - confirm. */
+#define MAPGCINT 5
+
+/* NOTE(review): judging by the names, the ev_* functions handle add and
+ * remove events for paths and maps, and the char * argument is the device
+ * or map name - confirm against the definitions. */
+int reconfigure (struct vectors *);
+int ev_add_path (char *, struct vectors *);
+int ev_remove_path (char *, struct vectors *);
+int ev_add_map (struct sysfs_device *, struct vectors *);
+int ev_remove_map (char *, struct vectors *);
+void sync_map_state (struct multipath *);
+
+#endif /* MAIN_H */
diff --git a/multipathd/multipathd.8 b/multipathd/multipathd.8
new file mode 100644 (file)
index 0000000..c63185c
--- /dev/null
@@ -0,0 +1,114 @@
+.TH MULTIPATHD 8 "November 2009" "Linux Administrator's Manual"
+.SH NAME
+multipathd \- multipath daemon
+
+.SH SYNOPSIS
+.B multipathd
+.RB [\| options \|]
+
+.SH DESCRIPTION
+The 
+.B multipathd 
+daemon is in charge of checking for failed paths. When this happens,
+it will reconfigure the multipath map the path belongs to, so that this map 
+regains its maximum performance and redundancy.
+
+This daemon executes the external multipath config tool when events occur. 
+In turn, the multipath tool signals the multipathd daemon when it is done with 
+devmap reconfiguration, so that it can refresh its failed path list.
+
+.SH OPTIONS
+.TP
+.B \-d
+Foreground mode. Don't daemonize, and print all messages to stdout and stderr.
+.TP 
+.B -v "level"
+Verbosity level. Print additional information while running multipathd. A level of 0 means only print errors. A level of 3 or greater prints debugging information as well.
+.TP
+.B -k 
+multipathd will enter interactive mode. From this mode, the available commands can be viewed by entering "help". When you are finished entering commands, press CTRL-D to quit.
+
+.SH COMMANDS
+.TP
+The following commands can be used in interactive mode:
+.TP
+.B list|show paths
+Show the paths that multipathd is monitoring, and their state. 
+.TP
+.B list|show maps|multipaths
+Show the multipath devices that the multipathd is monitoring. 
+.TP
+.B list|show maps|multipaths status
+Show the status of all multipath devices that the multipathd is monitoring.
+.TP
+.B list|show maps|multipaths stats
+Show some statistics of all multipath devices that the multipathd is monitoring.
+.TP
+.B list|show maps|multipaths topology
+Show the current multipath topology. Same as "multipath -ll".
+.TP
+.B list|show topology
+Show the current multipath topology. Same as "multipath -ll".
+.TP
+.B list|show map|multipath $map topology
+Show topology of a single multipath device specified by $map, e.g. 36005076303ffc56200000000000010aa.
+This map could be obtained from "list maps".
+.TP
+.B list|show config
+Show the currently used configuration, derived from default values and values specified within the configuration file /etc/multipath.conf.
+.TP
+.B list|show blacklist
+Show the currently used blacklist rules, derived from default values and values specified within the configuration file /etc/multipath.conf.
+.TP
+.B list|show devices
+Show all available block devices by name including the information if they are blacklisted or not.
+.TP
+.B list|show status
+Show the number of path checkers in each possible state, the number of monitored paths, and whether multipathd is currently handling a uevent.
+.TP
+.B add path $path
+Add a path to the list of monitored paths. $path is as listed in /sys/block (e.g. sda).
+.TP 
+.B remove|del path $path
+Stop monitoring a path. $path is as listed in /sys/block (e.g. sda).
+.TP
+.B add map $map
+Add a multipath device to the list of monitored devices. $map can either be a device-mapper device as listed in /sys/block (e.g. dm-0) or it can be the alias for the multipath device (e.g. mpath1) or the uid of the multipath device (e.g. 36005076303ffc56200000000000010aa). 
+.TP
+.B remove|del map $map
+Stop monitoring a multipath device.
+.TP
+.B resize map|multipath $map
+Resizes map $map to the given size
+.TP 
+.B switch|switchgroup map $map group $group
+Force a multipath device to switch to a specific path group. $group is the path group index, starting with 1.
+.TP
+.B reconfigure
+Reconfigures the multipaths. This should be triggered automatically after any hotplug event.
+.TP
+.B suspend map|multipath $map
+Sets map $map into suspend state.
+.TP
+.B resume map|multipath $map
+Resumes map $map from suspend state.
+.TP
+.B fail path $path
+Sets path $path into failed state.
+.TP
+.B reinstate path $path
+Resumes path $path from failed state.
+.TP
+.B disablequeueing map|multipath $map
+Disable queuing on multipathed map $map.
+.TP
+.B restorequeueing map|multipath $map
+Restore queuing on multipathed map $map.
+
+.SH "SEE ALSO"
+.BR multipath (8)
+.BR kpartx (8)
+.BR hotplug (8)
+.SH "AUTHORS"
+.B multipathd
+was developed by Christophe Varoqui, <christophe.varoqui@opensvc.com> and others.
diff --git a/multipathd/multipathd.init.debian b/multipathd/multipathd.init.debian
new file mode 100644 (file)
index 0000000..f1e2de0
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+# Init script for multipathd.
+# Accepts start, stop, restart and force-reload; anything else prints the
+# usage line and exits with status 1.
+PATH=/bin:/usr/bin:/sbin:/usr/sbin
+DAEMON=/usr/bin/multipathd
+PIDFILE=/var/run/multipathd.pid
+
+# Nothing to do (and not an error) when the daemon binary is missing.
+test -x $DAEMON || exit 0
+
+case "$1" in
+  start)
+       echo -n "Starting multipath daemon: multipathd"
+        $DAEMON
+       echo "."
+       ;;
+  stop)
+       echo -n "Stopping multipath daemon: multipathd"
+       echo "."
+       # The pid to signal is taken from the pid file; without the file
+       # the daemon is assumed not to be running.
+       if [ -f $PIDFILE ]
+       then
+               kill `cat $PIDFILE`
+       else
+               echo "multipathd not running: Nothing to stop..."
+       fi
+       ;;
+  force-reload|restart)
+       # Implemented by re-invoking this script.
+       $0 stop
+       $0 start
+       ;;
+  *)
+       echo "Usage: /etc/init.d/multipathd {start|stop|restart|force-reload}"
+       exit 1
+       ;;
+esac
+
+exit 0
diff --git a/multipathd/multipathd.init.redhat b/multipathd/multipathd.init.redhat
new file mode 100644 (file)
index 0000000..15b5753
--- /dev/null
@@ -0,0 +1,146 @@
+#!/bin/bash
+#
+# multipathd   Starts the multipath daemon
+#
+# chkconfig: - 06 87
+# description: Manages device-mapper multipath devices
+
+### BEGIN INIT INFO
+# Provides: multipathd
+# Required-Start:
+# Required-Stop:
+# Default-Start:
+# Default-Stop:
+# Short-Description: Control multipathd
+# Description: This service monitors and manages
+#              device-mapper multipath devices
+### END INIT INFO
+
+DAEMON=/sbin/multipathd
+prog=`basename $DAEMON`
+initdir=/etc/rc.d/init.d
+lockdir=/var/lock/subsys
+sysconfig=/etc/sysconfig
+syspath=/sys/block
+
+. $initdir/functions
+test -r $sysconfig/$prog && . $sysconfig/$prog
+
+RETVAL=0
+
+teardown_slaves()
+{
+pushd $1 > /dev/null
+if [ -d "slaves" ]; then
+for slave in slaves/*;
+do
+       if [ "$slave" = "slaves/*" ]; then
+               read dev <  $1/dev
+               tablename=`dmsetup table --target multipath | sed -n "s/\(.*\): .* $dev .*/\1/p"`
+               if ! [ -z $tablename ]; then
+                       echo "Root is on a multipathed device, multipathd can not be stopped"
+                       exit 1
+               fi
+       else
+               local_slave=`readlink -f $slave`;
+               teardown_slaves $local_slave;
+       fi
+       done
+
+else
+               read dev <  $1/dev
+               tablename=`dmsetup table --target multipath | sed -n "s/\(.*\): .* $dev .*/\1/p"`
+               if ! [ -z $tablename ]; then
+                       echo "Root is on a multipathed device, multipathd can not be stopped"
+                       exit 1
+               fi
+fi
+popd > /dev/null
+}
+
+#
+# See how we were called.
+#
+
+start() {
+       test -x $DAEMON || exit 5
+       echo -n $"Starting $prog daemon: "
+       daemon $DAEMON
+       RETVAL=$?
+       [ $RETVAL -eq 0 ] && touch $lockdir/$prog
+       echo
+}
+
+force_stop() {
+       echo -n $"Stopping $prog daemon: "
+       killproc $DAEMON
+       RETVAL=$?
+       [ $RETVAL -eq 0 ] && rm -f $lockdir/$prog
+       echo
+}
+
+stop() {
+        root_dev=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $1; }}' /etc/mtab)
+       dm_num=`dmsetup info -c --noheadings -o minor $root_dev 2> /dev/null`
+       if [ $? -eq 0 ]; then
+               root_dm_device="dm-$dm_num"
+               [ -d $syspath/$root_dm_device ] && teardown_slaves $syspath/$root_dm_device
+       fi
+
+       force_stop
+}
+
+# Checked stop (see stop) followed by start.
+restart() {
+       stop
+       start
+}
+
+# Like restart, but uses force_stop, i.e. skips the root device check.
+force_restart() {
+       force_stop
+       start
+}
+
+# Send SIGHUP to the daemon through killproc.
+reload() {
+       echo -n "Reloading $prog: "
+       # ignore SIGHUP in this script while the signal is being sent
+       trap "" SIGHUP
+       killproc $DAEMON -HUP
+       RETVAL=$?
+       echo
+}      
+
+# Dispatch on the requested action; the script exits with $RETVAL.
+case "$1" in
+start)
+       start
+       ;;
+stop)
+       stop
+       ;;
+force-stop)
+       force_stop
+       ;;
+force-reload|reload)
+       reload
+       ;;
+restart)
+       restart
+       ;;
+force-restart)
+       force_restart
+       ;;
+condrestart|try-restart)
+       # restart only when the lock file written by start exists
+       if [ -f $lockdir/$prog ]; then
+           restart
+       fi
+       ;;
+status)
+       status $prog
+       RETVAL=$?
+       ;;
+*)
+       # NOTE(review): force-reload and try-restart are accepted above but
+       # not listed in this usage text.
+       echo $"Usage: $0 {start|stop|force-stop|status|restart|force-restart|condrestart|reload}"
+       RETVAL=2
+esac
+
+exit $RETVAL
diff --git a/multipathd/multipathd.init.suse b/multipathd/multipathd.init.suse
new file mode 100644 (file)
index 0000000..d851354
--- /dev/null
@@ -0,0 +1,132 @@
+#! /bin/sh
+# Copyright (c) 1995-2001 SuSE GmbH Nuernberg, Germany.
+#
+# Author: Hannes Reinecke <feedback@suse.de>
+#
+# init.d/routed
+#
+#   and symbolic its link
+#
+# /usr/sbin/rcrouted
+#
+### BEGIN INIT INFO
+# Provides:          multipathd
+# Required-Start:    $syslog
+# Required-Stop:
+# Default-Start:     3 5
+# Default-Stop:             0 1 2 4 6
+# Description:       Starts multipath daemon
+### END INIT INFO
+
+# Body of the init script for multipathd.  Whether the daemon is running is
+# decided by comparing the command line of the process named in $PIDFILE
+# with $DAEMON.  String comparisons use "=", the only equality operator
+# POSIX defines for test/[ ; "==" is not understood by every /bin/sh.
+PATH=/bin:/usr/bin:/sbin:/usr/sbin
+DAEMON=/sbin/multipathd
+PIDFILE=/var/run/multipathd.pid
+
+# Set the maximum number of open files
+MAX_OPEN_FDS=4096
+
+test -x $DAEMON || exit 5
+
+. /etc/rc.status
+
+# First reset status of this service
+rc_reset
+
+case "$1" in
+    start)
+       echo -n "Starting multipathd"
+
+       modprobe dm-multipath
+
+       # Set the maximum number of open files
+       if [ -n "$MAX_OPEN_FDS" ] ; then
+           ulimit -n $MAX_OPEN_FDS
+       fi
+
+       if [ -f $PIDFILE ]; then
+               PID="$(cat $PIDFILE)"
+               PROCNAME="$(ps -o cmd --no-headers $PID)"
+       fi
+
+       # Only start the daemon when the recorded pid is not multipathd
+       if [ "$PROCNAME" != "$DAEMON" ]; then
+               $DAEMON
+       fi
+       
+       # Remember status and be verbose
+       rc_status -v
+       sleep 1
+       ;;
+    stop)
+       echo -n "Shutting down multipathd"
+       # Because of the way how multipathd sets up its own namespace
+       # and chroots to it, killproc cannot be used with this process.
+       # So implement a cruder version:
+       if [ -f $PIDFILE ]; then
+               PID="$(cat $PIDFILE)"
+               PROCNAME="$(ps -o cmd --no-headers $PID)"
+       fi
+
+       if [ "$PROCNAME" = "$DAEMON" ]; then
+               kill -TERM $PID
+       fi
+
+       # Remember status and be verbose
+       rc_status -v
+       ;;
+    try-restart)
+       ## Stop the service and if this succeeds (i.e. the 
+       ## service was running before), start it again.
+        $0 status >/dev/null &&  $0 restart
+
+       # Remember status and be quiet
+       rc_status
+       ;;
+    restart|force-reload)
+       ## Stop the service and regardless of whether it was
+       ## running or not, start it again.
+       $0 stop
+       $0 start
+
+       # Remember status and be quiet
+       rc_status
+       ;;
+    reload)
+       ## Like force-reload, but if daemon does not support
+       ## signalling, do nothing (!)
+
+       # If it does not support reload:
+       exit 3
+       ;;
+    status)
+       echo -n "Checking for multipathd: "
+
+       # Return values have a slightly different meaning for the
+       # status command:
+       # 0 - service running
+       # 1 - service dead, but /var/run/  pid  file exists
+       # 2 - service dead, but /var/lock/ lock file exists
+       # 3 - service not running
+
+       # The subshell exits only set $? for rc_status below
+       if [ -f $PIDFILE ]; then
+               PID="$(cat $PIDFILE)"
+               PROCNAME="$(ps -o cmd --no-headers $PID)"
+               if [ "$PROCNAME" = "$DAEMON" ]; then
+                       (exit 0)
+               else
+                       (exit 1)
+               fi
+       else
+               (exit 3)
+       fi
+
+       rc_status -v
+       ;;
+    probe)
+       ## Optional: Probe for the necessity of a reload,
+       ## give out the argument which is required for a reload.
+       ;;
+    *)
+       echo "Usage: $0 {start|stop|status|try-restart|restart|force-reload|reload|probe}"
+       exit 1
+       ;;
+esac
+rc_exit
diff --git a/multipathd/pidfile.c b/multipathd/pidfile.c
new file mode 100644 (file)
index 0000000..e3fb896
--- /dev/null
@@ -0,0 +1,67 @@
+#include <sys/types.h> /* for pid_t */
+#include <sys/stat.h>  /* for open */
+#include <signal.h>    /* for kill() */
+#include <errno.h>     /* for ESHRC */
+#include <stdio.h>     /* for f...() */
+#include <string.h>    /* for memset() */
+#include <stdlib.h>    /* for atoi() */
+#include <unistd.h>    /* for unlink() */
+#include <fcntl.h>     /* for fcntl() */
+
+#include <debug.h>
+
+#include "pidfile.h"
+
+/*
+ * pidfile_create - create, lock and fill the pid file.
+ *
+ * pidFile: path of the file to create or reuse (mode 0644)
+ * pid:     process id to store, written as decimal text
+ *
+ * Takes a write lock on the whole file with fcntl(F_SETLK); a lock held
+ * by another process is reported as "process is already running".  The
+ * file is then truncated, the pid written and FD_CLOEXEC set.
+ *
+ * Returns 0 on success and 1 on failure.  On success the descriptor is
+ * left open; closing it would release the lock.
+ */
+int pidfile_create(const char *pidFile, pid_t pid)
+{
+       char buf[20];
+       struct flock lock;
+       int fd, value;
+       size_t len;
+
+       if((fd = open(pidFile, O_WRONLY | O_CREAT,
+                      (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))) < 0) {
+               condlog(0, "Cannot open pidfile [%s], error was [%s]",
+                       pidFile, strerror(errno));
+               return 1;
+       }
+       lock.l_type = F_WRLCK;
+       lock.l_start = 0;
+       lock.l_whence = SEEK_SET;
+       lock.l_len = 0;
+
+       if (fcntl(fd, F_SETLK, &lock) < 0) {
+               if (errno != EACCES && errno != EAGAIN)
+                       condlog(0, "Cannot lock pidfile [%s], error was [%s]",
+                               pidFile, strerror(errno));
+               else
+                       condlog(0, "process is already running");
+               goto fail;
+       }
+       if (ftruncate(fd, 0) < 0) {
+               condlog(0, "Cannot truncate pidfile [%s], error was [%s]",
+                       pidFile, strerror(errno));
+               goto fail;
+       }
+       /* pid_t is a signed type; print it through a matching conversion */
+       memset(buf, 0, sizeof(buf));
+       snprintf(buf, sizeof(buf), "%ld", (long)pid);
+       len = strlen(buf);
+       /* write() returns ssize_t: compare in the signed domain */
+       if (write(fd, buf, len) != (ssize_t)len) {
+               condlog(0, "Cannot write pid to pidfile [%s], error was [%s]",
+                       pidFile, strerror(errno));
+               goto fail;
+       }
+       if ((value = fcntl(fd, F_GETFD, 0)) < 0) {
+               condlog(0, "Cannot get close-on-exec flag from pidfile [%s], "
+                       "error was [%s]", pidFile, strerror(errno));
+               goto fail;
+       }
+       value |= FD_CLOEXEC;
+       if (fcntl(fd, F_SETFD, value) < 0) {
+               condlog(0, "Cannot set close-on-exec flag from pidfile [%s], "
+                       "error was [%s]", pidFile, strerror(errno));
+               goto fail;
+       }
+       return 0;
+fail:
+       close(fd);
+       return 1;
+}
diff --git a/multipathd/pidfile.h b/multipathd/pidfile.h
new file mode 100644 (file)
index 0000000..d308892
--- /dev/null
@@ -0,0 +1 @@
+/* Create the pid file pidFile and record pid in it; the header itself
+ * does not include <sys/types.h> for pid_t, so includers must. */
+int pidfile_create(const char *pidFile, pid_t pid);
diff --git a/multipathd/uxclnt.c b/multipathd/uxclnt.c
new file mode 100644 (file)
index 0000000..2882716
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ * Original author : tridge@samba.org, January 2002
+ * 
+ * Copyright (c) 2005 Christophe Varoqui
+ * Copyright (c) 2005 Benjamin Marzinski, Redhat
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/poll.h>
+#include <readline/readline.h>
+#include <readline/history.h>
+
+#include <uxsock.h>
+#include <memory.h>
+#include <defaults.h>
+
+#include <vector.h>
+#include "cli.h"
+
+/*
+ * print_reply - write a reply to stdout.
+ *
+ * When stdout is a terminal the string is printed unchanged.  Otherwise
+ * every sequence that starts with ESC '[' is dropped up to and including
+ * its terminating 'm'; a sequence cut off by the end of the string is
+ * dropped as well.
+ */
+static void print_reply(char *s)
+{
+       if (isatty(1)) {
+               printf("%s", s);
+               return;
+       }
+       /* strip ANSI color markers */
+       while (*s != '\0') {
+               if ((*s == 0x1b) && (*(s+1) == '[')) {
+                       /*
+                        * Skip the sequence without ever stepping over the
+                        * terminating NUL, then look at the next character
+                        * again: it may start another sequence.
+                        */
+                       while ((*s != '\0') && (*s != 'm'))
+                               s++;
+                       if (*s == 'm')
+                               s++;
+                       continue;
+               }
+               putchar(*s++);
+       }
+}
+/*
+ * process the client 
+ */
+/*
+ * process - interactive client loop.
+ *
+ * Reads lines with readline(), sends each non-empty line to the daemon
+ * over fd and prints the reply.  The loop ends on end of input, on the
+ * commands "exit" or "quit", or when sending or receiving fails.
+ */
+static void process(int fd)
+{
+       char *line;
+       char *reply;
+
+       cli_init();
+       rl_readline_name = "multipathd";
+       rl_completion_entry_function = key_generator;
+       while ((line = readline("multipathd> "))) {
+               size_t len;
+               size_t llen = strlen(line);
+
+               if (!llen) {
+                       free(line);
+                       continue;
+               }
+               if (!strcmp(line, "exit") || !strcmp(line, "quit"))
+                       break;
+
+               if (send_packet(fd, line, llen + 1) != 0) break;
+               if (recv_packet(fd, &reply, &len) != 0) break;
+
+               print_reply(reply);
+
+               add_history(line);
+
+               free(line);
+               FREE(reply);
+       }
+       /*
+        * Every break above leaves the current line allocated; after a
+        * normal end of input line is NULL and free() does nothing.
+        */
+       free(line);
+}
+
+/*
+ * process_req - send one command to the daemon and print its answer.
+ *
+ * inbuf is sent including its terminating NUL.  Nothing is printed when
+ * either the send or the receive fails.
+ */
+static void process_req(int fd, char * inbuf)
+{
+       char *answer;
+       size_t answer_len;
+
+       if (send_packet(fd, inbuf, strlen(inbuf) + 1) == 0 &&
+           recv_packet(fd, &answer, &answer_len) == 0) {
+               print_reply(answer);
+               FREE(answer);
+       }
+}
+       
+/*
+ * entry point
+ */
+/*
+ * uxclnt - client entry point.
+ *
+ * Connects to DEFAULT_SOCKET.  With a non-NULL inbuf that single command
+ * is sent and its reply printed; with NULL the interactive loop is run.
+ * The connection is closed before returning.
+ *
+ * Returns 0; exits with status 1 when the connection cannot be made.
+ */
+int uxclnt(char * inbuf)
+{
+       int fd;
+
+       fd = ux_socket_connect(DEFAULT_SOCKET);
+       if (fd == -1) {
+               perror("ux_socket_connect");
+               exit(1);
+       }
+
+       if (inbuf)
+               process_req(fd, inbuf);
+       else
+               process(fd);
+
+       /* do not leak the socket into the caller */
+       close(fd);
+
+       return 0;
+}
diff --git a/multipathd/uxclnt.h b/multipathd/uxclnt.h
new file mode 100644 (file)
index 0000000..0667a24
--- /dev/null
@@ -0,0 +1 @@
+/* Client side of the multipathd socket; inbuf is the command to send. */
+int uxclnt(char * inbuf);
diff --git a/multipathd/uxlsnr.c b/multipathd/uxlsnr.c
new file mode 100644 (file)
index 0000000..a05719f
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ * Original author : tridge@samba.org, January 2002
+ * 
+ * Copyright (c) 2005 Christophe Varoqui
+ * Copyright (c) 2005 Benjamin Marzinski, Redhat
+ */
+
+/*
+ * A simple domain socket listener
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/poll.h>
+
+#include <checkers.h>
+
+#include <memory.h>
+#include <debug.h>
+#include <vector.h>
+#include <structs.h>
+#include <uxsock.h>
+#include <defaults.h>
+
+#include "uxlsnr.h"
+
+#define SLEEP_TIME 5000
+
+/* One connected client; the clients form a doubly linked list. */
+struct client {
+       int fd;                         /* socket returned by accept() */
+       struct client *next, *prev;     /* list neighbours, NULL at the ends */
+};
+
+/* Head of the client list; new clients are inserted at the head. */
+static struct client *clients;
+/* Number of entries in the client list. */
+static unsigned num_clients;
+
+/*
+ * handle a new client joining
+ */
+/*
+ * handle a new client joining
+ *
+ * Accepts one connection on ux_sock and puts it at the head of the client
+ * list.  A failed accept() is ignored; when no memory is available for the
+ * list entry the connection is closed again.
+ */
+static void new_client(int ux_sock)
+{
+       struct client *c;
+       struct sockaddr addr;
+       socklen_t len = sizeof(addr);
+       int fd;
+
+       fd = accept(ux_sock, &addr, &len);
+       
+       if (fd == -1)
+               return;
+
+       /* put it in our linked list */
+       c = (struct client *)MALLOC(sizeof(*c));
+       if (!c) {
+               /* presumably MALLOC yields NULL on failure, like malloc() */
+               condlog(0, "cannot allocate client, closing connection");
+               close(fd);
+               return;
+       }
+       memset(c, 0, sizeof(*c));
+       c->fd = fd;
+       c->next = clients;
+       if (c->next) c->next->prev = c;
+       clients = c;
+       num_clients++;
+}
+
+/*
+ * kill off a dead client
+ */
+/*
+ * kill off a dead client
+ *
+ * Closes the client's socket, unlinks the entry from the client list
+ * (moving the list head when needed) and frees it.
+ */
+static void dead_client(struct client *c)
+{
+       struct client *before = c->prev;
+       struct client *after = c->next;
+
+       close(c->fd);
+
+       if (before)
+               before->next = after;
+       if (after)
+               after->prev = before;
+       if (clients == c)
+               clients = after;
+
+       FREE(c);
+       num_clients--;
+}
+
+/* Release the pollfd array used by uxsock_listen(). */
+void free_polls (void)
+{
+       FREE(polls);
+}
+
+/*
+ * entry point
+ */
+/*
+ * entry point
+ *
+ * Listens on DEFAULT_SOCKET and serves clients forever.  Each request is
+ * NUL-terminated and handed to uxsock_trigger(request, &reply, &rlen,
+ * trigger_data); a reply, when one is set, is sent back and then freed.
+ * Clients whose socket cannot be read from or written to are dropped.
+ *
+ * Exits the process with status 1 when the socket cannot be set up, when
+ * the pollfd array cannot be grown or when poll() fails with anything but
+ * EINTR.  The loop has no exit, so the trailing close/return is never
+ * reached.
+ */
+void * uxsock_listen(int (*uxsock_trigger)(char *, char **, int *, void *),
+                       void * trigger_data)
+{
+       int ux_sock;
+       size_t len;
+       int rlen;
+       char *inbuf;
+       char *reply;
+
+       ux_sock = ux_socket_listen(DEFAULT_SOCKET);
+
+       if (ux_sock == -1) {
+               condlog(0, "ux_socket_listen error");
+               exit(1);
+       }
+
+       polls = (struct pollfd *)MALLOC(0);
+
+       while (1) {
+               struct client *c;
+               struct pollfd *grown;
+               int i, poll_count;
+
+               /*
+                * setup for a poll; keep the old array until the new one
+                * is known to exist (presumably REALLOC yields NULL on
+                * failure, like realloc())
+                */
+               grown = REALLOC(polls, (1+num_clients) * sizeof(*polls));
+               if (!grown) {
+                       condlog(0, "cannot allocate poll descriptors");
+                       exit(1);
+               }
+               polls = grown;
+               polls[0].fd = ux_sock;
+               polls[0].events = POLLIN;
+
+               /* setup the clients */
+               for (i=1, c = clients; c; i++, c = c->next) {
+                       polls[i].fd = c->fd;
+                       polls[i].events = POLLIN;
+               }
+
+               /* most of our life is spent in this call */
+               poll_count = poll(polls, i, SLEEP_TIME);
+               
+               if (poll_count == -1) {
+                       if (errno == EINTR)
+                               continue;
+
+                       /* something went badly wrong! */
+                       condlog(0, "poll");
+                       exit(1);
+               }
+
+               if (poll_count == 0)
+                       continue;
+
+               /* see if a client wants to speak to us */
+               for (i=1, c = clients; c; i++) {
+                       /* dead_client() frees c, so fetch the successor first */
+                       struct client *next = c->next;
+
+                       if (polls[i].revents & POLLIN) {
+                               inbuf = NULL;
+                               if (recv_packet(c->fd, &inbuf, &len) != 0) {
+                                       dead_client(c);
+                               } else if (len == 0 || !inbuf) {
+                                       /*
+                                        * An empty packet has no last byte
+                                        * to terminate; ignore it.
+                                        */
+                                       if (inbuf)
+                                               FREE(inbuf);
+                               } else {
+                                       inbuf[len - 1] = 0;
+                                       condlog(4, "Got request [%s]", inbuf);
+                                       /* do not rely on the callback to set these */
+                                       reply = NULL;
+                                       rlen = 0;
+                                       uxsock_trigger(inbuf, &reply, &rlen,
+                                                       trigger_data);
+
+                                       if (reply) {
+                                               if (send_packet(c->fd, reply,
+                                                    rlen) != 0) {
+                                                       dead_client(c);
+                                               }
+                                               FREE(reply);
+                                               reply = NULL;
+                                       }
+                                       FREE(inbuf);
+                               }
+                       }
+                       c = next;
+               }
+
+               /* see if we got a new client */
+               if (polls[0].revents & POLLIN) {
+                       new_client(ux_sock);
+               }
+       }
+
+       close(ux_sock);
+       return NULL;
+}
diff --git a/multipathd/uxlsnr.h b/multipathd/uxlsnr.h
new file mode 100644 (file)
index 0000000..00d98b0
--- /dev/null
@@ -0,0 +1,7 @@
+/*
+ * pollfd array filled by uxsock_listen() and released by free_polls().
+ * NOTE(review): this is a definition, not an extern declaration, so every
+ * file that includes this header defines the variable.  Linking only works
+ * where duplicate tentative definitions are merged - confirm and move the
+ * definition into uxlsnr.c.
+ */
+struct pollfd *polls;
+
+void free_polls(void);
+/* Serve the multipathd socket; uxsock_trigger is called once per request
+ * with the request text, reply and reply-length out parameters, and
+ * trigger_data. */
+void * uxsock_listen(int (*uxsock_trigger)
+                       (char *, char **, int *, void *),
+                       void * trigger_data);
+