Use intel hardware crc32c where available
authorChris Mason <chris.mason@fusionio.com>
Wed, 25 Jul 2012 19:02:07 +0000 (15:02 -0400)
committerChris Mason <chris.mason@fusionio.com>
Thu, 26 Jul 2012 18:51:27 +0000 (14:51 -0400)
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
btrfs.c
crc32c.c
crc32c.h

diff --git a/btrfs.c b/btrfs.c
index 19a6961..e9d54f8 100644 (file)
--- a/btrfs.c
+++ b/btrfs.c
@@ -258,6 +258,8 @@ int main(int argc, char **argv)
 {
        const struct cmd_struct *cmd;
 
+       crc32c_optimization_init();
+
        argc--;
        argv++;
        handle_options(&argc, &argv);
index 4dc5f9c..a761a1d 100644 (file)
--- a/crc32c.c
+++ b/crc32c.c
@@ -9,6 +9,123 @@
  */
 #include "kerncompat.h"
 #include "crc32c.h"
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+/* Table-driven software CRC32C, defined further down in this file. */
+u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length);
+/*
+ * Implementation that crc32c_le() dispatches to.  Defaults to the software
+ * version; on x86_64 crc32c_optimization_init() may switch it to crc32c_intel.
+ */
+static u32 (*crc_function)(u32 crc, unsigned char const *data, size_t length) = __crc32c_le;
+
+#ifdef __x86_64__
+
+/*
+ * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
+ *
+ * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
+ * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
+ * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2A: Instruction Set Reference, A-M
+ */
+/*
+ * REX_PRE is pasted into the hand-encoded CRC32 instruction in crc32c_intel():
+ * with an 8-byte long it supplies 0x48 (the REX.W prefix, i.e. a 64-bit
+ * source operand); otherwise it expands to nothing.
+ * SCALE_F is the number of bytes crc32c_intel() consumes per instruction.
+ */
+#if  __SIZEOF_LONG__ == 8
+#define REX_PRE "0x48, "
+#define SCALE_F 8
+#else
+#define REX_PRE
+#define SCALE_F 4
+#endif
+
+/* Set once crc32c_intel_probe() has run, so CPUID is only queried once. */
+static int crc32c_probed = 0;
+/* Non-zero when the probe found CPUID leaf 1 ECX bit 20 set. */
+static int crc32c_intel_available = 0;
+
+/*
+ * Folds 'length' bytes starting at 'data' into 'crc', one byte per CRC32
+ * instruction, and returns the updated crc.
+ */
+static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
+                                       unsigned long length)
+{
+       while (length--) {
+               /*
+                * Hand-assembled "crc32b %cl, %esi": the constraints below pin
+                * crc to ESI ("S") and the input byte to ECX ("c").
+                */
+               __asm__ __volatile__(
+                       ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+                       :"=S"(crc)
+                       :"0"(crc), "c"(*data)
+               );
+               data++;
+       }
+
+       return crc;
+}
+
+/*
+ * Hardware CRC32C over 'length' bytes at 'data', seeded with 'crc'.
+ *
+ * The bulk of the buffer is consumed SCALE_F bytes (one unsigned long) per
+ * CRC32 instruction; the 0..SCALE_F-1 trailing bytes go through
+ * crc32c_intel_le_hw_byte().  Returns the updated crc.
+ */
+uint32_t crc32c_intel(u32 crc, unsigned char const *data, unsigned long length)
+{
+       /*
+        * Keep the word count as wide as 'length': an unsigned int here would
+        * silently truncate for buffers of 2^32 * SCALE_F bytes or more.
+        */
+       unsigned long iquotient = length / SCALE_F;
+       unsigned long iremainder = length % SCALE_F;
+       unsigned long const *ptmp = (unsigned long const *)data;
+
+       while (iquotient--) {
+               /*
+                * Hand-assembled CRC32 of the long held in the "c" register
+                * into the crc held in the "S" register.
+                */
+               __asm__ __volatile__(
+                       ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+                       :"=S"(crc)
+                       :"0"(crc), "c"(*ptmp)
+               );
+               ptmp++;
+       }
+
+       if (iremainder)
+               crc = crc32c_intel_le_hw_byte(crc, (unsigned char const *)ptmp,
+                                iremainder);
+
+       return crc;
+}
+
+/*
+ * Executes CPUID with the leaf taken from *eax and stores the resulting
+ * EAX/EBX/ECX/EDX values through the four pointers.  ECX is not loaded before
+ * the instruction, so no particular sub-leaf is selected.
+ */
+static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
+                    unsigned int *edx)
+{
+       int id = *eax;  /* requested leaf; *eax is overwritten with the result */
+
+       /*
+        * All four CPUID registers are listed as clobbers, so the "r" operands
+        * live in other registers and the movl instructions copy values in/out.
+        */
+       asm("movl %4, %%eax;"
+           "cpuid;"
+           "movl %%eax, %0;"
+           "movl %%ebx, %1;"
+           "movl %%ecx, %2;"
+           "movl %%edx, %3;"
+               : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx)
+               : "r" (id)
+               : "eax", "ebx", "ecx", "edx");
+}
+
+/*
+ * Queries CPUID leaf 1 once and caches whether ECX bit 20 (SSE4.2, which
+ * provides the CRC32 instruction) is set.  Subsequent calls do nothing.
+ */
+void crc32c_intel_probe(void)
+{
+       unsigned int leaf = 1;
+       unsigned int reg_b, reg_c, reg_d;
+
+       if (crc32c_probed)
+               return;
+
+       do_cpuid(&leaf, &reg_b, &reg_c, &reg_d);
+       crc32c_intel_available = (reg_c >> 20) & 1;
+       crc32c_probed = 1;
+}
+
+/*
+ * Probes the CPU and, when the CRC32 instruction is available, routes
+ * crc32c_le() to the hardware implementation.  Always returns 0.
+ */
+int crc32c_optimization_init(void)
+{
+       crc32c_intel_probe();
+       if (crc32c_intel_available)
+               crc_function = crc32c_intel;
+
+       /* Declared int: falling off the end left the result undefined. */
+       return 0;
+}
+#else
+
+/*
+ * No accelerated implementation on this architecture, so there is nothing
+ * to select.  Always returns 0.
+ */
+int crc32c_optimization_init(void)
+{
+       return 0;
+}
+
+#endif /* __x86_64__ */
 
 /*
  * This is the CRC-32C table
@@ -91,11 +208,15 @@ static const u32 crc32c_table[256] = {
  * crc using table.
  */
 
-u32 crc32c_le(u32 crc, unsigned char const *data, size_t length)
+/* Software implementation; the initial target of crc_function. */
+u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length)
 {
        while (length--)
                crc =
                    crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8);
-
        return crc;
 }
+
+/*
+ * Public entry point: forwards to whichever implementation crc_function
+ * currently points at.
+ */
+u32 crc32c_le(u32 crc, unsigned char const *data, size_t length)
+{
+       return crc_function(crc, data, length);
+}
index 72f1512..27f298a 100644 (file)
--- a/crc32c.h
+++ b/crc32c.h
@@ -22,6 +22,7 @@
 #include "kerncompat.h"
 
 u32 crc32c_le(u32 seed, unsigned char const *data, size_t length);
+/* Picks the implementation crc32c_le() dispatches to; called at startup. */
+int crc32c_optimization_init(void);
 
 #define crc32c(seed, data, length) crc32c_le(seed, (unsigned char const *)data, length)
 #define btrfs_crc32c crc32c