From 71687d74d08b1e27fb9d81562e03fa1de6790775 Mon Sep 17 00:00:00 2001
From: Chris Mason
Date: Wed, 25 Jul 2012 15:02:07 -0400
Subject: [PATCH] Use intel hardware crc32c where available

Signed-off-by: Chris Mason
---
 btrfs.c  |   2 +
 crc32c.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 crc32c.h |   1 +
 3 files changed, 162 insertions(+), 2 deletions(-)

diff --git a/btrfs.c b/btrfs.c
index 19a6961..e9d54f8 100644
--- a/btrfs.c
+++ b/btrfs.c
@@ -258,6 +258,8 @@ int main(int argc, char **argv)
 {
 	const struct cmd_struct *cmd;
 
+	crc32c_optimization_init();
+
 	argc--;
 	argv++;
 	handle_options(&argc, &argv);
diff --git a/crc32c.c b/crc32c.c
index 4dc5f9c..a761a1d 100644
--- a/crc32c.c
+++ b/crc32c.c
@@ -9,6 +9,158 @@
  */
 #include "kerncompat.h"
 #include "crc32c.h"
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+/* Table-driven software implementation, defined later in this file. */
+u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length);
+
+/*
+ * Implementation that crc32c_le() dispatches to.  Defaults to the software
+ * version; crc32c_optimization_init() may replace it with the hardware one.
+ */
+static u32 (*crc_function)(u32 crc, unsigned char const *data, size_t length) = __crc32c_le;
+
+#ifdef __x86_64__
+
+/*
+ * Based on a posting to lkml by Austin Zhang
+ *
+ * Uses the hardware CRC32 instruction to accelerate CRC32C computation.
+ * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
+ * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2A: Instruction Set Reference, A-M
+ *
+ * SCALE_F is the number of bytes consumed by one word-sized crc32
+ * instruction (the size of an unsigned long).  REX_PRE is the REX.W prefix
+ * byte that selects the 64-bit operand form of that instruction.
+ */
+#if __SIZEOF_LONG__ == 8
+#define REX_PRE "0x48, "
+#define SCALE_F 8
+#else
+#define REX_PRE
+#define SCALE_F 4
+#endif
+
+static int crc32c_probed = 0;
+static int crc32c_intel_available = 0;
+
+/*
+ * Fold 'length' bytes at 'data' into 'crc', one byte per crc32 instruction.
+ * Used for the tail that does not fill a whole word.
+ */
+static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
+					unsigned long length)
+{
+	while (length--) {
+		__asm__ __volatile__(
+			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+			:"=S"(crc)
+			:"0"(crc), "c"(*data)
+		);
+		data++;
+	}
+
+	return crc;
+}
+
+/*
+ * Hardware CRC32C: feeds the buffer to the crc32 instruction SCALE_F bytes
+ * at a time, then finishes the remaining tail one byte at a time.
+ *
+ * Returns the updated crc; no initial or final inversion is applied here.
+ */
+uint32_t crc32c_intel(u32 crc, unsigned char const *data, size_t length)
+{
+	size_t iquotient = length / SCALE_F;
+	size_t iremainder = length % SCALE_F;
+
+	while (iquotient--) {
+		unsigned long word;
+
+		/*
+		 * Copy the word out rather than dereferencing a cast pointer:
+		 * the caller's buffer carries no alignment guarantee.
+		 */
+		memcpy(&word, data, SCALE_F);
+		__asm__ __volatile__(
+			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+			:"=S"(crc)
+			:"0"(crc), "c"(word)
+		);
+		data += SCALE_F;
+	}
+
+	if (iremainder)
+		crc = crc32c_intel_le_hw_byte(crc, data, iremainder);
+
+	return crc;
+}
+
+/*
+ * Execute CPUID for the leaf passed in *eax and store the resulting
+ * eax/ebx/ecx/edx values through the four pointers.
+ */
+static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
+		     unsigned int *edx)
+{
+	int id = *eax;
+
+	asm("movl %4, %%eax;"
+	    "cpuid;"
+	    "movl %%eax, %0;"
+	    "movl %%ebx, %1;"
+	    "movl %%ecx, %2;"
+	    "movl %%edx, %3;"
+		: "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx)
+		: "r" (id)
+		: "eax", "ebx", "ecx", "edx");
+}
+
+/* Detect, once, whether the CPU provides the SSE4.2 crc32 instruction. */
+void crc32c_intel_probe(void)
+{
+	if (!crc32c_probed) {
+		unsigned int eax, ebx, ecx, edx;
+
+		eax = 1;
+
+		do_cpuid(&eax, &ebx, &ecx, &edx);
+		/* CPUID leaf 1, ECX bit 20: SSE4.2 */
+		crc32c_intel_available = (ecx & (1 << 20)) != 0;
+		crc32c_probed = 1;
+	}
+}
+
+/*
+ * Switch crc32c_le() to the hardware implementation when the CPU supports
+ * it.  Skipping this call is harmless: the software version stays in use.
+ * Always returns 0.
+ */
+int crc32c_optimization_init(void)
+{
+	crc32c_intel_probe();
+	if (crc32c_intel_available)
+		crc_function = crc32c_intel;
+	return 0;
+}
+#else
+
+/* No hardware CRC32C path on this architecture; nothing to select. */
+int crc32c_optimization_init(void)
+{
+	return 0;
+}
+
+#endif /* __x86_64__ */
 
 /*
  * This is the CRC-32C table
@@ -91,11 +243,16 @@ static const u32 crc32c_table[256] = {
  * crc using table.
  */
 
-u32 crc32c_le(u32 crc, unsigned char const *data, size_t length)
+u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length)
 {
 	while (length--)
 		crc =
 		    crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8);
-
 	return crc;
 }
+
+/* Public entry point: runs whichever implementation is currently selected. */
+u32 crc32c_le(u32 crc, unsigned char const *data, size_t length)
+{
+	return crc_function(crc, data, length);
+}
diff --git a/crc32c.h b/crc32c.h
index 72f1512..27f298a 100644
--- a/crc32c.h
+++ b/crc32c.h
@@ -22,6 +22,7 @@
 #include "kerncompat.h"
 
 u32 crc32c_le(u32 seed, unsigned char const *data, size_t length);
+int crc32c_optimization_init(void);
 
 #define crc32c(seed, data, length) crc32c_le(seed, (unsigned char const *)data, length)
 #define btrfs_crc32c crc32c
-- 
2.7.4