From ac4c2a3bbe5db5fc570b1d0ee1e474db7cb22585 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Fri, 8 Jan 2010 14:42:40 -0800 Subject: [PATCH] zlib: optimize inffast when copying direct from output JFFS2 uses lesser compression ratio and inflate always ends up in "copy direct from output" case. This patch tries to optimize the direct copy procedure. Uses get_unaligned() but only in one place. The copy loop just above this one can also use this optimization, but I havn't done so as I have not tested if it is a win there too. On my MPC8321 this is about 17% faster on my JFFS2 root FS than the original. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Joakim Tjernlund Cc: Roel Kluin Cc: Richard Purdie Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/boot/Makefile | 4 +++- lib/zlib_inflate/inffast.c | 55 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index bb2465b..826a30a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -20,7 +20,7 @@ all: $(obj)/zImage BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -Os -msoft-float -pipe \ + -fno-strict-aliasing -Os -msoft-float -pipe -D__KERNEL__\ -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ -isystem $(shell $(CROSS32CC) -print-file-name=include) BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc @@ -34,6 +34,8 @@ BOOTCFLAGS += -fno-stack-protector endif BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) +BOOTCFLAGS += -include include/linux/autoconf.h -Iarch/powerpc/include +BOOTCFLAGS += -Iinclude DTS_FLAGS ?= -p 1024 diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c index 8550b0c..05e1559 100644 --- a/lib/zlib_inflate/inffast.c +++ b/lib/zlib_inflate/inffast.c @@ -4,6 +4,8 @@ */ #include +#include +#include #include "inftrees.h" #include "inflate.h" #include "inffast.h" @@ -24,9 +26,11 @@ #ifdef POSTINC # define OFF 0 # define PUP(a) *(a)++ +# define UP_UNALIGNED(a) get_unaligned((a)++) #else # define OFF 1 # define PUP(a) *++(a) +# define UP_UNALIGNED(a) get_unaligned(++(a)) #endif /* @@ -239,18 +243,47 @@ void inflate_fast(z_streamp strm, unsigned start) } } else { + unsigned short *sout; + unsigned long loops; + from = out - dist; /* copy direct from output */ - do { /* minimum length is three */ - PUP(out) = PUP(from); - PUP(out) = PUP(from); - PUP(out) = PUP(from); - len -= 3; - } while (len > 2); - if (len) { - PUP(out) = PUP(from); - if (len > 1) - PUP(out) = PUP(from); - } + /* minimum length is three */ + /* Align out addr */ + if (!((long)(out - 1 + OFF) & 1)) { + PUP(out) = PUP(from); + len--; + } + sout = (unsigned short *)(out - OFF); + if (dist > 2) { + unsigned short *sfrom; + + sfrom = (unsigned short *)(from - OFF); + loops = len >> 1; + do + PUP(sout) = UP_UNALIGNED(sfrom); + while (--loops); + out = (unsigned char *)sout + OFF; + from = (unsigned char *)sfrom + OFF; + } else { /* dist == 1 or dist == 2 */ + unsigned short pat16; + + pat16 = *(sout-2+2*OFF); + if (dist == 1) +#if defined(__BIG_ENDIAN) + pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8); +#elif defined(__LITTLE_ENDIAN) + pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8); +#else +#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined +#endif + loops = len >> 1; + do + PUP(sout) = pat16; + while (--loops); + out = (unsigned char *)sout + OFF; + } + if (len & 1) + PUP(out) = PUP(from); } } else if ((op & 64) == 0) { /* 2nd level distance code */ -- 2.7.4