1 /* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007,
3 * 2008, 2009, 2010. Joshua P. MacDonald
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 /* To know more about Xdelta, start by reading xdelta3.c. If you are
21 * ready to use the API, continue reading here. There are two
22 * interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen
23 * or so related calls. This interface is styled after Zlib. */
31 #include <sys/types.h>
33 /****************************************************************/
35 /* Default configured value of stream->winsize. If the program
36 * supplies xd3_encode_input() with data smaller than winsize the
37 * stream will automatically buffer the input, otherwise the input
38 * buffer is used directly.
40 #ifndef XD3_DEFAULT_WINSIZE
41 #define XD3_DEFAULT_WINSIZE (1U << 23)
44 /* Default total size of the source window used in xdelta3-main.h */
45 #ifndef XD3_DEFAULT_SRCWINSZ
46 #define XD3_DEFAULT_SRCWINSZ (1U << 26)
49 /* When Xdelta requests a memory allocation for certain buffers, it
50 * rounds up to units of at least this size. The code assumes (and
51 * asserts) that this is a power-of-two. */
53 #define XD3_ALLOCSIZE (1U<<14)
56 /* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect
57 * decoders against malicious files. The decoder will never decode a
58 * window larger than this. If the file specifies VCD_TARGET the
59 * decoder may require two buffers of this size.
61 * 8-16MB is reasonable, probably don't need to go larger. */
62 #ifndef XD3_HARDMAXWINSIZE
63 #define XD3_HARDMAXWINSIZE (1U<<24)
65 /* The IOPT_SIZE value sets the size of a buffer used to batch
66 * overlapping copy instructions before they are optimized by picking
67 * the best non-overlapping ranges. The larger this buffer, the
68 * longer a forced xd3_srcwin_setup() decision is held off. Setting
69 * this value to 0 causes an unlimited buffer to be used. */
70 #ifndef XD3_DEFAULT_IOPT_SIZE
71 #define XD3_DEFAULT_IOPT_SIZE (1U<<15)
74 /* The maximum distance backward to search for small matches */
75 #ifndef XD3_DEFAULT_SPREVSZ
76 #define XD3_DEFAULT_SPREVSZ (1U<<18)
79 /* The default compression level
81 #ifndef XD3_DEFAULT_LEVEL
82 #define XD3_DEFAULT_LEVEL 3
85 #ifndef XD3_DEFAULT_SECONDARY_LEVEL
86 #define XD3_DEFAULT_SECONDARY_LEVEL 6
89 #ifndef XD3_USE_LARGEFILE64
90 #define XD3_USE_LARGEFILE64 1
93 /* Sizes and addresses within VCDIFF windows are represented as usize_t
95 * For source-file offsets and total file sizes, total input and
96 * output counts, the xoff_t type is used. The decoder and encoder
97 * generally check for overflow of the xoff_t size (this is tested at
98 * the 32bit boundary [xdelta3-test.h]).
102 typedef unsigned int usize_t;
104 #define WIN32_LEAN_AND_MEAN
105 #if XD3_USE_LARGEFILE64
106 /* 64 bit file offsets: uses GetFileSizeEx and SetFilePointerEx.
107 * requires Win2000 or newer version of WinNT */
108 #define WINVER 0x0500
109 #define _WIN32_WINNT 0x0500
111 /* 32 bit (DWORD) file offsets: uses GetFileSize and
112 * SetFilePointer. compatible with win9x-me and WinNT4 */
113 #define WINVER 0x0400
114 #define _WIN32_WINNT 0x0400
117 typedef unsigned int usize_t;
120 typedef signed int ssize_t;
121 typedef unsigned char uint8_t;
122 typedef unsigned short uint16_t;
123 typedef unsigned long uint32_t;
124 typedef ULONGLONG uint64_t;
126 /* mingw32, lcc and watcom provide a proper header */
131 /* TODO: note that SIZEOF_USIZE_T is never set to 8, although it should be for
132 * a 64bit platform. OTOH, may be that using 32bits is appropriate even on a
133 * 64bit platform because we allocate large arrays of these values. */
134 #if XD3_USE_LARGEFILE64
135 #define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */
136 #ifndef _LARGEFILE_SOURCE
137 #define _LARGEFILE_SOURCE
139 #ifndef _FILE_OFFSET_BITS
140 #define _FILE_OFFSET_BITS 64
143 typedef uint64_t xoff_t;
144 #define SIZEOF_XOFF_T 8
145 #define SIZEOF_USIZE_T 4
152 typedef uint32_t xoff_t;
153 #define SIZEOF_XOFF_T 4
154 #define SIZEOF_USIZE_T 4
158 #define USE_UINT32 (SIZEOF_USIZE_T == 4 || \
159 SIZEOF_XOFF_T == 4 || REGRESSION_TEST)
160 #define USE_UINT64 (SIZEOF_USIZE_T == 8 || \
161 SIZEOF_XOFF_T == 8 || REGRESSION_TEST)
163 /* TODO: probably should do something better here. */
165 #if defined(__i386__) || defined(__i486__) || defined(__i586__) || \
166 defined(__i686__) || defined(_X86_) || defined(__x86_64__)
167 #define UNALIGNED_OK 1
169 #define UNALIGNED_OK 0
173 /**********************************************************************/
175 /* Whether to build the encoder, otherwise only build the decoder. */
177 #define XD3_ENCODER 1
180 /* The code returned when main() fails, also defined in system
183 #define EXIT_FAILURE 1
186 /* REGRESSION TEST enables the "xdelta3 test" command, which runs a
187 series of self-tests. */
188 #ifndef REGRESSION_TEST
189 #define REGRESSION_TEST 0
192 /* XD3_DEBUG=1 enables assertions and various statistics. Levels > 1
193 * enable some additional output only useful during development and
199 #ifndef PYTHON_MODULE
200 #define PYTHON_MODULE 0
204 #define SWIG_MODULE 0
207 /* There are three string matching functions supplied: one fast, one
208 * slow (default), and one soft-configurable. To disable any of
209 * these, use the following definitions. */
210 #ifndef XD3_BUILD_SLOW
211 #define XD3_BUILD_SLOW 1
213 #ifndef XD3_BUILD_FAST
214 #define XD3_BUILD_FAST 1
216 #ifndef XD3_BUILD_FASTER
217 #define XD3_BUILD_FASTER 1
219 #ifndef XD3_BUILD_FASTEST
220 #define XD3_BUILD_FASTEST 1
222 #ifndef XD3_BUILD_SOFT
223 #define XD3_BUILD_SOFT 1
225 #ifndef XD3_BUILD_DEFAULT
226 #define XD3_BUILD_DEFAULT 1
233 /* XPRINT. Debug output and VCDIFF_TOOLS functions report to stderr.
234 * I have used an irregular style to abbreviate [fprintf(stderr, "] as
239 typedef struct _xd3_stream xd3_stream;
240 typedef struct _xd3_source xd3_source;
241 typedef struct _xd3_hash_cfg xd3_hash_cfg;
242 typedef struct _xd3_smatcher xd3_smatcher;
243 typedef struct _xd3_rinst xd3_rinst;
244 typedef struct _xd3_dinst xd3_dinst;
245 typedef struct _xd3_hinst xd3_hinst;
246 typedef struct _xd3_winst xd3_winst;
247 typedef struct _xd3_rpage xd3_rpage;
248 typedef struct _xd3_addr_cache xd3_addr_cache;
249 typedef struct _xd3_output xd3_output;
250 typedef struct _xd3_desect xd3_desect;
251 typedef struct _xd3_iopt_buflist xd3_iopt_buflist;
252 typedef struct _xd3_rlist xd3_rlist;
253 typedef struct _xd3_sec_type xd3_sec_type;
254 typedef struct _xd3_sec_cfg xd3_sec_cfg;
255 typedef struct _xd3_sec_stream xd3_sec_stream;
256 typedef struct _xd3_config xd3_config;
257 typedef struct _xd3_code_table_desc xd3_code_table_desc;
258 typedef struct _xd3_code_table_sizes xd3_code_table_sizes;
259 typedef struct _xd3_slist xd3_slist;
260 typedef struct _xd3_whole_state xd3_whole_state;
261 typedef struct _xd3_wininfo xd3_wininfo;
263 /* The stream configuration has three callbacks functions, all of
264 * which may be supplied with NULL values. If config->getblk is
265 * provided as NULL, the stream returns XD3_GETSRCBLK. */
267 typedef void* (xd3_alloc_func) (void *opaque,
270 typedef void (xd3_free_func) (void *opaque,
273 typedef int (xd3_getblk_func) (xd3_stream *stream,
277 /* These are internal functions to delay construction of encoding
278 * tables and support alternate code tables. See the comments & code
279 * enabled by GENERIC_ENCODE_TABLES. */
281 typedef const xd3_dinst* (xd3_code_table_func) (void);
282 typedef int (xd3_comp_table_func) (xd3_stream *stream,
283 const uint8_t **data,
289 #define XD3_ASSERT(x) \
290 do { if (! (x)) { DP(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \
291 abort (); } } while (0)
293 #define XD3_ASSERT(x) (void)0
294 #endif /* XD3_DEBUG */
298 #define max(x,y) ({ \
299 const typeof(x) _x = (x); \
300 const typeof(y) _y = (y); \
301 (void) (&_x == &_y); \
302 _x > _y ? _x : _y; })
303 #endif /* __GNUC__ */
306 #define min(x,y) ({ \
307 const typeof(x) _x = (x); \
308 const typeof(y) _y = (y); \
309 (void) (&_x == &_y); \
310 _x < _y ? _x : _y; })
314 #define max(x,y) ((x) < (y) ? (y) : (x))
317 #define min(x,y) ((x) < (y) ? (x) : (y))
319 #endif /* __GNUC__ */
321 /****************************************************************
323 ******************************************************************/
325 /* These are the five ordinary status codes returned by the
326 * xd3_encode_input() and xd3_decode_input() state machines. */
329 /* An application must be prepared to handle these five return
330 * values from either xd3_encode_input or xd3_decode_input, except
331 * in the case of no-source compression, in which case XD3_GETSRCBLK
332 * is never returned. More detailed comments for these are given in
333 * xd3_encode_input and xd3_decode_input comments, below. */
334 XD3_INPUT = -17703, /* need input */
335 XD3_OUTPUT = -17704, /* have output */
336 XD3_GETSRCBLK = -17705, /* need a block of source input (with no
337 * xd3_getblk function), a chance to do
338 * non-blocking read. */
339 XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF &
340 first window header */
341 XD3_WINSTART = -17707, /* notification: returned before a window is
342 * processed, giving a chance to
343 * XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that
345 XD3_WINFINISH = -17708, /* notification: returned after
346 encode/decode & output for a window */
347 XD3_TOOFARBACK = -17709, /* (encoder only) may be returned by
348 getblk() if the block is too old */
349 XD3_INTERNAL = -17710, /* internal error */
350 XD3_INVALID = -17711, /* invalid config */
351 XD3_INVALID_INPUT = -17712, /* invalid input/decoder error */
352 XD3_NOSECOND = -17713, /* when secondary compression finds no
354 XD3_UNIMPLEMENTED = -17714, /* currently VCD_TARGET */
357 /* special values in config->flags */
360 XD3_JUST_HDR = (1 << 1), /* used by VCDIFF tools, see
362 XD3_SKIP_WINDOW = (1 << 2), /* used by VCDIFF tools, see
364 XD3_SKIP_EMIT = (1 << 3), /* used by VCDIFF tools, see
366 XD3_FLUSH = (1 << 4), /* flush the stream buffer to
368 xd3_stream_close(). */
370 XD3_SEC_DJW = (1 << 5), /* use DJW static huffman */
371 XD3_SEC_FGK = (1 << 6), /* use FGK adaptive huffman */
372 XD3_SEC_TYPE = (XD3_SEC_DJW | XD3_SEC_FGK),
374 XD3_SEC_NODATA = (1 << 7), /* disable secondary compression of
376 XD3_SEC_NOINST = (1 << 8), /* disable secondary compression of
378 XD3_SEC_NOADDR = (1 << 9), /* disable secondary compression of
381 XD3_SEC_NOALL = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR),
383 XD3_ADLER32 = (1 << 10), /* enable checksum computation in
385 XD3_ADLER32_NOVER = (1 << 11), /* disable checksum verification in
388 XD3_ALT_CODE_TABLE = (1 << 12), /* for testing th
389 e alternate code table encoding. */
391 XD3_NOCOMPRESS = (1 << 13), /* disable ordinary data
392 * compression feature, only search
393 * the source, not the target. */
394 XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass
395 * algorithm", instead use greedy
396 * matching. Greedy is off by
398 XD3_ADLER32_RECODE = (1 << 15), /* used by "recode". */
400 /* 4 bits to set the compression level the same as the command-line
401 * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag,
402 * and is independent of compression level). This is for
403 * convenience, especially with xd3_encode_memory(). */
405 XD3_COMPLEVEL_SHIFT = 20, /* 20 - 24 */
406 XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT),
407 XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT),
408 XD3_COMPLEVEL_2 = (2 << XD3_COMPLEVEL_SHIFT),
409 XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT),
410 XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT),
411 XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT),
415 /* The values of this enumeration are set in xd3_config using the
416 * smatch_cfg variable. It can be set to default, slow, fast, etc.,
420 XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits,
424 XD3_SMATCH_FASTER = 3,
425 XD3_SMATCH_FASTEST = 4,
429 /*********************************************************************
431 **********************************************************************/
433 /* stream->match_state is part of the xd3_encode_input state machine
434 * for source matching:
436 * 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching
437 * 2. this state spans encoder windows: a match and end-of-window
438 * will continue in the next 3. the initial target byte and source
439 * byte are a presumed match, to avoid some computation in case the
440 * inputs are identical.
444 MATCH_TARGET = 0, /* in this state, attempt to match the start of
445 * the target with the previously set source
446 * address (initially 0). */
447 MATCH_BACKWARD = 1, /* currently expanding a match backward in the
449 MATCH_FORWARD = 2, /* currently expanding a match forward in the
451 MATCH_SEARCHING = 3, /* currently searching for a match. */
455 /* The xd3_encode_input state machine steps through these states in
456 * the following order. The matcher is reentrant and returns
457 * XD3_INPUT whenever it requires more data. After receiving
458 * XD3_INPUT, if the application reads EOF it should call
459 * xd3_stream_close().
463 ENC_INIT = 0, /* xd3_encode_input has never been called. */
464 ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */
465 ENC_SEARCH = 2, /* currently searching for matches. */
466 ENC_INSTR = 3, /* currently formatting output. */
467 ENC_FLUSH = 4, /* currently emitting output. */
468 ENC_POSTOUT = 5, /* after an output section. */
469 ENC_POSTWIN = 6, /* after all output sections. */
470 ENC_ABORTED = 7, /* abort. */
473 /* The xd3_decode_input state machine steps through these states in
474 * the following order. The matcher is reentrant and returns
475 * XD3_INPUT whenever it requires more data. After receiving
476 * XD3_INPUT, if the application reads EOF it should call
477 * xd3_stream_close().
479 * 0-8: the VCDIFF header
480 * 9-18: the VCDIFF window header
481 * 19-21: the three primary sections: data, inst, addr
482 * 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK,
483 * 23: return XD3_WINFINISH, set state=9 to decode more input
487 DEC_VCHEAD = 0, /* VCDIFF header */
488 DEC_HDRIND = 1, /* header indicator */
490 DEC_SECONDID = 2, /* secondary compressor ID */
492 DEC_TABLEN = 3, /* code table length */
493 DEC_NEAR = 4, /* code table near */
494 DEC_SAME = 5, /* code table same */
495 DEC_TABDAT = 6, /* code table data */
497 DEC_APPLEN = 7, /* application data length */
498 DEC_APPDAT = 8, /* application data */
500 DEC_WININD = 9, /* window indicator */
502 DEC_CPYLEN = 10, /* copy window length */
503 DEC_CPYOFF = 11, /* copy window offset */
505 DEC_ENCLEN = 12, /* length of delta encoding */
506 DEC_TGTLEN = 13, /* length of target window */
507 DEC_DELIND = 14, /* delta indicator */
509 DEC_DATALEN = 15, /* length of ADD+RUN data */
510 DEC_INSTLEN = 16, /* length of instruction data */
511 DEC_ADDRLEN = 17, /* length of address data */
513 DEC_CKSUM = 18, /* window checksum */
515 DEC_DATA = 19, /* data section */
516 DEC_INST = 20, /* instruction section */
517 DEC_ADDR = 21, /* address section */
519 DEC_EMIT = 22, /* producing data */
521 DEC_FINISH = 23, /* window finished */
523 DEC_ABORTED = 24, /* xd3_abort_stream */
526 /************************************************************
528 ************************************************************/
530 /* instruction lists used in the IOPT buffer */
537 /* the raw encoding of an instruction used in the IOPT buffer */
550 /* the code-table form of an single- or double-instruction */
559 /* the decoded form of a single (half) instruction. */
563 uint32_t size; /* TODO: why decode breaks if this is usize_t? */
564 uint32_t addr; /* TODO: why decode breaks if this is usize_t? */
567 /* the form of a whole-file instruction */
570 uint8_t type; /* RUN, ADD, COPY */
571 uint8_t mode; /* 0, VCD_SOURCE, VCD_TARGET */
574 xoff_t position; /* absolute position of this inst */
577 /* used by the encoder to buffer output in sections. list of blocks. */
583 xd3_output *next_page;
586 /* used by the decoder to buffer input in sections. */
590 const uint8_t *buf_max;
591 uint32_t size; /* TODO: why decode breaks if this is usize_t? */
594 /* used in xdelta3-decode.h */
598 /* used in xdelta3-second.h */
603 /* the VCDIFF address cache, see the RFC */
604 struct _xd3_addr_cache
608 usize_t next_slot; /* the circular index for near */
609 usize_t *near_array; /* array of size s_near */
610 usize_t *same_array; /* array of size s_same*256 */
613 /* the IOPT buffer list is just a list of buffers, which may be allocated
614 * during encode when using an unlimited buffer. */
615 struct _xd3_iopt_buflist
618 xd3_iopt_buflist *next;
621 /* This is the record of a pre-compiled configuration, a subset of
626 int (*string_match) (xd3_stream *stream);
631 usize_t small_lchain;
636 /* hash table size & power-of-two hash function. */
650 /* window info (for whole state) */
651 struct _xd3_wininfo {
657 /* whole state for, e.g., merge */
658 struct _xd3_whole_state {
668 xd3_wininfo *wininfo;
669 usize_t wininfo_alloc;
674 /********************************************************************
676 *******************************************************************/
678 /* Settings for the secondary compressor. */
681 int data_type; /* Which section. (set automatically) */
682 usize_t ngroups; /* Number of DJW Huffman groups. */
683 usize_t sector_size; /* Sector size. */
684 int inefficient; /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */
687 /* This is the user-visible stream configuration. */
690 usize_t winsize; /* The encoder window size. */
691 usize_t sprevsz; /* How far back small string
693 usize_t iopt_size; /* entries in the
694 instruction-optimizing
696 usize_t srcwin_maxsz; /* srcwin_size grows by a factor
697 of 2 when no matches are
698 found. encoder will not seek
699 back further than this. */
701 xd3_getblk_func *getblk; /* The three callbacks. */
702 xd3_alloc_func *alloc;
703 xd3_free_func *freef;
704 void *opaque; /* Not used. */
705 int flags; /* stream->flags are initialized
706 * from xd3_config & never
707 * modified by the library. Use
708 * xd3_set_flags to modify flags
709 * settings mid-stream. */
711 xd3_sec_cfg sec_data; /* Secondary compressor config: data */
712 xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */
713 xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */
715 xd3_smatch_cfg smatch_cfg; /* See enum: use fields below for
717 xd3_smatcher smatcher_soft;
720 /* The primary source file object. You create one of these objects and
721 * initialize the first four fields. This library maintains the next
722 * 5 fields. The configured getblk implementation is responsible for
723 * setting the final 3 fields when called (and/or when XD3_GETSRCBLK
729 usize_t blksize; /* block size */
730 const char *name; /* its name, for debug/print
732 void *ioh; /* opaque handle */
735 xoff_t curblkno; /* current block number: client
736 sets after getblk request */
737 usize_t onblk; /* number of bytes on current
738 block: client sets, must be >= 0
740 const uint8_t *curblk; /* current block array: client
741 sets after getblk request */
744 usize_t srclen; /* length of this source window */
745 xoff_t srcbase; /* offset of this source window
746 in the source itself */
747 int shiftby; /* for power-of-two blocksizes */
748 int maskby; /* for power-of-two blocksizes */
749 xoff_t cpyoff_blocks; /* offset of dec_cpyoff in blocks */
750 usize_t cpyoff_blkoff; /* offset of copy window in
752 xoff_t getblkno; /* request block number: xd3 sets
753 current getblk request */
755 /* See xd3_getblk() */
756 xoff_t max_blkno; /* Maximum block, if eof is known,
757 * otherwise, equals frontier_blkno
759 xoff_t frontier_blkno; /* If eof is unknown, the next
760 * source position to be read.
761 * Otherwise, equal to
763 usize_t onlastblk; /* Number of bytes on max_blkno */
764 int eof_known; /* Set to true when the first
765 * partial block is read. */
768 /* The primary xd3_stream object, used for encoding and decoding. You
769 * may access only two fields: avail_out, next_out. Use the methods
770 * above to operate on xd3_stream. */
774 const uint8_t *next_in; /* next input byte */
775 usize_t avail_in; /* number of bytes available at
777 xoff_t total_in; /* how many bytes in */
780 uint8_t *next_out; /* next output byte */
781 usize_t avail_out; /* number of bytes available at
783 usize_t space_out; /* total out space */
784 xoff_t current_window; /* number of windows encoded/decoded */
785 xoff_t total_out; /* how many bytes out */
787 /* to indicate an error, xd3 sets */
788 const char *msg; /* last error message, NULL if
791 /* source configuration */
792 xd3_source *src; /* source array */
794 /* encoder memory configuration */
795 usize_t winsize; /* suggested window size */
796 usize_t sprevsz; /* small string, previous window
798 usize_t sprevmask; /* small string, previous window
801 usize_t iopt_unlimited;
802 usize_t srcwin_maxsz;
804 /* general configuration */
805 xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno */
806 xd3_alloc_func *alloc; /* malloc function */
807 xd3_free_func *free; /* free function */
808 void* opaque; /* private data object passed to
809 alloc, free, and getblk */
810 int flags; /* various options */
812 /* secondary compressor configuration */
813 xd3_sec_cfg sec_data; /* Secondary compressor config: data */
814 xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */
815 xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */
817 xd3_smatcher smatcher;
819 usize_t *large_table; /* table of large checksums */
820 xd3_hash_cfg large_hash; /* large hash config */
822 usize_t *small_table; /* table of small checksums */
823 xd3_slist *small_prev; /* table of previous offsets,
824 circular linked list */
825 int small_reset; /* true if small table should
828 xd3_hash_cfg small_hash; /* small hash config */
829 xd3_addr_cache acache; /* the vcdiff address cache */
830 xd3_encode_state enc_state; /* state of the encoder */
832 usize_t taroff; /* base offset of the target input */
833 usize_t input_position; /* current input position */
834 usize_t min_match; /* current minimum match
835 length, avoids redundent
837 usize_t unencoded_offset; /* current input, first
838 * unencoded offset. this value
839 * is <= the first instruction's
840 * position in the iopt buffer,
841 * if there is at least one
842 * match in the buffer. */
845 // these variables plus srcwin_maxsz above (set by config)
846 int srcwin_decided; /* boolean: true if srclen and
849 int srcwin_decided_early; /* boolean: true if srclen
852 xoff_t srcwin_cksum_pos; /* Source checksum position */
855 xd3_match_state match_state; /* encoder match state */
856 xoff_t match_srcpos; /* current match source
859 xoff_t match_last_srcpos; /* previously attempted
860 * srcpos, to avoid loops. */
861 xoff_t match_minaddr; /* smallest matching address to
862 * set window params (reset each
863 * window xd3_encode_reset) */
864 xoff_t match_maxaddr; /* largest matching address to
865 * set window params (reset each
866 * window xd3_encode_reset) */
867 usize_t match_back; /* match extends back so far */
868 usize_t match_maxback; /* match extends back maximum */
869 usize_t match_fwd; /* match extends forward so far */
870 usize_t match_maxfwd; /* match extends forward maximum */
872 xoff_t maxsrcaddr; /* address of the last source
873 match (across windows) */
875 uint8_t *buf_in; /* for saving buffered input */
876 usize_t buf_avail; /* amount of saved input */
877 const uint8_t *buf_leftover; /* leftover content of next_in
878 (i.e., user's buffer) */
879 usize_t buf_leftavail; /* amount of leftover content */
881 xd3_output *enc_current; /* current output buffer */
882 xd3_output *enc_free; /* free output buffers */
883 xd3_output *enc_heads[4]; /* array of encoded outputs:
885 xd3_output *enc_tails[4]; /* array of encoded outputs:
887 uint32_t recode_adler32; /* set the adler32 checksum
888 * during "recode". */
890 xd3_rlist iopt_used; /* instruction optimizing buffer */
892 xd3_rinst *iout; /* next single instruction */
893 xd3_iopt_buflist *iopt_alloc;
895 const uint8_t *enc_appheader; /* application header to encode */
896 usize_t enc_appheadsz; /* application header size */
899 xd3_decode_state dec_state; /* current DEC_XXX value */
900 usize_t dec_hdr_ind; /* VCDIFF header indicator */
901 usize_t dec_win_ind; /* VCDIFF window indicator */
902 usize_t dec_del_ind; /* VCDIFF delta indicator */
904 uint8_t dec_magic[4]; /* First four bytes */
905 usize_t dec_magicbytes; /* Magic position. */
907 usize_t dec_secondid; /* Optional secondary compressor ID. */
909 /* TODO: why decode breaks if this is usize_t? */
910 uint32_t dec_codetblsz; /* Optional code table: length. */
911 uint8_t *dec_codetbl; /* Optional code table: storage. */
912 usize_t dec_codetblbytes; /* Optional code table: position. */
914 /* TODO: why decode breaks if this is usize_t? */
915 uint32_t dec_appheadsz; /* Optional application header:
917 uint8_t *dec_appheader; /* Optional application header:
919 usize_t dec_appheadbytes; /* Optional application header:
922 usize_t dec_cksumbytes; /* Optional checksum: position. */
923 uint8_t dec_cksum[4]; /* Optional checksum: storage. */
924 uint32_t dec_adler32; /* Optional checksum: value. */
926 /* TODO: why decode breaks if this is usize_t? */
927 uint32_t dec_cpylen; /* length of copy window
928 (VCD_SOURCE or VCD_TARGET) */
929 xoff_t dec_cpyoff; /* offset of copy window
930 (VCD_SOURCE or VCD_TARGET) */
931 /* TODO: why decode breaks if this is usize_t? */
932 uint32_t dec_enclen; /* length of delta encoding */
933 /* TODO: why decode breaks if this is usize_t? */
934 uint32_t dec_tgtlen; /* length of target window */
937 uint64_t dec_64part; /* part of a decoded uint64_t */
940 uint32_t dec_32part; /* part of a decoded uint32_t */
943 xoff_t dec_winstart; /* offset of the start of
944 current target window */
945 xoff_t dec_window_count; /* == current_window + 1 in
947 usize_t dec_winbytes; /* bytes of the three sections
949 usize_t dec_hdrsize; /* VCDIFF + app header size */
951 const uint8_t *dec_tgtaddrbase; /* Base of decoded target
954 const uint8_t *dec_cpyaddrbase; /* Base of decoded copy
958 usize_t dec_position; /* current decoder position
961 usize_t dec_maxpos; /* maximum decoder position
964 xd3_hinst dec_current1; /* current instruction */
965 xd3_hinst dec_current2; /* current instruction */
967 uint8_t *dec_buffer; /* Decode buffer */
968 uint8_t *dec_lastwin; /* In case of VCD_TARGET, the
969 last target window. */
970 usize_t dec_lastlen; /* length of the last target
972 xoff_t dec_laststart; /* offset of the start of last
974 usize_t dec_lastspace; /* allocated space of last
975 target window, for reuse */
977 xd3_desect inst_sect; /* staging area for decoding
979 xd3_desect addr_sect;
980 xd3_desect data_sect;
982 xd3_code_table_func *code_table_func;
983 xd3_comp_table_func *comp_table_func;
984 const xd3_dinst *code_table;
985 const xd3_code_table_desc *code_table_desc;
986 xd3_dinst *code_table_alloc;
988 /* secondary compression */
989 const xd3_sec_type *sec_type;
990 xd3_sec_stream *sec_stream_d;
991 xd3_sec_stream *sec_stream_i;
992 xd3_sec_stream *sec_stream_a;
994 /* state for reconstructing whole files (e.g., for merge), this only
995 * supports loading USIZE_T_MAX instructions, adds, etc. */
996 xd3_whole_state whole_target;
1009 usize_t i_slots_used;
1012 usize_t large_ckcnt;
1020 /**************************************************************************
1022 **************************************************************************/
1024 /* This function configures an xd3_stream using the provided in-memory
1025 * input buffer, source buffer, output buffer, and flags. The output
1026 * array must be large enough or else ENOSPC will be returned. This
1027 * is the simplest in-memory encoding interface. */
1028 int xd3_encode_memory (const uint8_t *input,
1030 const uint8_t *source,
1031 usize_t source_size,
1032 uint8_t *output_buffer,
1033 usize_t *output_size,
1034 usize_t avail_output,
1037 /* The reverse of xd3_encode_memory. */
1038 int xd3_decode_memory (const uint8_t *input,
1040 const uint8_t *source,
1041 usize_t source_size,
1042 uint8_t *output_buf,
1043 usize_t *output_size,
1044 usize_t avail_output,
1047 /* This function encodes an in-memory input using a pre-configured
1048 * xd3_stream. This allows the caller to set a variety of options
1049 * which are not available in the xd3_encode/decode_memory()
1052 * The output array must be large enough to hold the output or else
1053 * ENOSPC is returned. The source (if any) should be set using
1054 * xd3_set_source_and_size() with a single-block xd3_source. This
1055 * calls the underlying non-blocking interfaces,
1056 * xd3_encode/decode_input(), handling the necessary input/output
1057 * states. This method may be considered a reference for any
1058 * application using xd3_encode_input() directly.
1060 * xd3_stream stream;
1061 * xd3_config config;
1064 * memset (& src, 0, sizeof (src));
1065 * memset (& stream, 0, sizeof (stream));
1066 * memset (& config, 0, sizeof (config));
1068 * if (source != NULL)
1070 * src.size = source_size;
1071 * src.blksize = source_size;
1073 * src.onblk = source_size;
1074 * src.curblk = source;
1075 * xd3_set_source(&stream, &src);
1078 * config.flags = flags;
1079 * config.srcwin_maxsz = source_size;
1080 * config.winsize = input_size;
1082 * ... set smatcher, appheader, encoding-table, compression-level, etc.
1084 * xd3_config_stream(&stream, &config);
1085 * xd3_encode_stream(&stream, ...);
1086 * xd3_free_stream(&stream);
1088 int xd3_encode_stream (xd3_stream *stream,
1089 const uint8_t *input,
1092 usize_t *output_size,
1093 usize_t avail_output);
1095 /* The reverse of xd3_encode_stream. */
1096 int xd3_decode_stream (xd3_stream *stream,
1097 const uint8_t *input,
1100 usize_t *output_size,
1101 usize_t avail_size);
1103 /* This is the non-blocking interface.
1105 * Handling input and output states is the same for encoding or
1106 * decoding using the xd3_avail_input() and xd3_consume_output()
1107 * routines, inlined below.
1111 * XD3_INPUT: the process requires more input: call
1112 * xd3_avail_input() then repeat
1114 * XD3_OUTPUT: the process has more output: read stream->next_out,
1115 * stream->avail_out, then call xd3_consume_output(),
1118 * XD3_GOTHEADER: (decoder-only) notification returned following the
1119 * VCDIFF header and first window header. the decoder
1120 * may use the header to configure itself.
1122 * XD3_WINSTART: a general notification returned once for each
1123 * window except the 0-th window, which is implied by
1124 * XD3_GOTHEADER. It is recommended to use a
1125 * switch-stmt such as:
1129 * switch ((ret = xd3_decode_input (stream))) {
1130 * case XD3_GOTHEADER: {
1131 * assert(stream->current_window == 0);
1135 * case XD3_WINSTART: {
1136 * something(stream->current_window);
1141 * XD3_WINFINISH: a general notification, following the complete
1142 * input & output of a window. at this point,
1143 * stream->total_in and stream->total_out are consistent
1144 * for either encoding or decoding.
1146 * XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value
1147 * is returned to initiate a non-blocking source read.
1149 int xd3_decode_input (xd3_stream *stream);
1150 int xd3_encode_input (xd3_stream *stream);
1152 /* The xd3_config structure is used to initialize a stream - all data
1153 * is copied into stream so config may be a temporary variable. See
1154 * the [documentation] or comments on the xd3_config structure. */
1155 int xd3_config_stream (xd3_stream *stream,
1156 xd3_config *config);
1158 /* Since Xdelta3 doesn't open any files, xd3_close_stream is just an
1159 * error check that the stream is in a proper state to be closed: this
1160 * means the encoder is flushed and the decoder is at a window
1161 * boundary. The application is responsible for freeing any of the
1162 * resources it supplied. */
1163 int xd3_close_stream (xd3_stream *stream);
1165 /* This arranges for closes the stream to succeed. Does not free the
1167 void xd3_abort_stream (xd3_stream *stream);
1169 /* xd3_free_stream frees all memory allocated for the stream. The
1170 * application is responsible for freeing any of the resources it
1172 void xd3_free_stream (xd3_stream *stream);
1174 /* This function informs the encoder or decoder that source matching
1175 * (i.e., delta-compression) is possible. For encoding, this should
1176 * be called before the first xd3_encode_input. A NULL source is
1177 * ignored. For decoding, this should be called before the first
1178 * window is decoded, but the appheader may be read first
1179 * (XD3_GOTHEADER). After decoding the header, call xd3_set_source()
1180 * if you have a source file. Note: if (stream->dec_win_ind & VCD_SOURCE)
1181 * is true, it means the first window expects there to be a source file.
1183 int xd3_set_source (xd3_stream *stream,
1184 xd3_source *source);
1186 /* If the source size is known, call this instead of xd3_set_source().
1187 * to avoid having stream->getblk called (and/or to avoid XD3_GETSRCBLK).
1189 * Follow these steps:
1191 memset(&source, 0, sizeof(source));
1192 source.blksize = size;
1193 source.onblk = size;
1194 source.curblk = buf;
1195 source.curblkno = 0;
1196 int ret = xd3_set_source_and_size(&stream, &source, size);
1199 int xd3_set_source_and_size (xd3_stream *stream,
1201 xoff_t source_size);
1203 /* This should be called before the first call to xd3_encode_input()
1204 * to include application-specific data in the VCDIFF header. */
1205 void xd3_set_appheader (xd3_stream *stream,
1206 const uint8_t *data,
1209 /* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER.
1210 * For convenience, the decoder always adds a single byte padding to
1211 * the end of the application header, which is set to zero in case the
1212 * application header is a string. */
1213 int xd3_get_appheader (xd3_stream *stream,
1217 /* To generate a VCDIFF encoded delta with xd3_encode_init() from
1218 * another format, use:
1220 * xd3_encode_init_partial() -- initialze encoder state (w/o hash tables)
1221 * xd3_init_cache() -- reset VCDIFF address cache
1222 * xd3_found_match() -- to report a copy instruction
1224 * set stream->enc_state to ENC_INSTR and call xd3_encode_input as usual.
1226 int xd3_encode_init_partial (xd3_stream *stream);
1227 void xd3_init_cache (xd3_addr_cache* acache);
1228 int xd3_found_match (xd3_stream *stream,
1229 usize_t pos, usize_t size,
1230 xoff_t addr, int is_source);
1232 /* Gives an error string for xdelta3-speficic errors, returns NULL for
1234 const char* xd3_strerror (int ret);
1236 /* For convenience, zero & initialize the xd3_config structure with
1239 void xd3_init_config (xd3_config *config,
1242 memset (config, 0, sizeof (*config));
1243 config->flags = flags;
1246 /* This supplies some input to the stream.
1248 * For encoding, if the input is larger than the configured window
1249 * size (xd3_config.winsize), the entire input will be consumed and
1250 * encoded anyway. If you wish to strictly limit the window size,
1251 * limit the buffer passed to xd3_avail_input to the window size.
1253 * For encoding, if the input is smaller than the configured window
1254 * size (xd3_config.winsize), the library will create a window-sized
1255 * buffer and accumulate input until a full-sized window can be
1256 * encoded. XD3_INPUT will be returned. The input must remain valid
1257 * until the next time xd3_encode_input() returns XD3_INPUT.
1259 * For decoding, the input will be consumed entirely before XD3_INPUT
1260 * is returned again.
1263 void xd3_avail_input (xd3_stream *stream,
1264 const uint8_t *idata,
1267 /* Even if isize is zero, the code expects a non-NULL idata. Why?
1268 * It uses this value to determine whether xd3_avail_input has ever
1269 * been called. If xd3_encode_input is called before
1270 * xd3_avail_input it will return XD3_INPUT right away without
1271 * allocating a stream->winsize buffer. This is to avoid an
1272 * unwanted allocation. */
1273 XD3_ASSERT (idata != NULL || isize == 0);
1275 stream->next_in = idata;
1276 stream->avail_in = isize;
1279 /* This acknowledges receipt of output data, must be called after any
1280 * XD3_OUTPUT return. */
1282 void xd3_consume_output (xd3_stream *stream)
1284 stream->avail_out = 0;
1287 /* These are set for each XD3_WINFINISH return. */
1289 int xd3_encoder_used_source (xd3_stream *stream) {
1290 return stream->src != NULL && stream->src->srclen > 0;
1293 xoff_t xd3_encoder_srcbase (xd3_stream *stream) {
1294 return stream->src->srcbase;
1297 usize_t xd3_encoder_srclen (xd3_stream *stream) {
1298 return stream->src->srclen;
1301 /* Checks for legal flag changes. */
1303 void xd3_set_flags (xd3_stream *stream, int flags)
1305 /* The bitwise difference should contain only XD3_FLUSH or
1307 XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0);
1308 stream->flags = flags;
1311 /* Gives some extra information about the latest library error, if any
1314 const char* xd3_errstring (xd3_stream *stream)
1316 return stream->msg ? stream->msg : "";
1320 /* 64-bit divisions are expensive, which is why we require a
1321 * power-of-two source->blksize. To relax this restriction is
1322 * relatively easy, see the history for xd3_blksize_div(). */
1324 void xd3_blksize_div (const xoff_t offset,
1325 const xd3_source *source,
1328 *blkno = (xoff_t) (offset >> source->shiftby);
1329 *blkoff = (usize_t) (offset & source->maskby);
1330 XD3_ASSERT (*blkoff < source->blksize);
1334 void xd3_blksize_add (xoff_t *blkno,
1336 const xd3_source *source,
1341 /* Does not check for overflow, checked in xdelta3-decode.h. */
1343 blkdiff = *blkoff >> source->shiftby;
1348 *blkoff &= source->maskby;
1351 XD3_ASSERT (*blkoff < source->blksize);
1357 #define XD3_CPY 3U /* XD3_CPY rtypes are represented as (XD3_CPY +
1358 * copy-mode value) */
1361 #define IF_DEBUG(x) x
1366 #define IF_DEBUG1(x) x
1368 #define IF_DEBUG1(x)
1371 #define IF_DEBUG2(x) x
1373 #define IF_DEBUG2(x)
1376 #define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
1378 #endif /* _XDELTA3_H_ */