1 /* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2002, 2006, 2007. Joshua P. MacDonald
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 /* For demonstration purposes only.
22 #ifndef _XDELTA3_FGK_h_
23 #define _XDELTA3_FGK_h_
25 /* An implementation of the FGK algorithm described by D.E. Knuth in
26 * "Dynamic Huffman Coding" in Journal of Algorithms 6. */
28 /* A 32bit counter (fgk_weight) is used as the frequency counter for
29 * nodes in the huffman tree. TODO: Need oto test for overflow and/or
32 typedef struct _fgk_stream fgk_stream;
33 typedef struct _fgk_node fgk_node;
34 typedef struct _fgk_block fgk_block;
35 typedef unsigned int fgk_bit;
36 typedef uint32_t fgk_weight;
41 fgk_block *un_freeptr;
45 #define block_leader un.un_leader
46 #define block_freeptr un.un_freeptr
48 /* The code can also support fixed huffman encoding/decoding. */
51 /* weight is a count of the number of times this element has been seen
52 * in the current encoding/decoding. parent, right_child, and
53 * left_child are pointers defining the tree structure. right and
54 * left point to neighbors in an ordered sequence of weights. The
55 * left child of a node is always guaranteed to have weight not
56 * greater than its sibling. fgk_blockLeader points to the element
57 * with the same weight as itself which is closest to the next
58 * increasing weight block. */
64 fgk_node *right_child;
70 /* alphabet_size is the a count of the number of possible leaves in
71 * the huffman tree. The number of total nodes counting internal
72 * nodes is ((2 * alphabet_size) - 1). zero_freq_count is the number
73 * of elements remaining which have zero frequency. zero_freq_exp and
74 * zero_freq_rem satisfy the equation zero_freq_count =
75 * 2^zero_freq_exp + zero_freq_rem. root_node is the root of the
76 * tree, which is initialized to a node with zero frequency and
77 * contains the 0th such element. free_node contains a pointer to the
78 * next available fgk_node space. alphabet contains all the elements
79 * and is indexed by N. remaining_zeros points to the head of the
83 usize_t alphabet_size;
84 usize_t zero_freq_count;
85 usize_t zero_freq_exp;
86 usize_t zero_freq_rem;
94 fgk_block *block_array;
95 fgk_block *free_block;
98 fgk_node *remaining_zeros;
104 /*********************************************************************/
106 /*********************************************************************/
108 static fgk_stream* fgk_alloc (xd3_stream *stream /*, usize_t alphabet_size */);
109 static int fgk_init (xd3_stream *stream,
112 static int fgk_encode_data (fgk_stream *h,
114 static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h);
116 static int xd3_encode_fgk (xd3_stream *stream,
117 fgk_stream *sec_stream,
122 /*********************************************************************/
124 /*********************************************************************/
126 static inline int fgk_decode_bit (fgk_stream *h,
128 static int fgk_decode_data (fgk_stream *h);
129 static void fgk_destroy (xd3_stream *stream,
132 static int xd3_decode_fgk (xd3_stream *stream,
133 fgk_stream *sec_stream,
134 const uint8_t **input,
135 const uint8_t *const input_end,
137 const uint8_t *const output_end);
139 /*********************************************************************/
141 /*********************************************************************/
143 static unsigned int fgk_find_nth_zero (fgk_stream *h, usize_t n);
144 static usize_t fgk_nth_zero (fgk_stream *h, usize_t n);
145 static void fgk_update_tree (fgk_stream *h, usize_t n);
146 static fgk_node* fgk_increase_zero_weight (fgk_stream *h, usize_t n);
147 static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node);
148 static void fgk_move_right (fgk_stream *h, fgk_node *node);
149 static void fgk_promote (fgk_stream *h, fgk_node *node);
150 static void fgk_init_node (fgk_node *node, usize_t i, usize_t size);
151 static fgk_block* fgk_make_block (fgk_stream *h, fgk_node *l);
152 static void fgk_free_block (fgk_stream *h, fgk_block *b);
153 static void fgk_factor_remaining (fgk_stream *h);
154 static inline void fgk_swap_ptrs (fgk_node **one, fgk_node **two);
156 /*********************************************************************/
158 /*********************************************************************/
160 /* returns an initialized huffman encoder for an alphabet with the
161 * given size. returns NULL if enough memory cannot be allocated */
162 static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */)
164 usize_t alphabet_size0 = ALPHABET_SIZE;
167 if ((h = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream))) == NULL)
172 h->total_nodes = (2 * alphabet_size0) - 1;
173 h->total_blocks = (2 * h->total_nodes);
174 h->alphabet = (fgk_node*) xd3_alloc (stream, h->total_nodes, sizeof (fgk_node));
175 h->block_array = (fgk_block*) xd3_alloc (stream, h->total_blocks, sizeof (fgk_block));
176 h->coded_bits = (fgk_bit*) xd3_alloc (stream, alphabet_size0, sizeof (fgk_bit));
178 if (h->coded_bits == NULL ||
179 h->alphabet == NULL ||
180 h->block_array == NULL)
182 fgk_destroy (stream, h);
186 h->alphabet_size = alphabet_size0;
191 static int fgk_init (xd3_stream *stream, fgk_stream *h, int is_encode)
196 h->root_node = h->alphabet;
197 h->decode_ptr = h->root_node;
198 h->free_node = h->alphabet + h->alphabet_size;
199 h->remaining_zeros = h->alphabet;
201 h->zero_freq_count = h->alphabet_size + 2;
203 /* after two calls to factor_remaining, zero_freq_count == alphabet_size */
204 fgk_factor_remaining(h); /* set ZFE and ZFR */
205 fgk_factor_remaining(h); /* set ZFDB according to prev state */
207 IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes));
209 for (ui = 0; ui < h->total_blocks-1; ui += 1)
211 h->block_array[ui].block_freeptr = &h->block_array[ui + 1];
214 h->block_array[h->total_blocks - 1].block_freeptr = NULL;
215 h->free_block = h->block_array;
217 /* Zero frequency nodes are inserted in the first alphabet_size
218 * positions, with Value, weight, and a pointer to the next zero
220 for (si = h->alphabet_size - 1; si >= 0; si -= 1)
222 fgk_init_node (h->alphabet + si, (usize_t) si, h->alphabet_size);
228 static void fgk_swap_ptrs(fgk_node **one, fgk_node **two)
230 fgk_node *tmp = *one;
235 /* Takes huffman transmitter h and n, the nth elt in the alphabet, and
236 * returns the number of required to encode n. */
237 static int fgk_encode_data (fgk_stream* h, usize_t n)
239 fgk_node *target_ptr = h->alphabet + n;
241 XD3_ASSERT (n < h->alphabet_size);
245 /* First encode the binary representation of the nth remaining
246 * zero frequency element in reverse such that bit, which will be
247 * encoded from h->coded_depth down to 0 will arrive in increasing
248 * order following the tree path. If there is only one left, it
249 * is not neccesary to encode these bits. */
250 if (IS_ADAPTIVE && target_ptr->weight == 0)
252 unsigned int where, shift;
255 where = fgk_find_nth_zero(h, n);
258 if (h->zero_freq_rem == 0)
260 bits = h->zero_freq_exp;
264 bits = h->zero_freq_exp + 1;
269 h->coded_bits[h->coded_depth++] = (shift & where) && 1;
275 target_ptr = h->remaining_zeros;
278 /* The path from root to node is filled into coded_bits in reverse so
279 * that it is encoded in the right order */
280 while (target_ptr != h->root_node)
282 h->coded_bits[h->coded_depth++] = (target_ptr->parent->right_child == target_ptr);
284 target_ptr = target_ptr->parent;
289 fgk_update_tree(h, n);
292 return h->coded_depth;
295 /* Should be called as many times as fgk_encode_data returns.
297 static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h)
299 XD3_ASSERT (h->coded_depth > 0);
301 return h->coded_bits[--h->coded_depth];
304 /* This procedure updates the tree after alphabet[n] has been encoded
307 static void fgk_update_tree (fgk_stream *h, usize_t n)
311 if (h->alphabet[n].weight == 0)
313 incr_node = fgk_increase_zero_weight (h, n);
317 incr_node = h->alphabet + n;
320 while (incr_node != h->root_node)
322 fgk_move_right (h, incr_node);
323 fgk_promote (h, incr_node);
324 incr_node->weight += 1; /* incr the parent */
325 incr_node = incr_node->parent; /* repeat */
328 h->root_node->weight += 1;
331 static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd)
333 fgk_node **fwd_par_ptr, **back_par_ptr;
334 fgk_node *move_back, *tmp;
336 move_back = move_fwd->my_block->block_leader;
338 if (move_fwd == move_back ||
339 move_fwd->parent == move_back ||
340 move_fwd->weight == 0)
345 move_back->right->left = move_fwd;
349 move_fwd->left->right = move_back;
352 tmp = move_fwd->right;
353 move_fwd->right = move_back->right;
355 if (tmp == move_back)
357 move_back->right = move_fwd;
361 tmp->left = move_back;
362 move_back->right = tmp;
365 tmp = move_back->left;
366 move_back->left = move_fwd->left;
370 move_fwd->left = move_back;
374 tmp->right = move_fwd;
375 move_fwd->left = tmp;
378 if (move_fwd->parent->right_child == move_fwd)
380 fwd_par_ptr = &move_fwd->parent->right_child;
384 fwd_par_ptr = &move_fwd->parent->left_child;
387 if (move_back->parent->right_child == move_back)
389 back_par_ptr = &move_back->parent->right_child;
393 back_par_ptr = &move_back->parent->left_child;
396 fgk_swap_ptrs (&move_fwd->parent, &move_back->parent);
397 fgk_swap_ptrs (fwd_par_ptr, back_par_ptr);
399 move_fwd->my_block->block_leader = move_fwd;
402 /* Shifts node, the leader of its block, into the next block. */
403 static void fgk_promote (fgk_stream *h, fgk_node *node)
405 fgk_node *my_left, *my_right;
406 fgk_block *cur_block;
408 my_right = node->right;
409 my_left = node->left;
410 cur_block = node->my_block;
412 if (node->weight == 0)
417 /* if left is right child, parent of remaining zeros case (?), means parent
418 * has same weight as right child. */
419 if (my_left == node->right_child &&
421 node->left_child->weight == 0)
423 XD3_ASSERT (node->left_child == h->remaining_zeros);
424 XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */
426 if (node->weight == (my_right->weight - 1) && my_right != h->root_node)
428 fgk_free_block (h, cur_block);
429 node->my_block = my_right->my_block;
430 my_left->my_block = my_right->my_block;
436 if (my_left == h->remaining_zeros)
441 /* true if not the leftmost node */
442 if (my_left->my_block == cur_block)
444 my_left->my_block->block_leader = my_left;
448 fgk_free_block (h, cur_block);
451 /* node->parent != my_right */
452 if ((node->weight == (my_right->weight - 1)) && (my_right != h->root_node))
454 node->my_block = my_right->my_block;
458 node->my_block = fgk_make_block (h, node);
462 /* When an element is seen the first time this is called to remove it from the list of
463 * zero weight elements and introduce a new internal node to the tree. */
464 static fgk_node* fgk_increase_zero_weight (fgk_stream *h, usize_t n)
466 fgk_node *this_zero, *new_internal, *zero_ptr;
468 this_zero = h->alphabet + n;
470 if (h->zero_freq_count == 1)
472 /* this is the last one */
473 this_zero->right_child = NULL;
475 if (this_zero->right->weight == 1)
477 this_zero->my_block = this_zero->right->my_block;
481 this_zero->my_block = fgk_make_block (h, this_zero);
484 h->remaining_zeros = NULL;
489 zero_ptr = h->remaining_zeros;
491 new_internal = h->free_node++;
493 new_internal->parent = zero_ptr->parent;
494 new_internal->right = zero_ptr->right;
495 new_internal->weight = 0;
496 new_internal->right_child = this_zero;
497 new_internal->left = this_zero;
499 if (h->remaining_zeros == h->root_node)
501 /* This is the first element to be coded */
502 h->root_node = new_internal;
503 this_zero->my_block = fgk_make_block (h, this_zero);
504 new_internal->my_block = fgk_make_block (h, new_internal);
508 new_internal->right->left = new_internal;
510 if (zero_ptr->parent->right_child == zero_ptr)
512 zero_ptr->parent->right_child = new_internal;
516 zero_ptr->parent->left_child = new_internal;
519 if (new_internal->right->weight == 1)
521 new_internal->my_block = new_internal->right->my_block;
525 new_internal->my_block = fgk_make_block (h, new_internal);
528 this_zero->my_block = new_internal->my_block;
531 fgk_eliminate_zero (h, this_zero);
533 new_internal->left_child = h->remaining_zeros;
535 this_zero->right = new_internal;
536 this_zero->left = h->remaining_zeros;
537 this_zero->parent = new_internal;
538 this_zero->left_child = NULL;
539 this_zero->right_child = NULL;
541 h->remaining_zeros->parent = new_internal;
542 h->remaining_zeros->right = this_zero;
547 /* When a zero frequency element is encoded, it is followed by the
548 * binary representation of the index into the remaining elements.
549 * Sets a cache to the element before it so that it can be removed
550 * without calling this procedure again. */
551 static unsigned int fgk_find_nth_zero (fgk_stream* h, usize_t n)
553 fgk_node *target_ptr = h->alphabet + n;
554 fgk_node *head_ptr = h->remaining_zeros;
555 unsigned int idx = 0;
557 while (target_ptr != head_ptr)
559 head_ptr = head_ptr->right_child;
566 /* Splices node out of the list of zeros. */
567 static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node)
569 if (h->zero_freq_count == 1)
574 fgk_factor_remaining(h);
576 if (node->left_child == NULL)
578 h->remaining_zeros = h->remaining_zeros->right_child;
579 h->remaining_zeros->left_child = NULL;
581 else if (node->right_child == NULL)
583 node->left_child->right_child = NULL;
587 node->right_child->left_child = node->left_child;
588 node->left_child->right_child = node->right_child;
592 static void fgk_init_node (fgk_node *node, usize_t i, usize_t size)
596 node->right_child = node + 1;
600 node->right_child = NULL;
605 node->left_child = node - 1;
609 node->left_child = NULL;
616 node->my_block = NULL;
619 /* The data structure used is an array of blocks, which are unions of
620 * free pointers and huffnode pointers. free blocks are a linked list
621 * of free blocks, the front of which is h->free_block. The used
622 * blocks are pointers to the head of each block. */
623 static fgk_block* fgk_make_block (fgk_stream *h, fgk_node* lead)
625 fgk_block *ret = h->free_block;
627 XD3_ASSERT (h->free_block != NULL);
629 h->free_block = h->free_block->block_freeptr;
631 ret->block_leader = lead;
636 /* Restores the block to the front of the free list. */
637 static void fgk_free_block (fgk_stream *h, fgk_block *b)
639 b->block_freeptr = h->free_block;
643 /* sets zero_freq_count, zero_freq_rem, and zero_freq_exp to satsity
644 * the equation given above. */
645 static void fgk_factor_remaining (fgk_stream *h)
649 i = (--h->zero_freq_count);
650 h->zero_freq_exp = 0;
654 h->zero_freq_exp += 1;
658 i = 1 << h->zero_freq_exp;
660 h->zero_freq_rem = h->zero_freq_count - i;
663 /* receives a bit at a time and returns true when a complete code has
666 static inline int fgk_decode_bit (fgk_stream* h, fgk_bit b)
668 XD3_ASSERT (b == 1 || b == 0);
670 if (IS_ADAPTIVE && h->decode_ptr->weight == 0)
674 if (h->zero_freq_rem == 0)
676 bitsreq = h->zero_freq_exp;
680 bitsreq = h->zero_freq_exp + 1;
683 h->coded_bits[h->coded_depth] = b;
686 return h->coded_depth >= bitsreq;
692 h->decode_ptr = h->decode_ptr->right_child;
696 h->decode_ptr = h->decode_ptr->left_child;
699 if (h->decode_ptr->left_child == NULL)
701 /* If the weight is non-zero, finished. */
702 if (h->decode_ptr->weight != 0)
707 /* zero_freq_count is dropping to 0, finished. */
708 return h->zero_freq_count == 1;
717 static usize_t fgk_nth_zero (fgk_stream* h, usize_t n)
719 fgk_node *ret = h->remaining_zeros;
721 /* ERROR: if during this loop (ret->right_child == NULL) then the
722 * encoder's zero count is too high. Could return an error code
723 * now, but is probably unnecessary overhead, since the caller
724 * should check integrity anyway. */
725 for (; n != 0 && ret->right_child != NULL; n -= 1)
727 ret = ret->right_child;
730 return (usize_t)(ret - h->alphabet);
733 /* once fgk_decode_bit returns 1, this retrieves an index into the
734 * alphabet otherwise this returns 0, indicating more bits are
737 static int fgk_decode_data (fgk_stream* h)
739 usize_t elt = (usize_t)(h->decode_ptr - h->alphabet);
741 if (IS_ADAPTIVE && h->decode_ptr->weight == 0) {
745 if (h->coded_depth > 0)
747 for (; i < h->coded_depth - 1; i += 1)
749 n |= h->coded_bits[i];
754 n |= h->coded_bits[i];
755 elt = fgk_nth_zero(h, n);
762 fgk_update_tree(h, elt);
765 h->decode_ptr = h->root_node;
770 static void fgk_destroy (xd3_stream *stream,
775 xd3_free (stream, h->alphabet);
776 xd3_free (stream, h->coded_bits);
777 xd3_free (stream, h->block_array);
778 xd3_free (stream, h);
782 /*********************************************************************/
784 /*********************************************************************/
787 xd3_encode_fgk (xd3_stream *stream, fgk_stream *sec_stream, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg)
789 bit_state bstate = BIT_STATE_ENCODE_INIT;
790 xd3_output *cur_page;
793 /* OPT: quit compression early if it looks bad */
794 for (cur_page = input; cur_page; cur_page = cur_page->next_page)
796 const uint8_t *inp = cur_page->base;
797 const uint8_t *inp_max = inp + cur_page->next;
799 while (inp < inp_max)
801 usize_t bits = fgk_encode_data (sec_stream, *inp++);
805 if ((ret = xd3_encode_bit (stream, & output, & bstate, fgk_get_encoded_bit (sec_stream)))) { return ret; }
810 return xd3_flush_bits (stream, & output, & bstate);
814 xd3_decode_fgk (xd3_stream *stream,
815 fgk_stream *sec_stream,
816 const uint8_t **input_pos,
817 const uint8_t *const input_max,
818 uint8_t **output_pos,
819 const uint8_t *const output_max)
822 uint8_t *output = *output_pos;
823 const uint8_t *input = *input_pos;
827 if (input == input_max)
829 stream->msg = "secondary decoder end of input";
833 bstate.cur_byte = *input++;
835 for (bstate.cur_mask = 1; bstate.cur_mask != 0x100; bstate.cur_mask <<= 1)
837 int done = fgk_decode_bit (sec_stream, (bstate.cur_byte & bstate.cur_mask) ? 1U : 0U);
839 if (! done) { continue; }
841 *output++ = fgk_decode_data (sec_stream);
843 if (output == output_max)
845 /* During regression testing: */
848 bstate.cur_mask <<= 1;
849 if ((ret = xd3_test_clean_bits (stream, & bstate))) { return ret; }
852 (*output_pos) = output;
853 (*input_pos) = input;
860 #endif /* _XDELTA3_FGK_ */