Added initial support for preset dictionary for raw LZMA1
authorLasse Collin <lasse.collin@tukaani.org>
Tue, 27 Jan 2009 16:36:05 +0000 (18:36 +0200)
committerLasse Collin <lasse.collin@tukaani.org>
Tue, 27 Jan 2009 16:36:05 +0000 (18:36 +0200)
and LZMA2. It is not supported by the .xz format or the xz
command line tool yet.

src/liblzma/lz/lz_decoder.c
src/liblzma/lz/lz_decoder.h
src/liblzma/lz/lz_encoder.c
src/liblzma/lzma/lzma2_decoder.c
src/liblzma/lzma/lzma2_encoder.c
src/liblzma/lzma/lzma_decoder.c
src/liblzma/lzma/lzma_decoder.h
src/liblzma/lzma/lzma_encoder.c

index 5ad5c96..99430c9 100644 (file)
@@ -210,7 +210,7 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
                const lzma_filter_info *filters,
                lzma_ret (*lz_init)(lzma_lz_decoder *lz,
                        lzma_allocator *allocator, const void *options,
-                       size_t *dict_size))
+                       lzma_lz_options *lz_options))
 {
        // Allocate the base structure if it isn't already allocated.
        if (next->coder == NULL) {
@@ -229,17 +229,17 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
        // Allocate and initialize the LZ-based decoder. It will also give
        // us the dictionary size.
-       size_t dict_size;
+       lzma_lz_options lz_options;
        return_if_error(lz_init(&next->coder->lz, allocator,
-                       filters[0].options, &dict_size));
+                       filters[0].options, &lz_options));
 
        // If the dictionary size is very small, increase it to 4096 bytes.
        // This is to prevent constant wrapping of the dictionary, which
        // would slow things down. The downside is that since we don't check
        // separately for the real dictionary size, we may happily accept
        // corrupt files.
-       if (dict_size < 4096)
-               dict_size = 4096;
+       if (lz_options.dict_size < 4096)
+               lz_options.dict_size = 4096;
 
        // Make dictionary size a multipe of 16. Some LZ-based decoders like
        // LZMA use the lowest bits lzma_dict.pos to know the alignment of the
@@ -248,23 +248,38 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
        // recommended to give aligned buffers to liblzma.
        //
        // Avoid integer overflow.
-       if (dict_size > SIZE_MAX - 15)
+       if (lz_options.dict_size > SIZE_MAX - 15)
                return LZMA_MEM_ERROR;
 
-       dict_size = (dict_size + 15) & ~((size_t)(15));
+       lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
 
        // Allocate and initialize the dictionary.
-       if (next->coder->dict.size != dict_size) {
+       if (next->coder->dict.size != lz_options.dict_size) {
                lzma_free(next->coder->dict.buf, allocator);
-               next->coder->dict.buf = lzma_alloc(dict_size, allocator);
+               next->coder->dict.buf
+                               = lzma_alloc(lz_options.dict_size, allocator);
                if (next->coder->dict.buf == NULL)
                        return LZMA_MEM_ERROR;
 
-               next->coder->dict.size = dict_size;
+               next->coder->dict.size = lz_options.dict_size;
        }
 
        lz_decoder_reset(next->coder);
 
+       // Use the preset dictionary if it was given to us.
+       if (lz_options.preset_dict != NULL
+                       && lz_options.preset_dict_size > 0) {
+               // If the preset dictionary is bigger than the actual
+               // dictionary, copy only the tail.
+               const size_t copy_size = MIN(lz_options.preset_dict_size,
+                               lz_options.dict_size);
+               const size_t offset = lz_options.preset_dict_size - copy_size;
+               memcpy(next->coder->dict.buf, lz_options.preset_dict + offset,
+                               copy_size);
+               next->coder->dict.pos = copy_size;
+               next->coder->dict.full = copy_size;
+       }
+
        // Miscellaneous initializations
        next->coder->next_finished = false;
        next->coder->this_finished = false;
index 5ac4405..9041d0b 100644 (file)
@@ -52,6 +52,13 @@ typedef struct {
 
 
 typedef struct {
+       size_t dict_size;
+       const uint8_t *preset_dict;
+       size_t preset_dict_size;
+} lzma_lz_options;
+
+
+typedef struct {
        /// Data specific to the LZ-based decoder
        lzma_coder *coder;
 
@@ -86,7 +93,7 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
                lzma_allocator *allocator, const lzma_filter_info *filters,
                lzma_ret (*lz_init)(lzma_lz_decoder *lz,
                        lzma_allocator *allocator, const void *options,
-                       size_t *dict_size));
+                       lzma_lz_options *lz_options));
 
 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
 
index 7bd6d03..bd37953 100644 (file)
@@ -363,7 +363,8 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
 
 
 static bool
-lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
+lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
+               const lzma_lz_options *lz_options)
 {
        // Allocate the history buffer.
        if (mf->buffer == NULL) {
@@ -421,6 +422,19 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
        // we avoid wasting RAM and improve initialization speed a lot.
        //memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
 
+       // Handle preset dictionary.
+       if (lz_options->preset_dict != NULL
+                       && lz_options->preset_dict_size > 0) {
+               // If the preset dictionary is bigger than the actual
+               // dictionary, use only the tail.
+               mf->write_pos = MIN(lz_options->preset_dict_size, mf->size);
+               memcpy(mf->buffer, lz_options->preset_dict
+                               + lz_options->preset_dict_size - mf->write_pos,
+                               mf->write_pos);
+               mf->action = LZMA_SYNC_FLUSH;
+               mf->skip(mf, mf->write_pos);
+       }
+
        mf->action = LZMA_RUN;
 
        return false;
@@ -509,7 +523,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
        // Allocate new buffers if needed, and do the rest of
        // the initialization.
-       if (lz_encoder_init(&next->coder->mf, allocator))
+       if (lz_encoder_init(&next->coder->mf, allocator, &lz_options))
                return LZMA_MEM_ERROR;
 
        // Initialize the next filter in the chain, if any.
index ff90803..fe925a5 100644 (file)
@@ -230,7 +230,7 @@ lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
 
 static lzma_ret
 lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
-               const void *options, size_t *dict_size)
+               const void *opt, lzma_lz_options *lz_options)
 {
        if (lz->coder == NULL) {
                lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
@@ -243,12 +243,15 @@ lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
                lz->coder->lzma = LZMA_LZ_DECODER_INIT;
        }
 
+       const lzma_options_lzma *options = opt;
+
        lz->coder->sequence = SEQ_CONTROL;
        lz->coder->need_properties = true;
-       lz->coder->need_dictionary_reset = true;
+       lz->coder->need_dictionary_reset = options->preset_dict == NULL
+                       || options->preset_dict_size == 0;
 
        return lzma_lzma_decoder_create(&lz->coder->lzma,
-                       allocator, options, dict_size);
+                       allocator, options, lz_options);
 }
 
 
index 81b6f97..cc676d5 100644 (file)
@@ -318,15 +318,17 @@ lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
                lz->coder->lzma = NULL;
        }
 
-       lz->coder->sequence = SEQ_INIT;
-       lz->coder->need_properties = true;
-       lz->coder->need_state_reset = false;
-       lz->coder->need_dictionary_reset = true;
-
        lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
        lz->coder->opt_new = lz->coder->opt_cur.persistent
                                ? options : NULL;
 
+       lz->coder->sequence = SEQ_INIT;
+       lz->coder->need_properties = true;
+       lz->coder->need_state_reset = false;
+       lz->coder->need_dictionary_reset
+                       = lz->coder->opt_cur.preset_dict == NULL
+                       || lz->coder->opt_cur.preset_dict_size == 0;
+
        // Initialize LZMA encoder
        return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
                        &lz->coder->opt_cur, lz_options));
index 03e3251..80a9fd6 100644 (file)
@@ -941,7 +941,7 @@ lzma_decoder_reset(lzma_coder *coder, const void *opt)
 
 extern lzma_ret
 lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
-               const void *opt, size_t *dict_size)
+               const void *opt, lzma_lz_options *lz_options)
 {
        if (lz->coder == NULL) {
                lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
@@ -956,7 +956,9 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
        // All dictionary sizes are OK here. LZ decoder will take care of
        // the special cases.
        const lzma_options_lzma *options = opt;
-       *dict_size = options->dict_size;
+       lz_options->dict_size = options->dict_size;
+       lz_options->preset_dict = options->preset_dict;
+       lz_options->preset_dict_size = options->preset_dict_size;
 
        return LZMA_OK;
 }
@@ -967,13 +969,13 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
 /// the LZ initialization).
 static lzma_ret
 lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
-               const void *options, size_t *dict_size)
+               const void *options, lzma_lz_options *lz_options)
 {
        if (!is_lclppb_valid(options))
                return LZMA_PROG_ERROR;
 
        return_if_error(lzma_lzma_decoder_create(
-                       lz, allocator, options, dict_size));
+                       lz, allocator, options, lz_options));
 
        lzma_decoder_reset(lz->coder, options);
        lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
index 133d260..1584405 100644 (file)
@@ -48,7 +48,7 @@ extern bool lzma_lzma_lclppb_decode(
 /// LZMA2 decoders.
 extern lzma_ret lzma_lzma_decoder_create(
                lzma_lz_decoder *lz, lzma_allocator *allocator,
-               const void *opt, size_t *dict_size);
+               const void *opt, lzma_lz_options *lz_options);
 
 /// Gets memory usage without validating lc/lp/pb. This is used by LZMA2
 /// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb.
index 79bb8f9..cf7637d 100644 (file)
@@ -274,6 +274,8 @@ encode_symbol(lzma_coder *coder, lzma_mf *mf,
 static bool
 encode_init(lzma_coder *coder, lzma_mf *mf)
 {
+       assert(mf_position(mf) == 0);
+
        if (mf->read_pos == mf->read_limit) {
                if (mf->action == LZMA_RUN)
                        return false; // We cannot do anything.
@@ -594,7 +596,12 @@ lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
                        return LZMA_OPTIONS_ERROR;
        }
 
-       coder->is_initialized = false;
+       // We don't need to write the first byte as literal if there is
+       // a non-empty preset dictionary. encode_init() wouldn't even work
+       // if there is a non-empty preset dictionary, because encode_init()
+       // assumes that position is zero and previous byte is also zero.
+       coder->is_initialized = options->preset_dict != NULL
+                       && options->preset_dict_size > 0;
        coder->is_flushed = false;
 
        set_lz_options(lz_options, options);