2 * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org)
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 #include "archive_endian.h"
34 #include <cm3p/zlib.h> /* crc32 */
42 #include "archive_crc32.h"
45 #include "archive_entry.h"
46 #include "archive_entry_locale.h"
47 #include "archive_ppmd7_private.h"
48 #include "archive_entry_private.h"
53 #include "archive_blake2.h"
56 /*#define CHECK_CRC_ON_SOLID_SKIP*/
57 /*#define DONT_FAIL_ON_CRC_ERROR*/
/* Minimum / maximum of two values. These are function-like macros, so each
 * argument may be evaluated more than once: do not pass expressions that
 * have side effects. */
#define rar5_min(a, b) (((a) > (b)) ? (b) : (a))
#define rar5_max(a, b) (((a) > (b)) ? (a) : (b))

/* Number of elements of the array `X`, as a signed value. `X` must be a real
 * array (not a pointer). The argument is parenthesized inside `sizeof` so
 * that any array-valued expression can be passed safely. */
#define rar5_countof(X) ((ssize_t) (sizeof(X) / sizeof(*(X))))
65 #define DEBUG_CODE if(1)
66 #define LOG(...) do { printf("rar5: " __VA_ARGS__); puts(""); } while(0)
68 #define DEBUG_CODE if(0)
71 /* Real RAR5 magic number is:
73 * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00
76 * Retrieved with `rar5_signature()` by XOR'ing it with 0xA1, because I don't
77 * want to put this magic sequence in each binary that uses libarchive, so
78 * applications that scan through the file for this marker won't trigger on
81 * The array itself is decrypted in `rar5_init` function. */
83 static unsigned char rar5_signature_xor[] = { 243, 192, 211, 128, 187, 166, 160, 161 };
84 static const size_t g_unpack_window_size = 0x20000;
86 /* These could have been static const's, but they aren't, because of
88 #define MAX_NAME_IN_CHARS 2048
89 #define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS)
92 ssize_t bytes_remaining;
93 ssize_t unpacked_size;
94 int64_t last_offset; /* Used in sanity checks. */
95 int64_t last_size; /* Used in sanity checks. */
97 uint8_t solid : 1; /* Is this a solid stream? */
98 uint8_t service : 1; /* Is this file a service data? */
99 uint8_t eof : 1; /* Did we finish unpacking the file? */
100 uint8_t dir : 1; /* Is this file entry a directory? */
102 /* Optional time fields. */
108 /* Optional hash fields. */
109 uint32_t stored_crc32;
110 uint32_t calculated_crc32;
111 uint8_t blake2sp[32];
112 blake2sp_state b2state;
115 /* Optional redir fields */
117 uint64_t redir_flags;
119 ssize_t solid_window_size; /* Used in file format check. */
132 #define REDIR_SYMLINK_IS_DIR 1
136 REDIR_TYPE_UNIXSYMLINK = 1,
137 REDIR_TYPE_WINSYMLINK = 2,
138 REDIR_TYPE_JUNCTION = 3,
139 REDIR_TYPE_HARDLINK = 4,
140 REDIR_TYPE_FILECOPY = 5,
143 #define OWNER_USER_NAME 0x01
144 #define OWNER_GROUP_NAME 0x02
145 #define OWNER_USER_UID 0x04
146 #define OWNER_GROUP_GID 0x08
147 #define OWNER_MAXNAMELEN 256
150 FILTER_DELTA = 0, /* Generic pattern. */
151 FILTER_E8 = 1, /* Intel x86 code. */
152 FILTER_E8E9 = 2, /* Intel x86 code. */
153 FILTER_ARM = 3, /* ARM code. */
154 FILTER_AUDIO = 4, /* Audio filter, not used in RARv5. */
155 FILTER_RGB = 5, /* Color palette, not used in RARv5. */
156 FILTER_ITANIUM = 6, /* Intel's Itanium, not used in RARv5. */
157 FILTER_PPM = 7, /* Predictive pattern matching, not used in
168 ssize_t block_length;
187 struct decode_table {
189 int32_t decode_len[16];
190 uint32_t decode_pos[16];
192 uint8_t quick_len[1 << 10];
193 uint16_t quick_num[1 << 10];
194 uint16_t decode_num[306];
198 /* Flag used to specify if unpacker needs to reinitialize the
199 uncompression context. */
200 uint8_t initialized : 1;
202 /* Flag used when applying filters. */
203 uint8_t all_filters_applied : 1;
205 /* Flag used to skip file context reinitialization, used when unpacker
206 is skipping through different multivolume archives. */
207 uint8_t switch_multivolume : 1;
209 /* Flag used to specify if unpacker has processed the whole data block
210 or just a part of it. */
211 uint8_t block_parsing_finished : 1;
213 signed int notused : 4;
215 int flags; /* Uncompression flags. */
216 int method; /* Uncompression algorithm method. */
217 int version; /* Uncompression algorithm version. */
218 ssize_t window_size; /* Size of window_buf. */
219 uint8_t* window_buf; /* Circular buffer used during
221 uint8_t* filtered_buf; /* Buffer used when applying filters. */
222 const uint8_t* block_buf; /* Buffer used when merging blocks. */
223 size_t window_mask; /* Convenience field; window_size - 1. */
224 int64_t write_ptr; /* This amount of data has been unpacked
225 in the window buffer. */
226 int64_t last_write_ptr; /* This amount of data has been stored in
228 int64_t last_unstore_ptr; /* Counter of bytes extracted during
229 unstoring. This is separate from
230 last_write_ptr because of how SERVICE
231 base blocks are handled during skipping
232 in solid multiarchive archives. */
233 int64_t solid_offset; /* Additional offset inside the window
234 buffer, used in unpacking solid
236 ssize_t cur_block_size; /* Size of current data block. */
237 int last_len; /* Flag used in lzss decompression. */
239 /* Decode tables used during lzss uncompression. */
242 struct decode_table bd; /* huffman bit lengths */
244 struct decode_table ld; /* literals */
246 struct decode_table dd; /* distances */
248 struct decode_table ldd; /* lower bits of distances */
250 struct decode_table rd; /* repeating distances */
251 #define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC)
253 /* Circular deque for storing filters. */
254 struct cdeque filters;
255 int64_t last_block_start; /* Used for sanity checking. */
256 ssize_t last_block_length; /* Used for sanity checking. */
258 /* Distance cache used during lzss uncompression. */
261 /* Data buffer stack. */
262 struct data_ready dready[2];
265 /* Bit reader state. */
267 int8_t bit_addr; /* Current bit pointer inside current byte. */
268 int in_addr; /* Current byte pointer. */
271 /* RARv5 block header structure. Use bf_* functions to get values from
272 * block_flags_u8 field. I.e. bf_byte_count, etc. */
273 struct compressed_block_header {
274 /* block_flags_u8 contain fields encoded in little-endian bitfield:
276 * - table present flag (shr 7, and 1),
277 * - last block flag (shr 6, and 1),
278 * - byte_count (shr 3, and 7),
279 * - bit_size (shr 0, and 7).
281 uint8_t block_flags_u8;
285 /* RARv5 main header structure. */
287 /* Does the archive contain solid streams? */
	/* Is this a multi-file archive? */
298 struct generic_header {
299 uint8_t split_after : 1;
300 uint8_t split_before : 1;
307 unsigned int expected_vol_no;
311 /* Main context structure. */
313 int header_initialized;
315 /* Set to 1 if current file is positioned AFTER the magic value
316 * of the archive file. This is used in header reading functions. */
	/* Set to not zero if we're in skip mode (either by calling
	 * rar5_data_skip function or when skipping over solid streams).
	 * Set to 0 when in extraction mode. This is used during checksum
	 * calculation functions. */
325 /* Set to not zero if we're in block merging mode (i.e. when switching
326 * to another file in multivolume archive, last block from 1st archive
327 * needs to be merged with 1st block from 2nd archive). This flag
328 * guards against recursive use of the merging function, which doesn't
329 * support recursive calls. */
332 /* An offset to QuickOpen list. This is not supported by this unpacker,
333 * because we're focusing on streaming interface. QuickOpen is designed
334 * to make things quicker for non-stream interfaces, so it's not our
336 uint64_t qlist_offset;
338 /* An offset to additional Recovery data. This is not supported by this
339 * unpacker. Recovery data are additional Reed-Solomon codes that could
340 * be used to calculate bytes that are missing in archive or are
344 /* Various context variables grouped to different structures. */
345 struct generic_header generic;
346 struct main_header main;
347 struct comp_state cstate;
348 struct file_header file;
349 struct bit_reader bits;
350 struct multivolume vol;
352 /* The header of currently processed RARv5 block. Used in main
353 * decompression logic loop. */
354 struct compressed_block_header last_block_hdr;
357 /* Forward function declarations. */
359 static void rar5_signature(char *buf);
360 static int verify_global_checksums(struct archive_read* a);
361 static int rar5_read_data_skip(struct archive_read *a);
362 static int push_data_ready(struct archive_read* a, struct rar5* rar,
363 const uint8_t* buf, size_t size, int64_t offset);
365 /* CDE_xxx = Circular Double Ended (Queue) return values. */
366 enum CDE_RETURN_VALUES {
367 CDE_OK, CDE_ALLOC, CDE_PARAM, CDE_OUT_OF_BOUNDS,
370 /* Clears the contents of this circular deque. */
371 static void cdeque_clear(struct cdeque* d) {
/* Creates a new circular deque object. Capacity must be a power of 2: 8, 16,
 * 32, 64, 128, etc. When the deque is full, pushing another item fails with
 * CDE_OUT_OF_BOUNDS; existing entries are not overwritten. */
380 static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) {
381 if(d == NULL || max_capacity_power_of_2 == 0)
384 d->cap_mask = max_capacity_power_of_2 - 1;
387 if((max_capacity_power_of_2 & d->cap_mask) != 0)
391 d->arr = malloc(sizeof(void*) * max_capacity_power_of_2);
393 return d->arr ? CDE_OK : CDE_ALLOC;
396 /* Return the current size (not capacity) of circular deque `d`. */
397 static size_t cdeque_size(struct cdeque* d) {
401 /* Returns the first element of current circular deque. Note that this function
402 * doesn't perform any bounds checking. If you need bounds checking, use
403 * `cdeque_front()` function instead. */
404 static void cdeque_front_fast(struct cdeque* d, void** value) {
405 *value = (void*) d->arr[d->beg_pos];
408 /* Returns the first element of current circular deque. This function
409 * performs bounds checking. */
410 static int cdeque_front(struct cdeque* d, void** value) {
412 cdeque_front_fast(d, value);
415 return CDE_OUT_OF_BOUNDS;
/* Pushes a new element into the end of this circular deque object. If the
 * deque is already full, nothing is stored and CDE_OUT_OF_BOUNDS is returned. */
420 static int cdeque_push_back(struct cdeque* d, void* item) {
424 if(d->size == d->cap_mask + 1)
425 return CDE_OUT_OF_BOUNDS;
427 d->arr[d->end_pos] = (size_t) item;
428 d->end_pos = (d->end_pos + 1) & d->cap_mask;
434 /* Pops a front element of this circular deque object and returns its value.
435 * This function doesn't perform any bounds checking. */
436 static void cdeque_pop_front_fast(struct cdeque* d, void** value) {
437 *value = (void*) d->arr[d->beg_pos];
438 d->beg_pos = (d->beg_pos + 1) & d->cap_mask;
442 /* Pops a front element of this circular deque object and returns its value.
443 * This function performs bounds checking. */
444 static int cdeque_pop_front(struct cdeque* d, void** value) {
449 return CDE_OUT_OF_BOUNDS;
451 cdeque_pop_front_fast(d, value);
455 /* Convenience function to cast filter_info** to void **. */
static void** cdeque_filter_p(struct filter_info** f) {
	/* Route the conversion through an integer so that the change between
	 * the two unrelated pointer-to-pointer types is explicit. */
	const size_t raw_address = (size_t) f;

	return (void**) raw_address;
}
460 /* Convenience function to cast filter_info* to void *. */
static void* cdeque_filter(struct filter_info* f) {
	/* The function returns `void*`, so cast to exactly that type (the
	 * previous `void**` cast only worked through an implicit
	 * conversion back to `void*`). */
	return (void*) (size_t) f;
}
465 /* Destroys this circular deque object. Deallocates the memory of the
466 * collection buffer, but doesn't deallocate the memory of any pointer passed
467 * to this deque as a value. */
468 static void cdeque_free(struct cdeque* d) {
484 uint8_t bf_bit_size(const struct compressed_block_header* hdr) {
485 return hdr->block_flags_u8 & 7;
489 uint8_t bf_byte_count(const struct compressed_block_header* hdr) {
490 return (hdr->block_flags_u8 >> 3) & 7;
494 uint8_t bf_is_table_present(const struct compressed_block_header* hdr) {
495 return (hdr->block_flags_u8 >> 7) & 1;
498 static inline struct rar5* get_context(struct archive_read* a) {
499 return (struct rar5*) a->format->data;
502 /* Convenience functions used by filter implementations. */
static void circular_memcpy(uint8_t* dst, uint8_t* window, const uint64_t mask,
    int64_t start, int64_t end)
{
	/* Positions of both ends of the range inside the circular window. */
	const uint64_t first = start & mask;
	const uint64_t last = end & mask;

	if(first <= last) {
		/* The range does not cross the end of the window, so it is
		 * contiguous in memory: one copy is enough. */
		memcpy(dst, &window[first], (size_t) (end - start));
	} else {
		/* The range wraps around: copy the tail of the window first,
		 * then continue from the window's beginning. */
		const ssize_t tail_len = mask + 1 - first;
		const ssize_t head_len = last;

		memcpy(dst, &window[first], tail_len);
		memcpy(dst + tail_len, window, head_len);
	}
}
517 static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) {
518 uint8_t linear_buf[4];
519 circular_memcpy(linear_buf, rar->cstate.window_buf,
520 rar->cstate.window_mask, offset, offset + 4);
521 return archive_le32dec(linear_buf);
524 static void write_filter_data(struct rar5* rar, uint32_t offset,
527 archive_le32enc(&rar->cstate.filtered_buf[offset], value);
530 /* Allocates a new filter descriptor and adds it to the filter array. */
531 static struct filter_info* add_new_filter(struct rar5* rar) {
532 struct filter_info* f =
533 (struct filter_info*) calloc(1, sizeof(struct filter_info));
539 cdeque_push_back(&rar->cstate.filters, cdeque_filter(f));
543 static int run_delta_filter(struct rar5* rar, struct filter_info* flt) {
545 ssize_t dest_pos, src_pos = 0;
547 for(i = 0; i < flt->channels; i++) {
548 uint8_t prev_byte = 0;
550 dest_pos < flt->block_length;
551 dest_pos += flt->channels)
555 byte = rar->cstate.window_buf[
556 (rar->cstate.solid_offset + flt->block_start +
557 src_pos) & rar->cstate.window_mask];
560 rar->cstate.filtered_buf[dest_pos] = prev_byte;
568 static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt,
571 const uint32_t file_size = 0x1000000;
574 circular_memcpy(rar->cstate.filtered_buf,
575 rar->cstate.window_buf, rar->cstate.window_mask,
576 rar->cstate.solid_offset + flt->block_start,
577 rar->cstate.solid_offset + flt->block_start + flt->block_length);
579 for(i = 0; i < flt->block_length - 4;) {
580 uint8_t b = rar->cstate.window_buf[
581 (rar->cstate.solid_offset + flt->block_start +
582 i++) & rar->cstate.window_mask];
585 * 0xE8 = x86's call <relative_addr_uint32> (function call)
586 * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump)
588 if(b == 0xE8 || (extended && b == 0xE9)) {
591 uint32_t offset = (i + flt->block_start) % file_size;
593 addr = read_filter_data(rar,
594 (uint32_t)(rar->cstate.solid_offset +
595 flt->block_start + i) & rar->cstate.window_mask);
597 if(addr & 0x80000000) {
598 if(((addr + offset) & 0x80000000) == 0) {
599 write_filter_data(rar, (uint32_t)i,
603 if((addr - file_size) & 0x80000000) {
604 uint32_t naddr = addr - offset;
605 write_filter_data(rar, (uint32_t)i,
617 static int run_arm_filter(struct rar5* rar, struct filter_info* flt) {
621 circular_memcpy(rar->cstate.filtered_buf,
622 rar->cstate.window_buf, rar->cstate.window_mask,
623 rar->cstate.solid_offset + flt->block_start,
624 rar->cstate.solid_offset + flt->block_start + flt->block_length);
626 for(i = 0; i < flt->block_length - 3; i += 4) {
627 uint8_t* b = &rar->cstate.window_buf[
628 (rar->cstate.solid_offset +
629 flt->block_start + i + 3) & rar->cstate.window_mask];
632 /* 0xEB = ARM's BL (branch + link) instruction. */
633 offset = read_filter_data(rar,
634 (rar->cstate.solid_offset + flt->block_start + i) &
635 rar->cstate.window_mask) & 0x00ffffff;
637 offset -= (uint32_t) ((i + flt->block_start) / 4);
638 offset = (offset & 0x00ffffff) | 0xeb000000;
639 write_filter_data(rar, (uint32_t)i, offset);
646 static int run_filter(struct archive_read* a, struct filter_info* flt) {
648 struct rar5* rar = get_context(a);
650 free(rar->cstate.filtered_buf);
652 rar->cstate.filtered_buf = malloc(flt->block_length);
653 if(!rar->cstate.filtered_buf) {
654 archive_set_error(&a->archive, ENOMEM,
655 "Can't allocate memory for filter data.");
656 return ARCHIVE_FATAL;
661 ret = run_delta_filter(rar, flt);
667 ret = run_e8e9_filter(rar, flt,
668 flt->type == FILTER_E8E9);
672 ret = run_arm_filter(rar, flt);
676 archive_set_error(&a->archive,
677 ARCHIVE_ERRNO_FILE_FORMAT,
678 "Unsupported filter type: 0x%x", flt->type);
679 return ARCHIVE_FATAL;
682 if(ret != ARCHIVE_OK) {
683 /* Filter has failed. */
687 if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf,
688 flt->block_length, rar->cstate.last_write_ptr))
690 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
691 "Stack overflow when submitting unpacked data");
693 return ARCHIVE_FATAL;
696 rar->cstate.last_write_ptr += flt->block_length;
700 /* The `push_data` function submits the selected data range to the user.
701 * Next call of `use_data` will use the pointer, size and offset arguments
702 * that are specified here. These arguments are pushed to the FIFO stack here,
703 * and popped from the stack by the `use_data` function. */
704 static void push_data(struct archive_read* a, struct rar5* rar,
705 const uint8_t* buf, int64_t idx_begin, int64_t idx_end)
707 const uint64_t wmask = rar->cstate.window_mask;
708 const ssize_t solid_write_ptr = (rar->cstate.solid_offset +
709 rar->cstate.last_write_ptr) & wmask;
711 idx_begin += rar->cstate.solid_offset;
712 idx_end += rar->cstate.solid_offset;
714 /* Check if our unpacked data is wrapped inside the window circular
715 * buffer. If it's not wrapped, it can be copied out by using
716 * a single memcpy, but when it's wrapped, we need to copy the first
717 * part with one memcpy, and the second part with another memcpy. */
719 if((idx_begin & wmask) > (idx_end & wmask)) {
		/* The data is wrapped (begin offset is bigger than end
722 const ssize_t frag1_size = rar->cstate.window_size -
724 const ssize_t frag2_size = idx_end & wmask;
726 /* Copy the first part of the buffer first. */
727 push_data_ready(a, rar, buf + solid_write_ptr, frag1_size,
728 rar->cstate.last_write_ptr);
730 /* Copy the second part of the buffer. */
731 push_data_ready(a, rar, buf, frag2_size,
732 rar->cstate.last_write_ptr + frag1_size);
734 rar->cstate.last_write_ptr += frag1_size + frag2_size;
736 /* Data is not wrapped, so we can just use one call to copy the
738 push_data_ready(a, rar,
739 buf + solid_write_ptr, (idx_end - idx_begin) & wmask,
740 rar->cstate.last_write_ptr);
742 rar->cstate.last_write_ptr += idx_end - idx_begin;
746 /* Convenience function that submits the data to the user. It uses the
747 * unpack window buffer as a source location. */
748 static void push_window_data(struct archive_read* a, struct rar5* rar,
749 int64_t idx_begin, int64_t idx_end)
751 push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end);
754 static int apply_filters(struct archive_read* a) {
755 struct filter_info* flt;
756 struct rar5* rar = get_context(a);
759 rar->cstate.all_filters_applied = 0;
761 /* Get the first filter that can be applied to our data. The data
762 * needs to be fully unpacked before the filter can be run. */
763 if(CDE_OK == cdeque_front(&rar->cstate.filters,
764 cdeque_filter_p(&flt))) {
765 /* Check if our unpacked data fully covers this filter's
767 if(rar->cstate.write_ptr > flt->block_start &&
768 rar->cstate.write_ptr >= flt->block_start +
770 /* Check if we have some data pending to be written
771 * right before the filter's start offset. */
772 if(rar->cstate.last_write_ptr == flt->block_start) {
773 /* Run the filter specified by descriptor
775 ret = run_filter(a, flt);
776 if(ret != ARCHIVE_OK) {
777 /* Filter failure, return error. */
			/* Filter descriptor won't be needed anymore
			 * after it's used, so remove it from the
			 * filter list and free its memory. */
784 (void) cdeque_pop_front(&rar->cstate.filters,
785 cdeque_filter_p(&flt));
789 /* We can't run filters yet, dump the memory
790 * right before the filter. */
791 push_window_data(a, rar,
792 rar->cstate.last_write_ptr,
796 /* Return 'filter applied or not needed' state to the
798 return ARCHIVE_RETRY;
802 rar->cstate.all_filters_applied = 1;
806 static void dist_cache_push(struct rar5* rar, int value) {
807 int* q = rar->cstate.dist_cache;
815 static int dist_cache_touch(struct rar5* rar, int idx) {
816 int* q = rar->cstate.dist_cache;
817 int i, dist = q[idx];
819 for(i = idx; i > 0; i--)
826 static void free_filters(struct rar5* rar) {
827 struct cdeque* d = &rar->cstate.filters;
829 /* Free any remaining filters. All filters should be naturally
830 * consumed by the unpacking function, so remaining filters after
831 * unpacking normally mean that unpacking wasn't successful.
832 * But still of course we shouldn't leak memory in such case. */
834 /* cdeque_size() is a fast operation, so we can use it as a loop
836 while(cdeque_size(d) > 0) {
837 struct filter_info* f = NULL;
839 /* Pop_front will also decrease the collection's size. */
840 if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f)))
846 /* Also clear out the variables needed for sanity checking. */
847 rar->cstate.last_block_start = 0;
848 rar->cstate.last_block_length = 0;
851 static void reset_file_context(struct rar5* rar) {
852 memset(&rar->file, 0, sizeof(rar->file));
853 blake2sp_init(&rar->file.b2state, 32);
855 if(rar->main.solid) {
856 rar->cstate.solid_offset += rar->cstate.write_ptr;
858 rar->cstate.solid_offset = 0;
861 rar->cstate.write_ptr = 0;
862 rar->cstate.last_write_ptr = 0;
863 rar->cstate.last_unstore_ptr = 0;
865 rar->file.redir_type = REDIR_TYPE_NONE;
866 rar->file.redir_flags = 0;
871 static inline int get_archive_read(struct archive* a,
872 struct archive_read** ar)
874 *ar = (struct archive_read*) a;
875 archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
876 "archive_read_support_format_rar5");
881 static int read_ahead(struct archive_read* a, size_t how_many,
888 *ptr = __archive_read_ahead(a, how_many, &avail);
896 static int consume(struct archive_read* a, int64_t how_many) {
899 ret = how_many == __archive_read_consume(a, how_many)
907 * Read a RAR5 variable sized numeric value. This value will be stored in
908 * `pvalue`. The `pvalue_len` argument points to a variable that will receive
909 * the byte count that was consumed in order to decode the `pvalue` value, plus
912 * pvalue_len is optional and can be NULL.
914 * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume
915 * the number of bytes that `pvalue_len` value contains. If the `pvalue_len`
916 * is NULL, this consuming operation is done automatically.
918 * Returns 1 if *pvalue was successfully read.
919 * Returns 0 if there was an error. In this case, *pvalue contains an
923 static int read_var(struct archive_read* a, uint64_t* pvalue,
924 uint64_t* pvalue_len)
931 /* We will read maximum of 8 bytes. We don't have to handle the
932 * situation to read the RAR5 variable-sized value stored at the end of
933 * the file, because such situation will never happen. */
934 if(!read_ahead(a, 8, &p))
937 for(shift = 0, i = 0; i < 8; i++, shift += 7) {
940 /* Strip the MSB from the input byte and add the resulting
941 * number to the `result`. */
942 result += (b & (uint64_t)0x7F) << shift;
944 /* MSB set to 1 means we need to continue decoding process.
945 * MSB set to 0 means we're done.
947 * This conditional checks for the second case. */
948 if((b & 0x80) == 0) {
953 /* If the caller has passed the `pvalue_len` pointer,
954 * store the number of consumed bytes in it and do NOT
955 * consume those bytes, since the caller has all the
956 * information it needs to perform */
960 /* If the caller did not provide the
961 * `pvalue_len` pointer, it will not have the
962 * possibility to advance the file pointer,
963 * because it will not know how many bytes it
964 * needs to consume. This is why we handle
965 * such situation here automatically. */
966 if(ARCHIVE_OK != consume(a, 1 + i)) {
971 /* End of decoding process, return success. */
976 /* The decoded value takes the maximum number of 8 bytes.
977 * It's a maximum number of bytes, so end decoding process here
978 * even if the first bit of last byte is 1. */
986 if(ARCHIVE_OK != consume(a, 9)) {
994 static int read_var_sized(struct archive_read* a, size_t* pvalue,
1000 const int ret = pvalue_len ? read_var(a, &v, &v_size)
1001 : read_var(a, &v, NULL);
1003 if(ret == 1 && pvalue) {
1004 *pvalue = (size_t) v;
1008 /* Possible data truncation should be safe. */
1009 *pvalue_len = (size_t) v_size;
1015 static int read_bits_32(struct archive_read* a, struct rar5* rar,
1016 const uint8_t* p, uint32_t* value)
1018 if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
1019 archive_set_error(&a->archive,
1020 ARCHIVE_ERRNO_PROGRAMMER,
1021 "Premature end of stream during extraction of data (#1)");
1022 return ARCHIVE_FATAL;
1025 uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24;
1026 bits |= p[rar->bits.in_addr + 1] << 16;
1027 bits |= p[rar->bits.in_addr + 2] << 8;
1028 bits |= p[rar->bits.in_addr + 3];
1029 bits <<= rar->bits.bit_addr;
1030 bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr);
1035 static int read_bits_16(struct archive_read* a, struct rar5* rar,
1036 const uint8_t* p, uint16_t* value)
1038 if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
1039 archive_set_error(&a->archive,
1040 ARCHIVE_ERRNO_PROGRAMMER,
1041 "Premature end of stream during extraction of data (#2)");
1042 return ARCHIVE_FATAL;
1045 int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16;
1046 bits |= (int) p[rar->bits.in_addr + 1] << 8;
1047 bits |= (int) p[rar->bits.in_addr + 2];
1048 bits >>= (8 - rar->bits.bit_addr);
1049 *value = bits & 0xffff;
1053 static void skip_bits(struct rar5* rar, int bits) {
1054 const int new_bits = rar->bits.bit_addr + bits;
1055 rar->bits.in_addr += new_bits >> 3;
1056 rar->bits.bit_addr = new_bits & 7;
1060 static int read_consume_bits(struct archive_read* a, struct rar5* rar,
1061 const uint8_t* p, int n, int* value)
1066 if(n == 0 || n > 16) {
1067 /* This is a programmer error and should never happen
1069 return ARCHIVE_FATAL;
1072 ret = read_bits_16(a, rar, p, &v);
1073 if(ret != ARCHIVE_OK)
1087 static int read_u32(struct archive_read* a, uint32_t* pvalue) {
1089 if(!read_ahead(a, 4, &p))
1092 *pvalue = archive_le32dec(p);
1093 return ARCHIVE_OK == consume(a, 4) ? 1 : 0;
1096 static int read_u64(struct archive_read* a, uint64_t* pvalue) {
1098 if(!read_ahead(a, 8, &p))
1101 *pvalue = archive_le64dec(p);
1102 return ARCHIVE_OK == consume(a, 8) ? 1 : 0;
1105 static int bid_standard(struct archive_read* a) {
1107 char signature[sizeof(rar5_signature_xor)];
1109 rar5_signature(signature);
1111 if(!read_ahead(a, sizeof(rar5_signature_xor), &p))
1114 if(!memcmp(signature, p, sizeof(rar5_signature_xor)))
1120 static int rar5_bid(struct archive_read* a, int best_bid) {
1126 my_bid = bid_standard(a);
1134 static int rar5_options(struct archive_read *a, const char *key,
1140 /* No options supported in this version. Return the ARCHIVE_WARN code
1141 * to signal the options supervisor that the unpacker didn't handle
1142 * setting this option. */
1144 return ARCHIVE_WARN;
1147 static void init_header(struct archive_read* a) {
1148 a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5;
1149 a->archive.archive_format_name = "RAR5";
1152 static void init_window_mask(struct rar5* rar) {
1153 if (rar->cstate.window_size)
1154 rar->cstate.window_mask = rar->cstate.window_size - 1;
1156 rar->cstate.window_mask = 0;
1160 HFL_EXTRA_DATA = 0x0001,
1162 HFL_SKIP_IF_UNKNOWN = 0x0004,
1163 HFL_SPLIT_BEFORE = 0x0008,
1164 HFL_SPLIT_AFTER = 0x0010,
1166 HFL_INHERITED = 0x0040
1169 static int process_main_locator_extra_block(struct archive_read* a,
1172 uint64_t locator_flags;
1174 enum LOCATOR_FLAGS {
1175 QLIST = 0x01, RECOVERY = 0x02,
1178 if(!read_var(a, &locator_flags, NULL)) {
1182 if(locator_flags & QLIST) {
1183 if(!read_var(a, &rar->qlist_offset, NULL)) {
1187 /* qlist is not used */
1190 if(locator_flags & RECOVERY) {
1191 if(!read_var(a, &rar->rr_offset, NULL)) {
1195 /* rr is not used */
1201 static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar,
1202 ssize_t* extra_data_size)
1204 size_t hash_type = 0;
1211 if(!read_var_sized(a, &hash_type, &value_len))
1214 *extra_data_size -= value_len;
1215 if(ARCHIVE_OK != consume(a, value_len)) {
1219 /* The file uses BLAKE2sp checksum algorithm instead of plain old
1221 if(hash_type == BLAKE2sp) {
1223 const int hash_size = sizeof(rar->file.blake2sp);
1225 if(!read_ahead(a, hash_size, &p))
1228 rar->file.has_blake2 = 1;
1229 memcpy(&rar->file.blake2sp, p, hash_size);
1231 if(ARCHIVE_OK != consume(a, hash_size)) {
1235 *extra_data_size -= hash_size;
1237 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1238 "Unsupported hash type (0x%x)", (int) hash_type);
1239 return ARCHIVE_FATAL;
/* Converts a Windows timestamp (a count of 100-nanosecond ticks) into
 * seconds relative to the Unix epoch.
 *
 * The tick count is first reduced to whole seconds, then the distance
 * between the two epochs (11644473600 seconds) is subtracted. The
 * arithmetic is unsigned, so a timestamp earlier than the Unix epoch wraps
 * around instead of producing a negative number. */
static uint64_t time_win_to_unix(uint64_t win_time) {
	const uint64_t ticks_per_sec = 10000000;
	const uint64_t epoch_delta_sec = 11644473600LL;
	const uint64_t whole_seconds = win_time / ticks_per_sec;

	return whole_seconds - epoch_delta_sec;
}
1251 static int parse_htime_item(struct archive_read* a, char unix_time,
1252 uint64_t* where, ssize_t* extra_data_size)
1256 if(!read_u32(a, &time_val))
1259 *extra_data_size -= 4;
1260 *where = (uint64_t) time_val;
1262 uint64_t windows_time;
1263 if(!read_u64(a, &windows_time))
1266 *where = time_win_to_unix(windows_time);
1267 *extra_data_size -= 8;
1273 static int parse_file_extra_version(struct archive_read* a,
1274 struct archive_entry* e, ssize_t* extra_data_size)
1278 size_t value_len = 0;
1279 struct archive_string version_string;
1280 struct archive_string name_utf8_string;
1281 const char* cur_filename;
1283 /* Flags are ignored. */
1284 if(!read_var_sized(a, &flags, &value_len))
1287 *extra_data_size -= value_len;
1288 if(ARCHIVE_OK != consume(a, value_len))
1291 if(!read_var_sized(a, &version, &value_len))
1294 *extra_data_size -= value_len;
1295 if(ARCHIVE_OK != consume(a, value_len))
1298 /* extra_data_size should be zero here. */
1300 cur_filename = archive_entry_pathname_utf8(e);
1301 if(cur_filename == NULL) {
1302 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1303 "Version entry without file name");
1304 return ARCHIVE_FATAL;
1307 archive_string_init(&version_string);
1308 archive_string_init(&name_utf8_string);
1310 /* Prepare a ;123 suffix for the filename, where '123' is the version
1311 * value of this file. */
1312 archive_string_sprintf(&version_string, ";%zu", version);
1314 /* Build the new filename. */
1315 archive_strcat(&name_utf8_string, cur_filename);
1316 archive_strcat(&name_utf8_string, version_string.s);
1318 /* Apply the new filename into this file's context. */
1319 archive_entry_update_pathname_utf8(e, name_utf8_string.s);
1322 archive_string_free(&version_string);
1323 archive_string_free(&name_utf8_string);
/* Parses the high-precision time record of a file header.
 *
 * A variable-length flags field is read and consumed first. For each of
 * HAS_MTIME, HAS_CTIME and HAS_ATIME that is set, one time item is parsed
 * into rar->file.e_mtime / e_ctime / e_atime and copied to the entry with a
 * nanosecond part of 0. When HAS_UNIX_NS is set, a 32-bit value is read into
 * rar->file.e_unix_ns and 4 bytes are subtracted from *extra_data_size.
 *
 * NOTE(review): the result of parse_htime_item() is not examined in the
 * lines shown here; confirm that a failed parse cannot leave a stale value
 * in the file context before it is copied to the entry. */
1327 static int parse_file_extra_htime(struct archive_read* a,
1328 struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size)
1342 if(!read_var_sized(a, &flags, &value_len))
1345 *extra_data_size -= value_len;
1346 if(ARCHIVE_OK != consume(a, value_len)) {
/* IS_UNIX selects the time encoding that parse_htime_item() is asked to
 * decode for every item below. */
1350 unix_time = flags & IS_UNIX;
1352 if(flags & HAS_MTIME) {
1353 parse_htime_item(a, unix_time, &rar->file.e_mtime,
1355 archive_entry_set_mtime(e, rar->file.e_mtime, 0);
1358 if(flags & HAS_CTIME) {
1359 parse_htime_item(a, unix_time, &rar->file.e_ctime,
1361 archive_entry_set_ctime(e, rar->file.e_ctime, 0);
1364 if(flags & HAS_ATIME) {
1365 parse_htime_item(a, unix_time, &rar->file.e_atime,
1367 archive_entry_set_atime(e, rar->file.e_atime, 0);
1370 if(flags & HAS_UNIX_NS) {
1371 if(!read_u32(a, &rar->file.e_unix_ns))
1374 *extra_data_size -= 4;
/* Parses the redirection (link) record of a file header.
 *
 * Reads the redirection type and flags into rar->file.redir_type and
 * rar->file.redir_flags, then the target length and the target bytes. The
 * target is copied into a NUL-terminated local buffer and applied to the
 * entry as a symlink target (UNIX/Windows symlink types) or as a hardlink
 * target (hardlink type). Other redirection types leave the entry untouched.
 *
 * *extra_data_size is reduced by the encoded size of both variable-length
 * fields and by target_size + 1 (presumably the extra byte accounts for a
 * one-byte length field; verify against the format description).
 *
 * Returns ARCHIVE_FATAL with ARCHIVE_ERRNO_FILE_FORMAT when the target is
 * longer than MAX_NAME_IN_CHARS - 1 bytes or has zero length. */
1380 static int parse_file_extra_redir(struct archive_read* a,
1381 struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size)
1383 uint64_t value_size = 0;
1384 size_t target_size = 0;
1385 char target_utf8_buf[MAX_NAME_IN_BYTES];
1388 if(!read_var(a, &rar->file.redir_type, &value_size))
1390 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1392 *extra_data_size -= value_size;
1394 if(!read_var(a, &rar->file.redir_flags, &value_size))
1396 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1398 *extra_data_size -= value_size;
1400 if(!read_var_sized(a, &target_size, NULL))
1402 *extra_data_size -= target_size + 1;
1404 if(!read_ahead(a, target_size, &p))
/* Both length checks run before the memcpy below, which bounds the copy and
 * the terminator write (assumes MAX_NAME_IN_CHARS does not exceed
 * MAX_NAME_IN_BYTES; TODO confirm against the macro definitions). */
1407 if(target_size > (MAX_NAME_IN_CHARS - 1)) {
1408 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1409 "Link target is too long");
1410 return ARCHIVE_FATAL;
1413 if(target_size == 0) {
1414 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1415 "No link target specified");
1416 return ARCHIVE_FATAL;
1419 memcpy(target_utf8_buf, p, target_size);
1420 target_utf8_buf[target_size] = 0;
1422 if(ARCHIVE_OK != consume(a, (int64_t)target_size))
1425 switch(rar->file.redir_type) {
1426 case REDIR_TYPE_UNIXSYMLINK:
1427 case REDIR_TYPE_WINSYMLINK:
1428 archive_entry_set_filetype(e, AE_IFLNK);
1429 archive_entry_update_symlink_utf8(e, target_utf8_buf);
1430 if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) {
1431 archive_entry_set_symlink_type(e,
1432 AE_SYMLINK_TYPE_DIRECTORY);
1434 archive_entry_set_symlink_type(e,
1435 AE_SYMLINK_TYPE_FILE);
1439 case REDIR_TYPE_HARDLINK:
1440 archive_entry_set_filetype(e, AE_IFREG);
1441 archive_entry_update_hardlink_utf8(e, target_utf8_buf);
1445 /* Unknown redir type, skip it. */
/* Parses the owner record of a file header.
 *
 * A flags field selects which of four optional items follow, in this order:
 * user name (OWNER_USER_NAME), group name (OWNER_GROUP_NAME), numeric user
 * id (OWNER_USER_UID) and numeric group id (OWNER_GROUP_GID). Each item that
 * is present is read, consumed, subtracted from *extra_data_size and stored
 * on the entry.
 *
 * Names are copied into a fixed local buffer: at most OWNER_MAXNAMELEN - 1
 * bytes are kept and NUL-terminated, while the full name_size bytes are
 * still consumed from the stream so that parsing stays aligned. */
1451 static int parse_file_extra_owner(struct archive_read* a,
1452 struct archive_entry* e, ssize_t* extra_data_size)
1455 uint64_t value_size = 0;
1457 size_t name_len = 0;
1458 size_t name_size = 0;
1459 char namebuf[OWNER_MAXNAMELEN];
1462 if(!read_var(a, &flags, &value_size))
1464 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1466 *extra_data_size -= value_size;
1468 if ((flags & OWNER_USER_NAME) != 0) {
1469 if(!read_var_sized(a, &name_size, NULL))
1471 *extra_data_size -= name_size + 1;
1473 if(!read_ahead(a, name_size, &p))
/* Clamp the copied length so the terminator always fits in namebuf. */
1476 if (name_size >= OWNER_MAXNAMELEN) {
1477 name_len = OWNER_MAXNAMELEN - 1;
1479 name_len = name_size;
1482 memcpy(namebuf, p, name_len);
1483 namebuf[name_len] = 0;
1484 if(ARCHIVE_OK != consume(a, (int64_t)name_size))
1487 archive_entry_set_uname(e, namebuf);
1489 if ((flags & OWNER_GROUP_NAME) != 0) {
1490 if(!read_var_sized(a, &name_size, NULL))
1492 *extra_data_size -= name_size + 1;
1494 if(!read_ahead(a, name_size, &p))
/* Same clamping as for the user name above. */
1497 if (name_size >= OWNER_MAXNAMELEN) {
1498 name_len = OWNER_MAXNAMELEN - 1;
1500 name_len = name_size;
1503 memcpy(namebuf, p, name_len);
1504 namebuf[name_len] = 0;
1505 if(ARCHIVE_OK != consume(a, (int64_t)name_size))
1508 archive_entry_set_gname(e, namebuf);
1510 if ((flags & OWNER_USER_UID) != 0) {
1511 if(!read_var(a, &id, &value_size))
1513 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1515 *extra_data_size -= value_size;
1517 archive_entry_set_uid(e, (la_int64_t)id);
1519 if ((flags & OWNER_GROUP_GID) != 0) {
1520 if(!read_var(a, &id, &value_size))
1522 if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1524 *extra_data_size -= value_size;
1526 archive_entry_set_gid(e, (la_int64_t)id);
/* Walks the extra area of a file header and dispatches every record to the
 * matching parse_file_extra_*() helper.
 *
 * While extra_data_size is positive, a record size and a record id are read
 * and consumed, and their encoded lengths are subtracted from the remaining
 * count. The id selects the hash, htime, redir, owner or version parser.
 *
 * For an id without a parser, the function consumes extra_data_size bytes,
 * that is the whole remainder of the extra area and not only the current
 * record, and returns the result of that consume() call.
 *
 * extra_data_size is received by value, so the bookkeeping done here is not
 * visible to the caller. */
1531 static int process_head_file_extra(struct archive_read* a,
1532 struct archive_entry* e, struct rar5* rar, ssize_t extra_data_size)
1534 size_t extra_field_size;
1535 size_t extra_field_id = 0;
1536 int ret = ARCHIVE_FATAL;
1539 while(extra_data_size > 0) {
1540 if(!read_var_sized(a, &extra_field_size, &var_size))
1543 extra_data_size -= var_size;
1544 if(ARCHIVE_OK != consume(a, var_size)) {
1548 if(!read_var_sized(a, &extra_field_id, &var_size))
1551 extra_data_size -= var_size;
1552 if(ARCHIVE_OK != consume(a, var_size)) {
1556 switch(extra_field_id) {
1558 ret = parse_file_extra_hash(a, rar,
1562 ret = parse_file_extra_htime(a, e, rar,
1566 ret = parse_file_extra_redir(a, e, rar,
1570 ret = parse_file_extra_owner(a, e,
1574 ret = parse_file_extra_version(a, e,
1582 /* Skip unsupported entry. */
1583 return consume(a, extra_data_size);
1587 if(ret != ARCHIVE_OK) {
1588 /* Attribute not implemented. */
/* Parses a FILE base block header (also used for SERVICE blocks) and fills
 * in both the archive entry and the reader's file/compression state.
 *
 * Visible steps, in order:
 *  - clear the entry and, unless a multivolume switch is in progress, reset
 *    the file context;
 *  - read the optional extra-area size (HFL_EXTRA_DATA) and data size
 *    (HFL_DATA) announced by block_flags;
 *  - read file flags, unpacked size, attributes, optional mtime (UTIME) and
 *    optional CRC32;
 *  - decode the compression info word into method, version, solid flag and
 *    window size, and validate the window size (at most 64 MiB, non-zero
 *    for anything that is not a directory);
 *  - for solid files, require an existing window buffer and a window size
 *    equal to the one remembered from the first solid file;
 *  - grow an already allocated window buffer with realloc() when the new
 *    window is larger;
 *  - translate host-OS attributes into a mode (and fflags text on Windows);
 *  - read and apply the file name, then process the extra area;
 *  - apply size, mtime and stored CRC32, and reset the unpacking state
 *    unless switching volumes.
 *
 * Returns ARCHIVE_RETRY when the header carries the split-before mark, and
 * ARCHIVE_FATAL with a descriptive error on every validation failure shown
 * below.
 *
 * NOTE(review): only window_buf is reallocated when the window grows; no
 * resize of filtered_buf appears in these lines. Confirm it is handled
 * elsewhere. */
1595 static int process_head_file(struct archive_read* a, struct rar5* rar,
1596 struct archive_entry* entry, size_t block_flags)
1598 ssize_t extra_data_size = 0;
1599 size_t data_size = 0;
1600 size_t file_flags = 0;
1601 size_t file_attr = 0;
1602 size_t compression_info = 0;
1604 size_t name_size = 0;
1605 uint64_t unpacked_size, window_size;
1606 uint32_t mtime = 0, crc = 0;
1607 int c_method = 0, c_version = 0;
1608 char name_utf8_buf[MAX_NAME_IN_BYTES];
1612 DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004,
1613 UNKNOWN_UNPACKED_SIZE = 0x0008,
1617 ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4,
1618 ATTR_DIRECTORY = 0x10,
1621 enum COMP_INFO_FLAGS {
1630 archive_entry_clear(entry);
1632 /* Do not reset file context if we're switching archives. */
1633 if(!rar->cstate.switch_multivolume) {
1634 reset_file_context(rar);
1637 if(block_flags & HFL_EXTRA_DATA) {
1638 size_t edata_size = 0;
1639 if(!read_var_sized(a, &edata_size, NULL))
1642 /* Intentional type cast from unsigned to signed. */
1643 extra_data_size = (ssize_t) edata_size;
1646 if(block_flags & HFL_DATA) {
1647 if(!read_var_sized(a, &data_size, NULL))
1650 rar->file.bytes_remaining = data_size;
1652 rar->file.bytes_remaining = 0;
1654 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1655 "no data found in file/service block");
1656 return ARCHIVE_FATAL;
1659 if(!read_var_sized(a, &file_flags, NULL))
1662 if(!read_var(a, &unpacked_size, NULL))
1665 if(file_flags & UNKNOWN_UNPACKED_SIZE) {
1666 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1667 "Files with unknown unpacked size are not supported");
1668 return ARCHIVE_FATAL;
1671 rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0);
1673 if(!read_var_sized(a, &file_attr, NULL))
1676 if(file_flags & UTIME) {
1677 if(!read_u32(a, &mtime))
1681 if(file_flags & CRC32) {
1682 if(!read_u32(a, &crc))
1686 if(!read_var_sized(a, &compression_info, NULL))
/* Bit layout of compression_info as used here: bits 0-5 hold the version,
 * bits 7-9 the method and bits 10-13 the window size shift. */
1689 c_method = (int) (compression_info >> 7) & 0x7;
1690 c_version = (int) (compression_info & 0x3f);
1692 /* RAR5 seems to limit the dictionary size to 64MB. */
1693 window_size = (rar->file.dir > 0) ?
1695 g_unpack_window_size << ((compression_info >> 10) & 15);
1696 rar->cstate.method = c_method;
1697 rar->cstate.version = c_version + 50;
1698 rar->file.solid = (compression_info & SOLID) > 0;
1700 /* Archives which declare solid files without initializing the window
1701 * buffer first are invalid. */
1703 if(rar->file.solid > 0 && rar->cstate.window_buf == NULL) {
1704 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1705 "Declared solid file, but no window buffer "
1706 "initialized yet.");
1707 return ARCHIVE_FATAL;
1710 /* Check if window_size is a sane value. Also, if the file is not
1711 * declared as a directory, disallow window_size == 0. */
1712 if(window_size > (64 * 1024 * 1024) ||
1713 (rar->file.dir == 0 && window_size == 0))
1715 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1716 "Declared dictionary size is not supported.");
1717 return ARCHIVE_FATAL;
1720 if(rar->file.solid > 0) {
1721 /* Re-check if current window size is the same as previous
1722 * window size (for solid files only). */
1723 if(rar->file.solid_window_size > 0 &&
1724 rar->file.solid_window_size != (ssize_t) window_size)
1726 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1727 "Window size for this solid file doesn't match "
1728 "the window size used in previous solid file. ");
1729 return ARCHIVE_FATAL;
1733 if(rar->cstate.window_size < (ssize_t) window_size &&
1734 rar->cstate.window_buf)
1736 /* If window_buf has been allocated before, reallocate it, so
1737 * that its size will match new window_size. */
1739 uint8_t* new_window_buf =
1740 realloc(rar->cstate.window_buf, window_size);
1742 if(!new_window_buf) {
1743 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1744 "Not enough memory when trying to realloc the window "
1746 return ARCHIVE_FATAL;
1749 rar->cstate.window_buf = new_window_buf;
1752 /* Values up to 64M should fit into ssize_t on every
1754 rar->cstate.window_size = (ssize_t) window_size;
1756 if(rar->file.solid > 0 && rar->file.solid_window_size == 0) {
1757 /* Solid files have to have the same window_size across
1758 whole archive. Remember the window_size parameter
1759 for first solid file found. */
1760 rar->file.solid_window_size = rar->cstate.window_size;
1763 init_window_mask(rar);
1765 rar->file.service = 0;
1767 if(!read_var_sized(a, &host_os, NULL))
1770 if(host_os == HOST_WINDOWS) {
1771 /* Host OS is Windows */
/* Windows attributes are mapped to a POSIX-style mode: directories get
 * 0755 and regular files 0644, with the write bits dropped when the
 * read-only attribute is set. */
1775 if(file_attr & ATTR_DIRECTORY) {
1776 if (file_attr & ATTR_READONLY) {
1777 mode = 0555 | AE_IFDIR;
1779 mode = 0755 | AE_IFDIR;
1782 if (file_attr & ATTR_READONLY) {
1783 mode = 0444 | AE_IFREG;
1785 mode = 0644 | AE_IFREG;
1789 archive_entry_set_mode(entry, mode);
1791 if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) {
1792 char *fflags_text, *ptr;
1793 /* allocate for "rdonly,hidden,system," */
1794 fflags_text = malloc(22 * sizeof(char));
1795 if (fflags_text != NULL) {
1797 if (file_attr & ATTR_READONLY) {
1798 strcpy(ptr, "rdonly,");
1801 if (file_attr & ATTR_HIDDEN) {
1802 strcpy(ptr, "hidden,");
1805 if (file_attr & ATTR_SYSTEM) {
1806 strcpy(ptr, "system,");
1809 if (ptr > fflags_text) {
1810 /* Delete trailing comma */
1812 archive_entry_copy_fflags_text(entry,
1818 } else if(host_os == HOST_UNIX) {
1819 /* Host OS is Unix */
1820 archive_entry_set_mode(entry, (__LA_MODE_T) file_attr);
1822 /* Unknown host OS */
1823 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1824 "Unsupported Host OS: 0x%x", (int) host_os);
1826 return ARCHIVE_FATAL;
1829 if(!read_var_sized(a, &name_size, NULL))
1832 if(!read_ahead(a, name_size, &p))
/* The name length is validated before it is copied into the fixed-size
 * name_utf8_buf below. */
1835 if(name_size > (MAX_NAME_IN_CHARS - 1)) {
1836 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1837 "Filename is too long");
1839 return ARCHIVE_FATAL;
1842 if(name_size == 0) {
1843 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1844 "No filename specified");
1846 return ARCHIVE_FATAL;
1849 memcpy(name_utf8_buf, p, name_size);
1850 name_utf8_buf[name_size] = 0;
1851 if(ARCHIVE_OK != consume(a, name_size)) {
1855 archive_entry_update_pathname_utf8(entry, name_utf8_buf);
1857 if(extra_data_size > 0) {
1858 int ret = process_head_file_extra(a, entry, rar,
1862 * TODO: rewrite or remove useless sanity check
1863 * as extra_data_size is not passed as a pointer
1865 if(extra_data_size < 0) {
1866 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1867 "File extra data size is not zero");
1868 return ARCHIVE_FATAL;
1872 if(ret != ARCHIVE_OK)
1876 if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) {
1877 rar->file.unpacked_size = (ssize_t) unpacked_size;
1878 if(rar->file.redir_type == REDIR_TYPE_NONE)
1879 archive_entry_set_size(entry, unpacked_size);
1882 if(file_flags & UTIME) {
1883 archive_entry_set_mtime(entry, (time_t) mtime, 0);
1886 if(file_flags & CRC32) {
1887 rar->file.stored_crc32 = crc;
1890 if(!rar->cstate.switch_multivolume) {
1891 /* Do not reinitialize unpacking state if we're switching
1893 rar->cstate.block_parsing_finished = 1;
1894 rar->cstate.all_filters_applied = 1;
1895 rar->cstate.initialized = 0;
1898 if(rar->generic.split_before > 0) {
1899 /* If now we're standing on a header that has a 'split before'
1900 * mark, it means we're standing on a 'continuation' file
1901 * header. Signal the caller that if it wants to move to
1902 * another file, it must call rar5_read_header() function
1905 return ARCHIVE_RETRY;
/* Handles a SERVICE base block.
 *
 * The header is parsed with process_head_file(), the file context is then
 * marked as a service entry, and the data part is skipped with
 * rar5_read_data_skip(). After a successful skip the function returns
 * ARCHIVE_RETRY so that the caller moves on to the next block. */
1911 static int process_head_service(struct archive_read* a, struct rar5* rar,
1912 struct archive_entry* entry, size_t block_flags)
1914 /* Process this SERVICE block the same way as FILE blocks. */
1915 int ret = process_head_file(a, rar, entry, block_flags);
1916 if(ret != ARCHIVE_OK)
1919 rar->file.service = 1;
1921 /* But skip the data part automatically. It's no use for the user
1922 * anyway. It contains only service data, not even needed to
1923 * properly unpack the file. */
1924 ret = rar5_read_data_skip(a);
1925 if(ret != ARCHIVE_OK)
1928 /* After skipping, try parsing another block automatically. */
1929 return ARCHIVE_RETRY;
/* Parses the MAIN archive header.
 *
 * Reads the optional extra-area size (when block_flags has HFL_EXTRA_DATA)
 * and the archive flags, and records in rar->main whether the archive is a
 * multi-volume set (VOLUME) and whether it is solid (SOLID). When
 * VOLUME_NUMBER is set the volume number is read into rar->main.vol_no,
 * otherwise vol_no is 0. The volume number is compared with
 * rar->vol.expected_vol_no when an expected number is set.
 *
 * When the extra area is not empty, one record (size and id) is read; a
 * record size of zero or an unrecognized record id is reported as
 * ARCHIVE_FATAL with ARCHIVE_ERRNO_FILE_FORMAT. */
1932 static int process_head_main(struct archive_read* a, struct rar5* rar,
1933 struct archive_entry* entry, size_t block_flags)
1936 size_t extra_data_size = 0;
1937 size_t extra_field_size = 0;
1938 size_t extra_field_id = 0;
1939 size_t archive_flags = 0;
1942 VOLUME = 0x0001, /* multi-volume archive */
1943 VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't
1945 SOLID = 0x0004, /* solid archive */
1946 PROTECT = 0x0008, /* contains Recovery info */
1947 LOCK = 0x0010, /* readonly flag, not used */
1951 // Just one attribute here.
1957 if(block_flags & HFL_EXTRA_DATA) {
1958 if(!read_var_sized(a, &extra_data_size, NULL))
1961 extra_data_size = 0;
1964 if(!read_var_sized(a, &archive_flags, NULL)) {
1968 rar->main.volume = (archive_flags & VOLUME) > 0;
1969 rar->main.solid = (archive_flags & SOLID) > 0;
1971 if(archive_flags & VOLUME_NUMBER) {
1973 if(!read_var_sized(a, &v, NULL)) {
1978 archive_set_error(&a->archive,
1979 ARCHIVE_ERRNO_FILE_FORMAT,
1980 "Invalid volume number");
1981 return ARCHIVE_FATAL;
1984 rar->main.vol_no = (unsigned int) v;
1986 rar->main.vol_no = 0;
1989 if(rar->vol.expected_vol_no > 0 &&
1990 rar->main.vol_no != rar->vol.expected_vol_no)
1992 /* Returning EOF instead of FATAL because of strange
1993 * libarchive behavior. When opening multiple files via
1994 * archive_read_open_filenames(), after reading up the whole
1995 * last file, the __archive_read_ahead function wraps up to
1996 * the first archive instead of returning EOF. */
2000 if(extra_data_size == 0) {
2005 if(!read_var_sized(a, &extra_field_size, NULL)) {
2009 if(!read_var_sized(a, &extra_field_id, NULL)) {
2013 if(extra_field_size == 0) {
2014 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2015 "Invalid extra field size");
2016 return ARCHIVE_FATAL;
2019 switch(extra_field_id) {
2021 ret = process_main_locator_extra_block(a, rar);
2022 if(ret != ARCHIVE_OK) {
2023 /* Error while parsing main locator extra
2030 archive_set_error(&a->archive,
2031 ARCHIVE_ERRNO_FILE_FORMAT,
2032 "Unsupported extra type (0x%x)",
2033 (int) extra_field_id);
2034 return ARCHIVE_FATAL;
/* Discards whatever data is still pending for the current file before the
 * next base block is parsed.
 *
 * Nothing is done when rar->file.bytes_remaining is zero. In merge mode the
 * remaining bytes are consumed directly and the counter is reset to zero;
 * otherwise the skip is delegated to rar5_read_data_skip(). */
2040 static int skip_unprocessed_bytes(struct archive_read* a) {
2041 struct rar5* rar = get_context(a);
2044 if(rar->file.bytes_remaining) {
2045 /* Use different skipping method in block merging mode than in
2046 * normal mode. If merge mode is active, rar5_read_data_skip
2047 * can't be used, because it could allow recursive use of
2048 * merge_block() function, and this function doesn't support
2050 if(rar->merge_mode) {
2051 /* Discard whole merged block. This is valid in solid
2052 * mode as well, because the code will discard blocks
2053 * only if those blocks are safe to discard (i.e.
2054 * they're not FILE blocks). */
2055 ret = consume(a, rar->file.bytes_remaining);
2056 if(ret != ARCHIVE_OK) {
2059 rar->file.bytes_remaining = 0;
2061 /* If we're not in merge mode, use safe skipping code.
2062 * This will ensure we'll handle solid archives
2064 ret = rar5_read_data_skip(a);
2065 if(ret != ARCHIVE_OK) {
2074 static int scan_for_signature(struct archive_read* a);
2076 /* Base block processing function. A 'base block' is a RARv5 header block
2077 * that tells the reader what kind of data is stored inside the block.
2079 * From a bird's-eye view, a RAR file looks like this:
2081 * <magic><base_block_1><base_block_2>...<base_block_n>
2083 * There are a few types of base blocks. Those types are specified inside
2084 * the 'switch' statement in this function. For example purposes, I'll write
2085 * what a standard RARv5 file could look like here:
2087 * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC>
2089 * The structure above could describe an archive file with 3 files in it,
2090 * one service "QuickOpen" block (that is ignored by this parser), and an
2091 * end of file base block marker.
2093 * If the file is stored in multiple archive files ("multiarchive"), it might
2096 * .part01.rar: <magic><MAIN><FILE><ENDARC>
2097 * .part02.rar: <magic><MAIN><FILE><ENDARC>
2098 * .part03.rar: <magic><MAIN><FILE><ENDARC>
2100 * This example could describe 3 RAR files that contain ONE archived file.
2101 * Or it could describe 3 RAR files that contain 3 different files. Or 3
2102 * RAR files that contain 2 files. It all depends on what metadata is stored in
2103 * the headers of <FILE> blocks.
2105 * Each <FILE> block contains info about its size, the name of the file it's
2106 * storing inside, and whether this FILE block is a continuation block of
2107 * previous archive ('split before'), and whether this FILE block should be
2108 * continued in another archive ('split after'). By parsing the 'split before'
2109 * and 'split after' flags, we're able to tell if multiple <FILE> base blocks
2110 * are describing one file, or multiple files (with the same filename, for
2113 * One thing to note is that if we're parsing the first <FILE> block, and
2114 * we see 'split after' flag, then we need to jump over to another <FILE>
2115 * block to be able to decompress rest of the data. To do this, we need
2116 * to skip the <ENDARC> block, then switch to another file, then skip the
2117 * <magic> block, <MAIN> block, and then we're standing on the proper
2121 static int process_base_block(struct archive_read* a,
2122 struct archive_entry* entry)
2124 const size_t SMALLEST_RAR5_BLOCK_SIZE = 3;
2126 struct rar5* rar = get_context(a);
2127 uint32_t hdr_crc, computed_crc;
2128 size_t raw_hdr_size = 0, hdr_size_len, hdr_size;
2129 size_t header_id = 0;
2130 size_t header_flags = 0;
2135 HEAD_MARK = 0x00, HEAD_MAIN = 0x01, HEAD_FILE = 0x02,
2136 HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05,
2137 HEAD_UNKNOWN = 0xff,
2140 /* Skip any unprocessed data for this file. */
2141 ret = skip_unprocessed_bytes(a);
2142 if(ret != ARCHIVE_OK)
2145 /* Read the expected CRC32 checksum. */
2146 if(!read_u32(a, &hdr_crc)) {
2150 /* Read header size. */
2151 if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) {
/* hdr_size covers the encoded size field itself plus the header payload,
 * which is the byte range the CRC32 below is computed over. */
2155 hdr_size = raw_hdr_size + hdr_size_len;
2157 /* Sanity check, maximum header size for RAR5 is 2MB. */
2158 if(hdr_size > (2 * 1024 * 1024)) {
2159 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2160 "Base block header is too large");
2162 return ARCHIVE_FATAL;
2165 /* Additional sanity checks to weed out invalid files. */
2166 if(raw_hdr_size == 0 || hdr_size_len == 0 ||
2167 hdr_size < SMALLEST_RAR5_BLOCK_SIZE)
2169 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2170 "Too small block encountered (%zu bytes)",
2173 return ARCHIVE_FATAL;
2176 /* Read the whole header data into memory, maximum memory use here is
2178 if(!read_ahead(a, hdr_size, &p)) {
2182 /* Verify the CRC32 of the header data. */
2183 computed_crc = (uint32_t) crc32(0, p, (int) hdr_size);
2184 if(computed_crc != hdr_crc) {
2185 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2186 "Header CRC error");
2188 return ARCHIVE_FATAL;
2191 /* If the checksum is OK, we proceed with parsing. */
2192 if(ARCHIVE_OK != consume(a, hdr_size_len)) {
2196 if(!read_var_sized(a, &header_id, NULL))
2199 if(!read_var_sized(a, &header_flags, NULL))
/* Generic per-block state is refreshed for every block, and the
 * end-of-archive marker is cleared until an ENDARC block sets it again. */
2202 rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0;
2203 rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0;
2204 rar->generic.size = (int)hdr_size;
2205 rar->generic.last_header_id = (int)header_id;
2206 rar->main.endarc = 0;
2208 /* Those are possible header ids in RARv5. */
2211 ret = process_head_main(a, rar, entry, header_flags);
2213 /* Main header doesn't have any files in it, so it's
2214 * pointless to return to the caller. Retry to next
2215 * header, which should be HEAD_FILE/HEAD_SERVICE. */
2216 if(ret == ARCHIVE_OK)
2217 return ARCHIVE_RETRY;
2221 ret = process_head_service(a, rar, entry, header_flags);
2224 ret = process_head_file(a, rar, entry, header_flags);
2227 archive_set_error(&a->archive,
2228 ARCHIVE_ERRNO_FILE_FORMAT,
2229 "Encryption is not supported");
2230 return ARCHIVE_FATAL;
2232 rar->main.endarc = 1;
2234 /* After encountering an end of file marker, we need
2235 * to take into consideration if this archive is
2236 * continued in another file (i.e. is it part01.rar:
2237 * is there a part02.rar?) */
2238 if(rar->main.volume) {
2239 /* In case there is part02.rar, position the
2240 * read pointer in a proper place, so we can
2241 * resume parsing. */
2242 ret = scan_for_signature(a);
2243 if(ret == ARCHIVE_FATAL) {
2246 if(rar->vol.expected_vol_no ==
2248 archive_set_error(&a->archive,
2249 ARCHIVE_ERRNO_FILE_FORMAT,
2251 return ARCHIVE_FATAL;
2254 rar->vol.expected_vol_no =
2255 rar->main.vol_no + 1;
2264 if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) {
2265 archive_set_error(&a->archive,
2266 ARCHIVE_ERRNO_FILE_FORMAT,
2267 "Header type error");
2268 return ARCHIVE_FATAL;
2270 /* If the block is marked as 'skip if unknown',
2271 * do as the flag says: skip the block
2272 * instead on failing on it. */
2273 return ARCHIVE_RETRY;
/* Reaching this point is reported as an internal error of the unpacker. */
2279 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
2280 "Internal unpacker error");
2281 return ARCHIVE_FATAL;
/* Parses one base block purely for its side effects on the reader state.
 *
 * A throw-away archive_entry is allocated, handed to process_base_block()
 * and freed again, so nothing written to the entry survives. An ARCHIVE_OK
 * result from the parser is turned into ARCHIVE_RETRY at the end of the
 * function; a FILE header (id 2) carrying the split-before mark is handled
 * separately before that.
 *
 * NOTE(review): the result of archive_entry_new() is not checked for NULL
 * in the lines shown here before it is passed on. */
2285 static int skip_base_block(struct archive_read* a) {
2287 struct rar5* rar = get_context(a);
2289 /* Create a new local archive_entry structure that will be operated on
2290 * by header reader; operations on this archive_entry will be discarded.
2292 struct archive_entry* entry = archive_entry_new();
2293 ret = process_base_block(a, entry);
2295 /* Discard operations on this archive_entry structure. */
2296 archive_entry_free(entry);
2297 if(ret == ARCHIVE_FATAL)
2300 if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0)
2303 if(ret == ARCHIVE_OK)
2304 return ARCHIVE_RETRY;
/* Header-reading entry point of the RAR5 reader.
 *
 * On the first call the header_initialized flag is set, and the archive
 * signature (sizeof(rar5_signature_xor) bytes) is consumed once, tracked by
 * skipped_magic. Base blocks are then processed in a loop that repeats
 * while the parser asks for a retry, or while it returns ARCHIVE_OK on an
 * end-of-archive block. */
2309 static int rar5_read_header(struct archive_read *a,
2310 struct archive_entry *entry)
2312 struct rar5* rar = get_context(a);
2315 if(rar->header_initialized == 0) {
2317 rar->header_initialized = 1;
2320 if(rar->skipped_magic == 0) {
2321 if(ARCHIVE_OK != consume(a, sizeof(rar5_signature_xor))) {
2325 rar->skipped_magic = 1;
2329 ret = process_base_block(a, entry);
2330 } while(ret == ARCHIVE_RETRY ||
2331 (rar->main.endarc > 0 && ret == ARCHIVE_OK));
/* Resets the unpacking state before a new file is decompressed.
 *
 * The calculated CRC32 is cleared, the window mask is recomputed, and both
 * the window buffer and the filter buffer are freed. When window_size is
 * positive, both buffers are allocated again, zero-filled, with window_size
 * bytes each; otherwise both pointers are set to NULL. The write pointers
 * are reset and the five decode tables (bd, ld, dd, ldd, rd) are zeroed.
 *
 * NOTE(review): the calloc() results are not checked in the lines shown
 * here; callers presumably have to cope with NULL buffers. */
2336 static void init_unpack(struct rar5* rar) {
2337 rar->file.calculated_crc32 = 0;
2338 init_window_mask(rar);
2340 free(rar->cstate.window_buf);
2341 free(rar->cstate.filtered_buf);
2343 if(rar->cstate.window_size > 0) {
2344 rar->cstate.window_buf = calloc(1, rar->cstate.window_size);
2345 rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size);
2347 rar->cstate.window_buf = NULL;
2348 rar->cstate.filtered_buf = NULL;
2351 rar->cstate.write_ptr = 0;
2352 rar->cstate.last_write_ptr = 0;
2354 memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd));
2355 memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld));
2356 memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd));
2357 memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd));
2358 memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd));
/* Feeds to_read bytes at p into the running checksums of the current file.
 *
 * Skip mode is tested first; its handling depends on the
 * CHECK_CRC_ON_SOLID_SKIP build option. The CRC32 is updated only when the
 * file carries a non-zero stored CRC32, and the BLAKE2sp state is updated
 * only when the file has a BLAKE2 hash. */
2361 static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) {
2364 if(rar->skip_mode) {
2365 #if defined CHECK_CRC_ON_SOLID_SKIP
2374 /* Don't update CRC32 if the file doesn't have the
2375 * `stored_crc32` info filled in. */
2376 if(rar->file.stored_crc32 > 0) {
2377 rar->file.calculated_crc32 =
2378 crc32(rar->file.calculated_crc32, p, to_read);
2381 /* Check if the file uses an optional BLAKE2sp checksum
2383 if(rar->file.has_blake2 > 0) {
2384 /* Return value of the `update` function is always 0,
2385 * so we can explicitly ignore it here. */
2386 (void) blake2sp_update(&rar->file.b2state, p, to_read);
/* Builds the Huffman decoding tables for one alphabet.
 *
 * bit_length: code length of every symbol; only the low 4 bits are used.
 * table:      destination table, fully rewritten.
 * size:       number of symbols in bit_length.
 *
 * The function counts how many symbols use each code length, derives the
 * per-length upper limits (decode_len, left-aligned to 16 bits) and start
 * positions (decode_pos), and fills decode_num with the symbols ordered by
 * code length. It finally precomputes quick_len / quick_num for every
 * prefix of quick_bits bits, where quick_bits is 10 for the HUFF_NC
 * alphabet and 7 for all others. */
2391 static int create_decode_tables(uint8_t* bit_length,
2392 struct decode_table* table, int size)
2394 int code, upper_limit = 0, i, lc[16];
2395 uint32_t decode_pos_clone[rar5_countof(table->decode_pos)];
2396 ssize_t cur_len, quick_data_size;
2398 memset(&lc, 0, sizeof(lc));
2399 memset(table->decode_num, 0, sizeof(table->decode_num));
2401 table->quick_bits = size == HUFF_NC ? 10 : 7;
/* Histogram of code lengths. */
2403 for(i = 0; i < size; i++) {
2404 lc[bit_length[i] & 15]++;
2408 table->decode_pos[0] = 0;
2409 table->decode_len[0] = 0;
2411 for(i = 1; i < 16; i++) {
2412 upper_limit += lc[i];
2414 table->decode_len[i] = upper_limit << (16 - i);
2415 table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1];
/* A scratch copy of the start positions is advanced while symbols are
 * placed, so that table->decode_pos itself stays intact. */
2420 memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone));
2422 for(i = 0; i < size; i++) {
2423 uint8_t clen = bit_length[i] & 15;
2425 int last_pos = decode_pos_clone[clen];
2426 table->decode_num[last_pos] = i;
2427 decode_pos_clone[clen]++;
2431 quick_data_size = (int64_t)1 << table->quick_bits;
2433 for(code = 0; code < quick_data_size; code++) {
2434 int bit_field = code << (16 - table->quick_bits);
2437 while(cur_len < rar5_countof(table->decode_len) &&
2438 bit_field >= table->decode_len[cur_len]) {
2442 table->quick_len[code] = (uint8_t) cur_len;
2444 dist = bit_field - table->decode_len[cur_len - 1];
2445 dist >>= (16 - cur_len);
2447 pos = table->decode_pos[cur_len & 15] + dist;
/* Entries that would fall outside the tables are stored as symbol 0. */
2448 if(cur_len < rar5_countof(table->decode_pos) && pos < size) {
2449 table->quick_num[code] = table->decode_num[pos];
2451 table->quick_num[code] = 0;
/* Decodes one Huffman symbol from the bit stream and stores it in *num.
 *
 * Sixteen bits are peeked with read_bits_16(). When the value falls below
 * decode_len[quick_bits], the symbol and its code length come straight from
 * the quick tables. Otherwise code lengths from quick_bits + 1 up to 14 are
 * searched, and the symbol position is derived from decode_pos plus the
 * distance to the previous length limit. The position is checked against
 * table->size before decode_num is indexed. */
2458 static int decode_number(struct archive_read* a, struct decode_table* table,
2459 const uint8_t* p, uint16_t* num)
2461 int i, bits, dist, ret;
2464 struct rar5* rar = get_context(a);
2466 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &bitfield))) {
/* Fast path: short codes are resolved with a single table lookup. */
2472 if(bitfield < table->decode_len[table->quick_bits]) {
2473 int code = bitfield >> (16 - table->quick_bits);
2474 skip_bits(rar, table->quick_len[code]);
2475 *num = table->quick_num[code];
/* Slow path: find the code length whose limit exceeds the peeked bits. */
2481 for(i = table->quick_bits + 1; i < 15; i++) {
2482 if(bitfield < table->decode_len[i]) {
2488 skip_bits(rar, bits);
2490 dist = bitfield - table->decode_len[bits - 1];
2491 dist >>= (16 - bits);
2492 pos = table->decode_pos[bits] + dist;
2494 if(pos >= table->size)
2497 *num = table->decode_num[pos];
2501 /* Reads and parses Huffman tables from the beginning of the block.
 *
 * Stage one unpacks HUFF_BC nibble-encoded code lengths (with an escape
 * value of 15 for runs of zeroes) and builds the bit-length decoder
 * rar->cstate.bd from them. Stage two uses that decoder to read
 * HUFF_TABLE_SIZE code lengths, including repeat-previous and zero-fill run
 * codes. Stage three builds the literal (ld), distance (dd), lower distance
 * bits (ldd) and repeating distances (rd) decoders from consecutive slices
 * of that array.
 *
 * Every failure is reported as ARCHIVE_FATAL with
 * ARCHIVE_ERRNO_FILE_FORMAT. */
2502 static int parse_tables(struct archive_read* a, struct rar5* rar,
2505 int ret, value, i, w, idx = 0;
2506 uint8_t bit_length[HUFF_BC],
2507 table[HUFF_TABLE_SIZE],
2511 enum { ESCAPE = 15 };
2513 /* The data for table generation is compressed using a simple RLE-like
2514 * algorithm when storing zeroes, so we need to unpack it first. */
2515 for(w = 0, i = 0; w < HUFF_BC;) {
2516 if(i >= rar->cstate.cur_block_size) {
2517 /* Truncated data, can't continue. */
2518 archive_set_error(&a->archive,
2519 ARCHIVE_ERRNO_FILE_FORMAT,
2520 "Truncated data in huffman tables");
2521 return ARCHIVE_FATAL;
2524 value = (p[i] & nibble_mask) >> nibble_shift;
2526 if(nibble_mask == 0x0F)
2529 nibble_mask ^= 0xFF;
2532 /* Values smaller than 15 is data, so we write it directly.
2533 * Value 15 is a flag telling us that we need to unpack more
2535 if(value == ESCAPE) {
2536 value = (p[i] & nibble_mask) >> nibble_shift;
2537 if(nibble_mask == 0x0F)
2539 nibble_mask ^= 0xFF;
2543 /* We sometimes need to write the actual value
2544 * of 15, so this case handles that. */
2545 bit_length[w++] = ESCAPE;
2550 for(k = 0; (k < value + 2) && (w < HUFF_BC);
2552 bit_length[w++] = 0;
2556 bit_length[w++] = value;
/* Hand the current byte and bit position over to the bit reader that is
 * used from here on. */
2560 rar->bits.in_addr = i;
2561 rar->bits.bit_addr = nibble_shift ^ 4;
2563 ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC);
2564 if(ret != ARCHIVE_OK) {
2565 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2566 "Decoding huffman tables failed");
2567 return ARCHIVE_FATAL;
2570 for(i = 0; i < HUFF_TABLE_SIZE;) {
2573 ret = decode_number(a, &rar->cstate.bd, p, &num);
2574 if(ret != ARCHIVE_OK) {
2575 archive_set_error(&a->archive,
2576 ARCHIVE_ERRNO_FILE_FORMAT,
2577 "Decoding huffman tables failed");
2578 return ARCHIVE_FATAL;
2582 /* 0..15: store directly */
2583 table[i] = (uint8_t) num;
2585 } else if(num < 18) {
2586 /* 16..17: repeat previous code */
2589 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
2603 while(n-- > 0 && i < HUFF_TABLE_SIZE) {
2604 table[i] = table[i - 1];
2608 archive_set_error(&a->archive,
2609 ARCHIVE_ERRNO_FILE_FORMAT,
2610 "Unexpected error when decoding "
2612 return ARCHIVE_FATAL;
2615 /* other codes: fill with zeroes `n` times */
2618 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
2631 while(n-- > 0 && i < HUFF_TABLE_SIZE)
2636 ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC);
2637 if(ret != ARCHIVE_OK) {
2638 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2639 "Failed to create literal table");
2640 return ARCHIVE_FATAL;
2645 ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC);
2646 if(ret != ARCHIVE_OK) {
2647 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2648 "Failed to create distance table");
2649 return ARCHIVE_FATAL;
2654 ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC);
2655 if(ret != ARCHIVE_OK) {
2656 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2657 "Failed to create lower bits of distances table");
2658 return ARCHIVE_FATAL;
2663 ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC);
2664 if(ret != ARCHIVE_OK) {
2665 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2666 "Failed to create repeating distances table");
2667 return ARCHIVE_FATAL;
2673 /* Parses the block header, verifies its CRC byte, and saves the header
2674 * fields inside the `hdr` pointer.
 *
 * p:          raw bytes of the compressed block header; the block size
 *             field starts at p[2].
 * block_size: receives the decoded block size, which is stored on 1, 2 or
 *             3 bytes depending on the byte count announced in the header.
 * hdr:        receives a byte copy of the first
 *             sizeof(struct compressed_block_header) bytes of p.
 *
 * Returns ARCHIVE_FATAL with ARCHIVE_ERRNO_FILE_FORMAT when the announced
 * byte count is larger than 2 or when the checksum byte does not match. */
2675 static int parse_block_header(struct archive_read* a, const uint8_t* p,
2676 ssize_t* block_size, struct compressed_block_header* hdr)
2678 uint8_t calculated_cksum;
2679 memcpy(hdr, p, sizeof(struct compressed_block_header));
2681 if(bf_byte_count(hdr) > 2) {
2682 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2683 "Unsupported block header size (was %d, max is 2)",
2684 bf_byte_count(hdr));
2685 return ARCHIVE_FATAL;
2688 /* This should probably use bit reader interface in order to be more
2691 switch(bf_byte_count(hdr)) {
2692 /* 1-byte block size */
2694 *block_size = *(const uint8_t*) &p[2];
2697 /* 2-byte block size */
2699 *block_size = archive_le16dec(&p[2]);
2702 /* 3-byte block size */
2704 *block_size = archive_le32dec(&p[2]);
2705 *block_size &= 0x00FFFFFF;
2708 /* Other block sizes are not supported. This case is not
2709 * reached, because we have an 'if' guard before the switch
2710 * that makes sure of it. */
2712 return ARCHIVE_FATAL;
2715 /* Verify the block header checksum. 0x5A is a magic value and is
2716 * always constant. */
2717 calculated_cksum = 0x5A
2718 ^ (uint8_t) hdr->block_flags_u8
2719 ^ (uint8_t) *block_size
2720 ^ (uint8_t) (*block_size >> 8)
2721 ^ (uint8_t) (*block_size >> 16);
2723 if(calculated_cksum != hdr->block_cksum) {
2724 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2725 "Block checksum error: got 0x%x, expected 0x%x",
2726 hdr->block_cksum, calculated_cksum);
2728 return ARCHIVE_FATAL;
2734 /* Convenience function used during filter processing.
 *
 * Reads a variable-width integer from the bit stream: a 2-bit field gives
 * the byte count, then that many bytes are read one at a time, each taken
 * from the upper 8 bits of a 16-bit peek and added at bit offset i * 8, so
 * the first byte read is the least significant one. The assembled value is
 * stored in *filter_data. */
2735 static int parse_filter_data(struct archive_read* a, struct rar5* rar,
2736 const uint8_t* p, uint32_t* filter_data)
2741 if(ARCHIVE_OK != (ret = read_consume_bits(a, rar, p, 2, &bytes)))
2746 for(i = 0; i < bytes; i++) {
2749 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &byte))) {
2753 /* Cast to uint32_t will ensure the shift operation will not
2754 * produce undefined result. */
2755 data += ((uint32_t) byte >> 8) << (i * 8);
2759 *filter_data = data;
2763 /* Function is used during sanity checking. */
2764 static int is_valid_filter_block_start(struct rar5* rar,
2767 const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr;
2768 const int64_t last_bs = rar->cstate.last_block_start;
2769 const ssize_t last_bl = rar->cstate.last_block_length;
2771 if(last_bs == 0 || last_bl == 0) {
2772 /* We didn't have any filters yet, so accept this offset. */
2776 if(block_start >= last_bs + last_bl) {
2777 /* Current offset is bigger than last block's end offset, so
2778 * accept current offset. */
2782 /* Any other case is not a normal situation and we should fail. */
2786 /* The function will create a new filter, read its parameters from the input
2787 * stream and add it to the filter collection. */
2788 static int parse_filter(struct archive_read* ar, const uint8_t* p) {
2789 uint32_t block_start, block_length;
2790 uint16_t filter_type;
2791 struct filter_info* filt = NULL;
2792 struct rar5* rar = get_context(ar);
2795 /* Read the parameters from the input stream. */
2796 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_start)))
2799 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_length)))
2802 if(ARCHIVE_OK != (ret = read_bits_16(ar, rar, p, &filter_type)))
2808 /* Perform some sanity checks on this filter parameters. Note that we
2809 * allow only DELTA, E8/E9 and ARM filters here, because rest of
2810 * filters are not used in RARv5. */
2812 if(block_length < 4 ||
2813 block_length > 0x400000 ||
2814 filter_type > FILTER_ARM ||
2815 !is_valid_filter_block_start(rar, block_start))
2817 archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2818 "Invalid filter encountered");
2819 return ARCHIVE_FATAL;
2822 /* Allocate a new filter. */
2823 filt = add_new_filter(rar);
2825 archive_set_error(&ar->archive, ENOMEM,
2826 "Can't allocate memory for a filter descriptor.");
2827 return ARCHIVE_FATAL;
2830 filt->type = filter_type;
2831 filt->block_start = rar->cstate.write_ptr + block_start;
2832 filt->block_length = block_length;
2834 rar->cstate.last_block_start = filt->block_start;
2835 rar->cstate.last_block_length = filt->block_length;
2837 /* Read some more data in case this is a DELTA filter. Other filter
2838 * types don't require any additional data over what was already
2840 if(filter_type == FILTER_DELTA) {
2843 if(ARCHIVE_OK != (ret = read_consume_bits(ar, rar, p, 5, &channels)))
2846 filt->channels = channels + 1;
2852 static int decode_code_length(struct archive_read* a, struct rar5* rar,
2853 const uint8_t* p, uint16_t code)
2855 int lbits, length = 2;
2861 lbits = code / 4 - 1;
2862 length += (4 | (code & 3)) << lbits;
2868 if(ARCHIVE_OK != read_consume_bits(a, rar, p, lbits, &add))
2877 static int copy_string(struct archive_read* a, int len, int dist) {
2878 struct rar5* rar = get_context(a);
2879 const uint64_t cmask = rar->cstate.window_mask;
2880 const uint64_t write_ptr = rar->cstate.write_ptr +
2881 rar->cstate.solid_offset;
2884 if (rar->cstate.window_buf == NULL)
2885 return ARCHIVE_FATAL;
2887 /* The unpacker spends most of the time in this function. It would be
2888 * a good idea to introduce some optimizations here.
2890 * Just remember that this loop treats buffers that overlap differently
2891 * than buffers that do not overlap. This is why a simple memcpy(3)
2892 * call will not be enough. */
2894 for(i = 0; i < len; i++) {
2895 const ssize_t write_idx = (write_ptr + i) & cmask;
2896 const ssize_t read_idx = (write_ptr + i - dist) & cmask;
2897 rar->cstate.window_buf[write_idx] =
2898 rar->cstate.window_buf[read_idx];
2901 rar->cstate.write_ptr += len;
2905 static int do_uncompress_block(struct archive_read* a, const uint8_t* p) {
2906 struct rar5* rar = get_context(a);
2910 const uint64_t cmask = rar->cstate.window_mask;
2911 const struct compressed_block_header* hdr = &rar->last_block_hdr;
2912 const uint8_t bit_size = 1 + bf_bit_size(hdr);
2915 if(rar->cstate.write_ptr - rar->cstate.last_write_ptr >
2916 (rar->cstate.window_size >> 1)) {
2917 /* Don't allow growing data by more than half of the
2918 * window size at a time. In such case, break the loop;
2919 * next call to this function will continue processing
2920 * from this moment. */
2924 if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 ||
2925 (rar->bits.in_addr == rar->cstate.cur_block_size - 1 &&
2926 rar->bits.bit_addr >= bit_size))
2928 /* If the program counter is here, it means the
2929 * function has finished processing the block. */
2930 rar->cstate.block_parsing_finished = 1;
2934 /* Decode the next literal. */
2935 if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) {
2939 /* Num holds a decompression literal, or 'command code'.
2941 * - Values lower than 256 are just bytes. Those codes
2942 * can be stored in the output buffer directly.
2944 * - Code 256 defines a new filter, which is later used to
2945 * ransform the data block accordingly to the filter type.
2946 * The data block needs to be fully uncompressed first.
2948 * - Code bigger than 257 and smaller than 262 define
2949 * a repetition pattern that should be copied from
2950 * an already uncompressed chunk of data.
2954 /* Directly store the byte. */
2955 int64_t write_idx = rar->cstate.solid_offset +
2956 rar->cstate.write_ptr++;
2958 rar->cstate.window_buf[write_idx & cmask] =
2961 } else if(num >= 262) {
2963 int len = decode_code_length(a, rar, p, num - 262),
2968 archive_set_error(&a->archive,
2969 ARCHIVE_ERRNO_PROGRAMMER,
2970 "Failed to decode the code length");
2972 return ARCHIVE_FATAL;
2975 if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p,
2978 archive_set_error(&a->archive,
2979 ARCHIVE_ERRNO_PROGRAMMER,
2980 "Failed to decode the distance slot");
2982 return ARCHIVE_FATAL;
2989 dbits = dist_slot / 2 - 1;
2991 /* Cast to uint32_t will make sure the shift
2992 * left operation won't produce undefined
2993 * result. Then, the uint32_t type will
2994 * be implicitly casted to int. */
2995 dist += (uint32_t) (2 |
2996 (dist_slot & 1)) << dbits;
3005 if(ARCHIVE_OK != (ret = read_bits_32(
3006 a, rar, p, &add))) {
3013 skip_bits(rar, dbits - 4);
3019 if(ARCHIVE_OK != decode_number(a,
3020 &rar->cstate.ldd, p, &low_dist))
3022 archive_set_error(&a->archive,
3023 ARCHIVE_ERRNO_PROGRAMMER,
3024 "Failed to decode the "
3027 return ARCHIVE_FATAL;
3030 if(dist >= INT_MAX - low_dist - 1) {
3031 /* This only happens in
3032 * invalid archives. */
3033 archive_set_error(&a->archive,
3034 ARCHIVE_ERRNO_FILE_FORMAT,
3037 return ARCHIVE_FATAL;
3042 /* dbits is one of [0,1,2,3] */
3045 if(ARCHIVE_OK != (ret = read_consume_bits(a, rar,
3047 /* Return EOF if we can't read
3062 if(dist > 0x40000) {
3068 dist_cache_push(rar, dist);
3069 rar->cstate.last_len = len;
3071 if(ARCHIVE_OK != copy_string(a, len, dist))
3072 return ARCHIVE_FATAL;
3075 } else if(num == 256) {
3076 /* Create a filter. */
3077 ret = parse_filter(a, p);
3078 if(ret != ARCHIVE_OK)
3082 } else if(num == 257) {
3083 if(rar->cstate.last_len != 0) {
3084 if(ARCHIVE_OK != copy_string(a,
3085 rar->cstate.last_len,
3086 rar->cstate.dist_cache[0]))
3088 return ARCHIVE_FATAL;
3095 const int idx = num - 258;
3096 const int dist = dist_cache_touch(rar, idx);
3101 if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p,
3103 return ARCHIVE_FATAL;
3106 len = decode_code_length(a, rar, p, len_slot);
3108 return ARCHIVE_FATAL;
3111 rar->cstate.last_len = len;
3113 if(ARCHIVE_OK != copy_string(a, len, dist))
3114 return ARCHIVE_FATAL;
3123 /* Binary search for the RARv5 signature. */
3124 static int scan_for_signature(struct archive_read* a) {
3126 const int chunk_size = 512;
3128 char signature[sizeof(rar5_signature_xor)];
3130 /* If we're here, it means we're on an 'unknown territory' data.
3131 * There's no indication what kind of data we're reading here.
3132 * It could be some text comment, any kind of binary data,
3133 * digital sign, dragons, etc.
3135 * We want to find a valid RARv5 magic header inside this unknown
3138 /* Is it possible in libarchive to just skip everything until the
3139 * end of the file? If so, it would be a better approach than the
3140 * current implementation of this function. */
3142 rar5_signature(signature);
3145 if(!read_ahead(a, chunk_size, &p))
3148 for(i = 0; i < chunk_size - (int)sizeof(rar5_signature_xor);
3150 if(memcmp(&p[i], signature,
3151 sizeof(rar5_signature_xor)) == 0) {
3152 /* Consume the number of bytes we've used to
3153 * search for the signature, as well as the
3154 * number of bytes used by the signature
3155 * itself. After this we should be standing
3156 * on a valid base block header. */
3158 i + sizeof(rar5_signature_xor));
3163 consume(a, chunk_size);
3166 return ARCHIVE_FATAL;
3169 /* This function will switch the multivolume archive file to another file,
3170 * i.e. from part03 to part 04. */
3171 static int advance_multivolume(struct archive_read* a) {
3173 struct rar5* rar = get_context(a);
3175 /* A small state machine that will skip unnecessary data, needed to
3176 * switch from one multivolume to another. Such skipping is needed if
3177 * we want to be an stream-oriented (instead of file-oriented)
3180 * The state machine starts with `rar->main.endarc` == 0. It also
3181 * assumes that current stream pointer points to some base block
3184 * The `endarc` field is being set when the base block parsing
3185 * function encounters the 'end of archive' marker.
3189 if(rar->main.endarc == 1) {
3192 rar->main.endarc = 0;
3195 lret = skip_base_block(a);
3198 /* Continue looping. */
3205 /* Forward any errors to the
3213 /* Skip current base block. In order to properly skip
3214 * it, we really need to simply parse it and discard
3217 lret = skip_base_block(a);
3218 if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED)
3221 /* The `skip_base_block` function tells us if we
3222 * should continue with skipping, or we should stop
3223 * skipping. We're trying to skip everything up to
3224 * a base FILE block. */
3226 if(lret != ARCHIVE_RETRY) {
3227 /* If there was an error during skipping, or we
3228 * have just skipped a FILE base block... */
3230 if(rar->main.endarc == 0) {
3242 /* Merges the partial block from the first multivolume archive file, and
3243 * partial block from the second multivolume archive file. The result is
3244 * a chunk of memory containing the whole block, and the stream pointer
3245 * is advanced to the next block in the second multivolume archive file. */
3246 static int merge_block(struct archive_read* a, ssize_t block_size,
3249 struct rar5* rar = get_context(a);
3250 ssize_t cur_block_size, partial_offset = 0;
3254 if(rar->merge_mode) {
3255 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3256 "Recursive merge is not allowed");
3258 return ARCHIVE_FATAL;
3261 /* Set a flag that we're in the switching mode. */
3262 rar->cstate.switch_multivolume = 1;
3264 /* Reallocate the memory which will hold the whole block. */
3265 if(rar->vol.push_buf)
3266 free((void*) rar->vol.push_buf);
3268 /* Increasing the allocation block by 8 is due to bit reading functions,
3269 * which are using additional 2 or 4 bytes. Allocating the block size
3270 * by exact value would make bit reader perform reads from invalid
3271 * memory block when reading the last byte from the buffer. */
3272 rar->vol.push_buf = malloc(block_size + 8);
3273 if(!rar->vol.push_buf) {
3274 archive_set_error(&a->archive, ENOMEM,
3275 "Can't allocate memory for a merge block buffer.");
3276 return ARCHIVE_FATAL;
3279 /* Valgrind complains if the extension block for bit reader is not
3280 * initialized, so initialize it. */
3281 memset(&rar->vol.push_buf[block_size], 0, 8);
3283 /* A single block can span across multiple multivolume archive files,
3284 * so we use a loop here. This loop will consume enough multivolume
3285 * archive files until the whole block is read. */
3288 /* Get the size of current block chunk in this multivolume
3289 * archive file and read it. */
3290 cur_block_size = rar5_min(rar->file.bytes_remaining,
3291 block_size - partial_offset);
3293 if(cur_block_size == 0) {
3294 archive_set_error(&a->archive,
3295 ARCHIVE_ERRNO_FILE_FORMAT,
3296 "Encountered block size == 0 during block merge");
3297 return ARCHIVE_FATAL;
3300 if(!read_ahead(a, cur_block_size, &lp))
3303 /* Sanity check; there should never be a situation where this
3304 * function reads more data than the block's size. */
3305 if(partial_offset + cur_block_size > block_size) {
3306 archive_set_error(&a->archive,
3307 ARCHIVE_ERRNO_PROGRAMMER,
3308 "Consumed too much data when merging blocks.");
3309 return ARCHIVE_FATAL;
3312 /* Merge previous block chunk with current block chunk,
3313 * or create first block chunk if this is our first
3315 memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size);
3317 /* Advance the stream read pointer by this block chunk size. */
3318 if(ARCHIVE_OK != consume(a, cur_block_size))
3321 /* Update the pointers. `partial_offset` contains information
3322 * about the sum of merged block chunks. */
3323 partial_offset += cur_block_size;
3324 rar->file.bytes_remaining -= cur_block_size;
3326 /* If `partial_offset` is the same as `block_size`, this means
3327 * we've merged all block chunks and we have a valid full
3329 if(partial_offset == block_size) {
3333 /* If we don't have any bytes to read, this means we should
3334 * switch to another multivolume archive file. */
3335 if(rar->file.bytes_remaining == 0) {
3337 ret = advance_multivolume(a);
3339 if(ret != ARCHIVE_OK) {
3345 *p = rar->vol.push_buf;
3347 /* If we're here, we can resume unpacking by processing the block
3348 * pointed to by the `*p` memory pointer. */
3353 static int process_block(struct archive_read* a) {
3355 struct rar5* rar = get_context(a);
3358 /* If we don't have any data to be processed, this most probably means
3359 * we need to switch to the next volume. */
3360 if(rar->main.volume && rar->file.bytes_remaining == 0) {
3361 ret = advance_multivolume(a);
3362 if(ret != ARCHIVE_OK)
3366 if(rar->cstate.block_parsing_finished) {
3369 ssize_t cur_block_size;
3371 /* The header size won't be bigger than 6 bytes. */
3372 if(!read_ahead(a, 6, &p)) {
3373 /* Failed to prefetch data block header. */
3378 * Read block_size by parsing block header. Validate the header
3379 * by calculating CRC byte stored inside the header. Size of
3380 * the header is not constant (block size can be stored either
3381 * in 1 or 2 bytes), that's why block size is left out from the
3382 * `compressed_block_header` structure and returned by
3383 * `parse_block_header` as the second argument. */
3385 ret = parse_block_header(a, p, &block_size,
3386 &rar->last_block_hdr);
3387 if(ret != ARCHIVE_OK) {
3391 /* Skip block header. Next data is huffman tables,
3393 to_skip = sizeof(struct compressed_block_header) +
3394 bf_byte_count(&rar->last_block_hdr) + 1;
3396 if(ARCHIVE_OK != consume(a, to_skip))
3399 rar->file.bytes_remaining -= to_skip;
3401 /* The block size gives information about the whole block size,
3402 * but the block could be stored in split form when using
3403 * multi-volume archives. In this case, the block size will be
3404 * bigger than the actual data stored in this file. Remaining
3405 * part of the data will be in another file. */
3408 rar5_min(rar->file.bytes_remaining, block_size);
3410 if(block_size > rar->file.bytes_remaining) {
3411 /* If current blocks' size is bigger than our data
3412 * size, this means we have a multivolume archive.
3413 * In this case, skip all base headers until the end
3414 * of the file, proceed to next "partXXX.rar" volume,
3415 * find its signature, skip all headers up to the first
3416 * FILE base header, and continue from there.
3418 * Note that `merge_block` will update the `rar`
3419 * context structure quite extensively. */
3421 ret = merge_block(a, block_size, &p);
3422 if(ret != ARCHIVE_OK) {
3426 cur_block_size = block_size;
3428 /* Current stream pointer should be now directly
3429 * *after* the block that spanned through multiple
3430 * archive files. `p` pointer should have the data of
3431 * the *whole* block (merged from partial blocks
3432 * stored in multiple archives files). */
3434 rar->cstate.switch_multivolume = 0;
3436 /* Read the whole block size into memory. This can take
3437 * up to 8 megabytes of memory in theoretical cases.
3438 * Might be worth to optimize this and use a standard
3439 * chunk of 4kb's. */
3440 if(!read_ahead(a, 4 + cur_block_size, &p)) {
3441 /* Failed to prefetch block data. */
3446 rar->cstate.block_buf = p;
3447 rar->cstate.cur_block_size = cur_block_size;
3448 rar->cstate.block_parsing_finished = 0;
3450 rar->bits.in_addr = 0;
3451 rar->bits.bit_addr = 0;
3453 if(bf_is_table_present(&rar->last_block_hdr)) {
3454 /* Load Huffman tables. */
3455 ret = parse_tables(a, rar, p);
3456 if(ret != ARCHIVE_OK) {
3457 /* Error during decompression of Huffman
3463 /* Block parsing not finished, reuse previous memory buffer. */
3464 p = rar->cstate.block_buf;
3467 /* Uncompress the block, or a part of it, depending on how many bytes
3468 * will be generated by uncompressing the block.
3470 * In case too many bytes will be generated, calling this function
3471 * again will resume the uncompression operation. */
3472 ret = do_uncompress_block(a, p);
3473 if(ret != ARCHIVE_OK) {
3477 if(rar->cstate.block_parsing_finished &&
3478 rar->cstate.switch_multivolume == 0 &&
3479 rar->cstate.cur_block_size > 0)
3481 /* If we're processing a normal block, consume the whole
3482 * block. We can do this because we've already read the whole
3483 * block to memory. */
3484 if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size))
3485 return ARCHIVE_FATAL;
3487 rar->file.bytes_remaining -= rar->cstate.cur_block_size;
3488 } else if(rar->cstate.switch_multivolume) {
3489 /* Don't consume the block if we're doing multivolume
3490 * processing. The volume switching function will consume
3491 * the proper count of bytes instead. */
3492 rar->cstate.switch_multivolume = 0;
3498 /* Pops the `buf`, `size` and `offset` from the "data ready" stack.
3500 * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY
3501 * when there is no data on the stack. */
3502 static int use_data(struct rar5* rar, const void** buf, size_t* size,
3507 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
3508 struct data_ready *d = &rar->cstate.dready[i];
3511 if(buf) *buf = d->buf;
3512 if(size) *size = d->size;
3513 if(offset) *offset = d->offset;
3520 return ARCHIVE_RETRY;
3523 /* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready
3524 * FIFO stack. Those values will be popped from this stack by the `use_data`
3526 static int push_data_ready(struct archive_read* a, struct rar5* rar,
3527 const uint8_t* buf, size_t size, int64_t offset)
3531 /* Don't push if we're in skip mode. This is needed because solid
3532 * streams need full processing even if we're skipping data. After
3533 * fully processing the stream, we need to discard the generated bytes,
3534 * because we're interested only in the side effect: building up the
3535 * internal window circular buffer. This window buffer will be used
3536 * later during unpacking of requested data. */
3541 if(offset != rar->file.last_offset + rar->file.last_size) {
3542 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3543 "Sanity check error: output stream is not continuous");
3544 return ARCHIVE_FATAL;
3547 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
3548 struct data_ready* d = &rar->cstate.dready[i];
3555 /* These fields are used only in sanity checking. */
3556 rar->file.last_offset = offset;
3557 rar->file.last_size = size;
3559 /* Calculate the checksum of this new block before
3560 * submitting data to libarchive's engine. */
3561 update_crc(rar, d->buf, d->size);
3567 /* Program counter will reach this code if the `rar->cstate.data_ready`
3568 * stack will be filled up so that no new entries will be allowed. The
3569 * code shouldn't allow such situation to occur. So we treat this case
3570 * as an internal error. */
3572 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3573 "Error: premature end of data_ready stack");
3574 return ARCHIVE_FATAL;
3577 /* This function uncompresses the data that is stored in the <FILE> base
3580 * The FILE base block looks like this:
3582 * <header><huffman tables><block_1><block_2>...<block_n>
3584 * The <header> is a block header, that is parsed in parse_block_header().
3585 * It's a "compressed_block_header" structure, containing metadata needed
3586 * to know when we should stop looking for more <block_n> blocks.
3588 * <huffman tables> contain data needed to set up the huffman tables, needed
3589 * for the actual decompression.
3591 * Each <block_n> consists of series of literals:
3593 * <literal><literal><literal>...<literal>
3595 * Those literals generate the uncompression data. They operate on a circular
3596 * buffer, sometimes writing raw data into it, sometimes referencing
3597 * some previous data inside this buffer, and sometimes declaring a filter
3598 * that will need to be executed on the data stored in the circular buffer.
3599 * It all depends on the literal that is used.
3601 * Sometimes blocks produce output data, sometimes they don't. For example, for
3602 * some huge files that use lots of filters, sometimes a block is filled with
3603 * only filter declaration literals. Such blocks won't produce any data in the
3606 * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte,
3607 * because a literal can reference previously decompressed data. For example,
3608 * there can be a literal that says: 'append a byte 0xFE here', and after
3609 * it another literal can say 'append 1 megabyte of data from circular buffer
3610 * offset 0x12345'. This is how RAR format handles compressing repeated
3613 * The RAR compressor creates those literals and the actual efficiency of
3614 * compression depends on what those literals are. The literals can also
3615 * be seen as a kind of a non-turing-complete virtual machine that simply
3616 * tells the decompressor what it should do.
3619 static int do_uncompress_file(struct archive_read* a) {
3620 struct rar5* rar = get_context(a);
3622 int64_t max_end_pos;
3624 if(!rar->cstate.initialized) {
3625 /* Don't perform full context reinitialization if we're
3626 * processing a solid archive. */
3627 if(!rar->main.solid || !rar->cstate.window_buf) {
3631 rar->cstate.initialized = 1;
3634 /* Don't allow extraction if window_size is invalid. */
3635 if(rar->cstate.window_size == 0) {
3636 archive_set_error(&a->archive,
3637 ARCHIVE_ERRNO_FILE_FORMAT,
3638 "Invalid window size declaration in this file");
3640 /* This should never happen in valid files. */
3641 return ARCHIVE_FATAL;
3644 if(rar->cstate.all_filters_applied == 1) {
3645 /* We use while(1) here, but standard case allows for just 1
3646 * iteration. The loop will iterate if process_block() didn't
3647 * generate any data at all. This can happen if the block
3648 * contains only filter definitions (this is common in big
3651 ret = process_block(a);
3652 if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL)
3655 if(rar->cstate.last_write_ptr ==
3656 rar->cstate.write_ptr) {
3657 /* The block didn't generate any new data,
3658 * so just process a new block. */
3662 /* The block has generated some new data, so break
3668 /* Try to run filters. If filters won't be applied, it means that
3669 * insufficient data was generated. */
3670 ret = apply_filters(a);
3671 if(ret == ARCHIVE_RETRY) {
3673 } else if(ret == ARCHIVE_FATAL) {
3674 return ARCHIVE_FATAL;
3677 /* If apply_filters() will return ARCHIVE_OK, we can continue here. */
3679 if(cdeque_size(&rar->cstate.filters) > 0) {
3680 /* Check if we can write something before hitting first
3682 struct filter_info* flt;
3684 /* Get the block_start offset from the first filter. */
3685 if(CDE_OK != cdeque_front(&rar->cstate.filters,
3686 cdeque_filter_p(&flt)))
3688 archive_set_error(&a->archive,
3689 ARCHIVE_ERRNO_PROGRAMMER,
3690 "Can't read first filter");
3691 return ARCHIVE_FATAL;
3694 max_end_pos = rar5_min(flt->block_start,
3695 rar->cstate.write_ptr);
3697 /* There are no filters defined, or all filters were applied.
3698 * This means we can just store the data without any
3699 * postprocessing. */
3700 max_end_pos = rar->cstate.write_ptr;
3703 if(max_end_pos == rar->cstate.last_write_ptr) {
3704 /* We can't write anything yet. The block uncompression
3705 * function did not generate enough data, and no filter can be
3706 * applied. At the same time we don't have any data that can be
3707 * stored without filter postprocessing. This means we need to
3708 * wait for more data to be generated, so we can apply the
3711 * Signal the caller that we need more data to be able to do
3714 return ARCHIVE_RETRY;
3716 /* We can write the data before hitting the first filter.
3717 * So let's do it. The push_window_data() function will
3718 * effectively return the selected data block to the user
3720 push_window_data(a, rar, rar->cstate.last_write_ptr,
3722 rar->cstate.last_write_ptr = max_end_pos;
3728 static int uncompress_file(struct archive_read* a) {
3732 /* Sometimes the uncompression function will return a
3733 * 'retry' signal. If this will happen, we have to retry
3735 ret = do_uncompress_file(a);
3736 if(ret != ARCHIVE_RETRY)
3742 static int do_unstore_file(struct archive_read* a,
3743 struct rar5* rar, const void** buf, size_t* size, int64_t* offset)
3748 if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 &&
3749 rar->generic.split_after > 0)
3753 rar->cstate.switch_multivolume = 1;
3754 ret = advance_multivolume(a);
3755 rar->cstate.switch_multivolume = 0;
3757 if(ret != ARCHIVE_OK) {
3758 /* Failed to advance to next multivolume archive
3764 to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024);
3769 if(!read_ahead(a, to_read, &p)) {
3770 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
3771 "I/O error when unstoring file");
3772 return ARCHIVE_FATAL;
3775 if(ARCHIVE_OK != consume(a, to_read)) {
3780 if(size) *size = to_read;
3781 if(offset) *offset = rar->cstate.last_unstore_ptr;
3783 rar->file.bytes_remaining -= to_read;
3784 rar->cstate.last_unstore_ptr += to_read;
3786 update_crc(rar, p, to_read);
3790 static int do_unpack(struct archive_read* a, struct rar5* rar,
3791 const void** buf, size_t* size, int64_t* offset)
3793 enum COMPRESSION_METHOD {
3794 STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4,
3798 if(rar->file.service > 0) {
3799 return do_unstore_file(a, rar, buf, size, offset);
3801 switch(rar->cstate.method) {
3803 return do_unstore_file(a, rar, buf, size,
3814 return uncompress_file(a);
3816 archive_set_error(&a->archive,
3817 ARCHIVE_ERRNO_FILE_FORMAT,
3818 "Compression method not supported: 0x%x",
3819 rar->cstate.method);
3821 return ARCHIVE_FATAL;
3831 static int verify_checksums(struct archive_read* a) {
3833 struct rar5* rar = get_context(a);
3835 /* Check checksums only when actually unpacking the data. There's no
3836 * need to calculate checksum when we're skipping data in solid archives
3837 * (skipping in solid archives is the same thing as unpacking compressed
3838 * data and discarding the result). */
3840 if(!rar->skip_mode) {
3841 /* Always check checksums if we're not in skip mode */
3844 /* We can override the logic above with a compile-time option
3845 * NO_CRC_ON_SOLID_SKIP. This option is used during debugging,
3846 * and it will check checksums of unpacked data even when
3847 * we're skipping it. */
3849 #if defined CHECK_CRC_ON_SOLID_SKIP
3859 /* During unpacking, on each unpacked block we're calling the
3860 * update_crc() function. Since we are here, the unpacking
3861 * process is already over and we can check if calculated
3862 * checksum (CRC32 or BLAKE2sp) is the same as what is stored
3863 * in the archive. */
3864 if(rar->file.stored_crc32 > 0) {
3865 /* Check CRC32 only when the file contains a CRC32
3866 * value for this file. */
3868 if(rar->file.calculated_crc32 !=
3869 rar->file.stored_crc32) {
3870 /* Checksums do not match; the unpacked file
3874 printf("Checksum error: CRC32 "
3875 "(was: %08" PRIx32 ", expected: %08" PRIx32 ")\n",
3876 rar->file.calculated_crc32,
3877 rar->file.stored_crc32);
3880 #ifndef DONT_FAIL_ON_CRC_ERROR
3881 archive_set_error(&a->archive,
3882 ARCHIVE_ERRNO_FILE_FORMAT,
3883 "Checksum error: CRC32");
3884 return ARCHIVE_FATAL;
3888 printf("Checksum OK: CRC32 "
3889 "(%08" PRIx32 "/%08" PRIx32 ")\n",
3890 rar->file.stored_crc32,
3891 rar->file.calculated_crc32);
3896 if(rar->file.has_blake2 > 0) {
3897 /* BLAKE2sp is an optional checksum algorithm that is
3898 * added to RARv5 archives when using the `-htb` switch
3899 * during creation of archive.
3901 * We now finalize the hash calculation by calling the
3902 * `final` function. This will generate the final hash
3903 * value we can use to compare it with the BLAKE2sp
3904 * checksum that is stored in the archive.
3906 * The return value of this `final` function is not
3907 * very helpful, as it guards only against improper use.
3908 * This is why we're explicitly ignoring it. */
3911 (void) blake2sp_final(&rar->file.b2state, b2_buf, 32);
3913 if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) {
3914 #ifndef DONT_FAIL_ON_CRC_ERROR
3915 archive_set_error(&a->archive,
3916 ARCHIVE_ERRNO_FILE_FORMAT,
3917 "Checksum error: BLAKE2");
3919 return ARCHIVE_FATAL;
3925 /* Finalization for this file has been successfully completed. */
/* Finalization entry point; forwards to verify_checksums() and returns
 * its result unchanged. */
static int verify_global_checksums(struct archive_read* a) {
	const int status = verify_checksums(a);

	return status;
}
3934 * Decryption function for the magic signature pattern. Check the comment near
3935 * the `rar5_signature_xor` symbol to read the rationale behind this.
3937 static void rar5_signature(char *buf) {
3940 for(i = 0; i < sizeof(rar5_signature_xor); i++) {
3941 buf[i] = rar5_signature_xor[i] ^ 0xA1;
3945 static int rar5_read_data(struct archive_read *a, const void **buff,
3946 size_t *size, int64_t *offset) {
3948 struct rar5* rar = get_context(a);
3953 if(rar->file.dir > 0) {
3954 /* Don't process any data if this file entry was declared
3955 * as a directory. This is needed, because entries marked as
3956 * directory doesn't have any dictionary buffer allocated, so
3957 * it's impossible to perform any decompression. */
3958 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
3959 "Can't decompress an entry marked as a directory");
3960 return ARCHIVE_FAILED;
3963 if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) {
3964 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3965 "Unpacker has written too many bytes");
3966 return ARCHIVE_FATAL;
3969 ret = use_data(rar, buff, size, offset);
3970 if(ret == ARCHIVE_OK) {
3974 if(rar->file.eof == 1) {
3978 ret = do_unpack(a, rar, buff, size, offset);
3979 if(ret != ARCHIVE_OK) {
3983 if(rar->file.bytes_remaining == 0 &&
3984 rar->cstate.last_write_ptr == rar->file.unpacked_size)
3986 /* If all bytes of current file were processed, run
3989 * Finalization will check checksum against proper values. If
3990 * some of the checksums will not match, we'll return an error
3991 * value in the last `archive_read_data` call to signal an error
3995 return verify_global_checksums(a);
/* Skip callback of the rar5 reader: disposes of whatever is left of the
 * current entry's data.
 *
 * Solid archives (rar->main.solid is non-zero): rar5_read_data() is called
 * with NULL for all three output arguments for as long as
 * file.bytes_remaining is greater than 0. A negative status or ARCHIVE_EOF
 * coming back from that call is treated as a condition to hand to the
 * caller.
 *
 * Other archives: file.bytes_remaining bytes are dropped from the input
 * with consume(); ARCHIVE_FATAL is returned when consume() does not report
 * ARCHIVE_OK. Afterwards file.bytes_remaining is reset to 0.
 *
 * NOTE(review): the value returned on the successful paths should be
 * confirmed -- presumably ARCHIVE_OK. */
4001 static int rar5_read_data_skip(struct archive_read *a) {
4002 struct rar5* rar = get_context(a);
4004 if(rar->main.solid) {
4005 /* In solid archives, instead of skipping the data, we need to
4006 * extract it, and dispose the result. The side effect of this
4007 * operation will be setting up the initial window buffer state
4008 * needed to be able to extract the selected file. */
4012 /* Make sure to process all blocks in the compressed stream. */
4013 while(rar->file.bytes_remaining > 0) {
4014 /* Setting the "skip mode" will allow us to skip
4015 * checksum checks during data skipping. Checking the
4016 * checksum of skipped data isn't really necessary and
4017 * it's only slowing things down.
4019 * This is incremented instead of setting to 1 because
4020 * this data skipping function can be called
4024 /* We're disposing 1 block of data, so we use triple
4025 * NULLs in arguments. */
4026 ret = rar5_read_data(a, NULL, NULL, NULL);
4028 /* Turn off "skip mode". */
4031 if(ret < 0 || ret == ARCHIVE_EOF) {
4032 /* Propagate any potential error conditions
4038 /* In standard archives, we can just jump over the compressed
4039 * stream. Each file in non-solid archives starts from an empty
4042 if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) {
4043 return ARCHIVE_FATAL;
4046 rar->file.bytes_remaining = 0;
/* Seek callback of the rar5 reader. The reader works as a streaming
 * unpacker and offers no seeking, so the request is answered with
 * ARCHIVE_FATAL.
 * NOTE(review): presumably the arguments are ignored -- confirm. */
4052 static int64_t rar5_seek_data(struct archive_read *a, int64_t offset,
4059 /* We're a streaming unpacker, and we don't support seeking. */
4061 return ARCHIVE_FATAL;
/* Cleanup callback of the rar5 reader.
 *
 * Works on the context returned by get_context(a): frees the
 * cstate.window_buf, cstate.filtered_buf and vol.push_buf buffers, releases
 * the cstate.filters deque through cdeque_free(), and finally resets
 * a->format->data to NULL.
 *
 * NOTE(review): presumably the context structure itself is released here
 * as well and ARCHIVE_OK is returned -- confirm. */
4064 static int rar5_cleanup(struct archive_read *a) {
4065 struct rar5* rar = get_context(a);
4067 free(rar->cstate.window_buf);
4068 free(rar->cstate.filtered_buf);
4070 free(rar->vol.push_buf);
4073 cdeque_free(&rar->cstate.filters);
4076 a->format->data = NULL;
/* Capabilities callback of the rar5 reader; takes the reader handle and
 * returns an int.
 * NOTE(review): which capability flags are reported (if any) must be
 * checked against the function body -- TODO confirm. */
4081 static int rar5_capabilities(struct archive_read * a) {
/* Encryption-query callback of the rar5 reader. Reporting on encrypted
 * entries is not supported by this format handler for now, so the answer
 * is ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED. */
4086 static int rar5_has_encrypted_entries(struct archive_read *_a) {
4089 /* Unsupported for now. */
4090 return ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED;
/* Brings a rar5 context into its initial state.
 *
 * The whole structure is zero-filled first; afterwards the cstate.filters
 * deque is set up with cdeque_init(), passing 8192 as its second argument
 * (TODO confirm whether that number counts elements or bytes).
 *
 * Returns ARCHIVE_FATAL when cdeque_init() reports anything other than
 * CDE_OK. NOTE(review): presumably ARCHIVE_OK otherwise -- confirm. */
4093 static int rar5_init(struct rar5* rar) {
4094 memset(rar, 0, sizeof(struct rar5));
4096 if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192))
4097 return ARCHIVE_FATAL;
4102 int archive_read_support_format_rar5(struct archive *_a) {
4103 struct archive_read* ar;
4107 if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar)))
4110 rar = malloc(sizeof(*rar));
4112 archive_set_error(&ar->archive, ENOMEM,
4113 "Can't allocate rar5 data");
4114 return ARCHIVE_FATAL;
4117 if(ARCHIVE_OK != rar5_init(rar)) {
4118 archive_set_error(&ar->archive, ENOMEM,
4119 "Can't allocate rar5 filter buffer");
4121 return ARCHIVE_FATAL;
4124 ret = __archive_read_register_format(ar,
4131 rar5_read_data_skip,
4135 rar5_has_encrypted_entries);
4137 if(ret != ARCHIVE_OK) {
4138 (void) rar5_cleanup(ar);