From f75a5c888ab8b6a25653990d10cbe025bcef2c89 Mon Sep 17 00:00:00 2001 From: Karol Lewandowski Date: Wed, 24 Aug 2022 15:12:17 +0200 Subject: [PATCH] Import bsdiff from libtota This commit import tools to generate binary deltas from libtota commit 143447ad7 ("ss_bsdiff: Fix to speed up patch generation") Change-Id: I3ef5abd6065b476ffbd682c92bf306edd9a2ea4f --- LICENSE.Apache-2.0 | 202 ++++++++++++ LICENSE.BSD-2-Clause | 23 ++ bsdiff/CMakeLists.txt | 28 ++ bsdiff/ss_brotli_patch.c | 355 ++++++++++++++++++++ bsdiff/ss_brotli_patch.h | 22 ++ bsdiff/ss_bsdiff.c | 805 +++++++++++++++++++++++++++++++++++++++++++++ bsdiff/ss_bspatch.c | 155 +++++++++ bsdiff/ss_bspatch_common.c | 291 ++++++++++++++++ bsdiff/ss_bspatch_common.h | 17 + 9 files changed, 1898 insertions(+) create mode 100644 LICENSE.Apache-2.0 create mode 100644 LICENSE.BSD-2-Clause create mode 100644 bsdiff/CMakeLists.txt create mode 100644 bsdiff/ss_brotli_patch.c create mode 100644 bsdiff/ss_brotli_patch.h create mode 100644 bsdiff/ss_bsdiff.c create mode 100644 bsdiff/ss_bspatch.c create mode 100644 bsdiff/ss_bspatch_common.c create mode 100644 bsdiff/ss_bspatch_common.h diff --git a/LICENSE.Apache-2.0 b/LICENSE.Apache-2.0 new file mode 100644 index 0000000..fef8c29 --- /dev/null +++ b/LICENSE.Apache-2.0 @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/LICENSE.BSD-2-Clause b/LICENSE.BSD-2-Clause new file mode 100644 index 0000000..e9cc3cf --- /dev/null +++ b/LICENSE.BSD-2-Clause @@ -0,0 +1,23 @@ +Copyright 2003-2005 Colin Percival +Copyright 2012 Matthew Endsley +All rights reserved + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/bsdiff/CMakeLists.txt b/bsdiff/CMakeLists.txt new file mode 100644 index 0000000..ecd7205 --- /dev/null +++ b/bsdiff/CMakeLists.txt @@ -0,0 +1,28 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.6) +PROJECT(ss_bsdiff C) + +SET(ss_bsdiff_SRCS ss_bsdiff.c) +SET(ss_bspatch_SRCS + ss_bspatch_common.c + ss_bspatch.c +) + +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/bsdiff) + +INCLUDE(FindPkgConfig) +pkg_check_modules(${PROJECT_NAME}_pkgs REQUIRED liblzma-tool libdivsufsort libbrotlienc) + +FOREACH(flag ${${PROJECT_NAME}_pkgs_CFLAGS}) + SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}") +ENDFOREACH(flag) + +SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} -I./include") +SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS}") + +ADD_EXECUTABLE(ss_bsdiff ${ss_bsdiff_SRCS}) +TARGET_LINK_LIBRARIES(ss_bsdiff ${${PROJECT_NAME}_pkgs_LDFLAGS} "-g" "-pthread") +INSTALL(TARGETS ss_bsdiff DESTINATION bin) + +ADD_EXECUTABLE(ss_bspatch ${ss_bspatch_SRCS}) +TARGET_LINK_LIBRARIES(ss_bspatch ${${PROJECT_NAME}_pkgs_LDFLAGS} "-g" "-pthread") +INSTALL(TARGETS ss_bspatch DESTINATION bin) diff --git a/bsdiff/ss_brotli_patch.c b/bsdiff/ss_brotli_patch.c new file mode 100644 index 0000000..6574d80 --- /dev/null +++ b/bsdiff/ss_brotli_patch.c @@ -0,0 +1,355 @@ +/* + * libtota + * + * Copyright (c) 2022 Samsung Electronics Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the License); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fota_log.h" + +#define PF_OK 0 +#define PF_ERROR_OPEN_FILE 1 +#define PF_ERROR_MMAP 2 +#define PF_ERROR_INVALID_PATCH_FILE 3 +#define PF_ERROR_DECOMPRESSION 4 + +#define BUFF_IN_LEN 4096 +#define BUFF_OUT_LEN 4096 +#define SSINT_LEN 8 +#define BLOCK_COUNT_REPORT 5000 + +const char SSDIFF_MAGIC[] = "SSDIFF40"; + +struct bs_data { + int src_fd, dest_fd, patch_fd; + void *src_ptr, *dest_ptr, *patch_ptr; + size_t src_len, dest_len, patch_len; + unsigned char buff_in[BUFF_IN_LEN]; + unsigned char buff_out[BUFF_IN_LEN]; + uint8_t *dest_pos; + uint8_t *src_pos; + size_t available_in, available_out; + const uint8_t *compressed_pos; + uint8_t *decompressed_pos; + size_t total_size; + BrotliDecoderState *bstate; +}; + +static void free_data(struct bs_data *data) +{ + if (data == NULL) + return; + + if (data->src_ptr) munmap(data->src_ptr, data->src_len); + if (data->dest_ptr) munmap(data->dest_ptr, data->dest_len); + if (data->patch_ptr) munmap(data->patch_ptr, data->patch_len); + + if (data->src_fd) close(data->src_fd); + if (data->patch_fd) close(data->patch_fd); + if (data->dest_fd) close(data->dest_fd); +} + +static int open_file(char *file_name, int mode) +{ + assert(file_name); + int fd = open(file_name, mode, S_IWUSR | S_IRUSR); + if (fd < 0) + LOGE("Open file %s error: %m (%d)\n", file_name, errno); + return fd; +} + +static size_t get_file_len(int fd) +{ + assert(fd >= 0); + size_t result = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + return result; +} + + +static size_t decompress_bytes(struct bs_data *data, size_t keep_offset) +{ + assert(data); + if (keep_offset > 0) { + memcpy(data->buff_out, data->buff_out + sizeof(data->buff_out) - keep_offset, keep_offset); + } + data->decompressed_pos = data->buff_out + keep_offset; + data->available_out = sizeof(data->buff_out) - keep_offset; + + BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT; + + result = BrotliDecoderDecompressStream(data->bstate, + &data->available_in, + &data->compressed_pos, + &data->available_out, + &data->decompressed_pos, + &data->total_size); + + if (result == BROTLI_DECODER_RESULT_ERROR) { + LOGE("Decoder error\n"); + return PF_ERROR_DECOMPRESSION; + } else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { + LOGE("Invalid source file\n"); + return PF_ERROR_DECOMPRESSION; + } + + return PF_OK; +} + +static int open_files(struct bs_data *data, char *source_file, size_t source_size, char *dest_file, size_t dest_size, char *patch_file) +{ + assert(data); + assert(source_file); + assert(dest_file); + assert(patch_file); + + data->src_fd = open_file(source_file, O_RDONLY); + data->patch_fd = open_file(patch_file, O_RDONLY); + data->dest_fd = open_file(dest_file, O_RDWR); + if (data->src_fd < 0 || + data->patch_fd < 0 || + data->dest_fd < 0) + return PF_ERROR_OPEN_FILE; + + data->src_len = source_size; + data->patch_len = get_file_len(data->patch_fd); + data->dest_len = dest_size; + + data->src_ptr = mmap(NULL, data->src_len, PROT_READ, MAP_PRIVATE, data->src_fd, 0); + if (data->src_ptr == MAP_FAILED) { + LOGE("mmap source file error: %m (%d)", errno); + return PF_ERROR_MMAP; + } + + data->patch_ptr = mmap(NULL, data->patch_len, PROT_READ, MAP_PRIVATE, data->patch_fd, 0); + if (data->patch_ptr == MAP_FAILED) { + LOGE("mmap patch file error: %m (%d)", errno); + return PF_ERROR_MMAP; + } + + data->dest_ptr = mmap(NULL, data->dest_len, PROT_WRITE, MAP_SHARED, data->dest_fd, 0); + if (data->dest_ptr == MAP_FAILED) { + LOGE("mmap destination error: %m (%d)\n", errno); + return PF_ERROR_MMAP; + } + + data->compressed_pos = data->patch_ptr; + data->available_in = data->patch_len; + + return PF_OK; +} + +static void init_data(struct bs_data *data) +{ + assert(data); + + data->src_fd = -1; + data->patch_fd = -1; + data->dest_fd = -1; + data->src_ptr = NULL; + data->dest_ptr = NULL; + data->patch_ptr = NULL; + data->src_len = 0; + data->dest_len = 0; + data->patch_len = 0; + data->available_in = 0; + data->compressed_pos = 0; + data->available_out = 0; + data->decompressed_pos = 0; + data->bstate = BrotliDecoderCreateInstance(NULL, NULL, NULL); +} + +static int64_t parse_ssint(unsigned char *buff) +{ + assert(buff); + /* + * From bsdiff 4.0 documentation: + * + * INTEGER type: + * + * offset size data type value + * 0 1 byte x0 + * 1 1 byte x1 + * 2 1 byte x2 + * 3 1 byte x3 + * 4 1 byte x4 + * 5 1 byte x5 + * 6 1 byte x6 + * 7 1 byte x7 + 128 * s + * + * The values x0, x2, x2, x3, x4, x5, x6 are between 0 and 255 (inclusive). + * The value x7 is between 0 and 127 (inclusive). The value s is 0 or 1. + * + * The INTEGER is parsed as: + * (x0 + x1 * 256 + x2 * 256^2 + x3 * 256^3 + x4 * 256^4 + + * x5 * 256^5 + x6 * 256^6 + x7 * 256^7) * (-1)^s + * + * (In other words, an INTEGER is a 64-byte signed integer in sign-magnitude + * format, stored in little-endian byte order.) + */ + int64_t result = *(int64_t*)buff & 0x7fffffff; + if ((buff[7] & 0x80) != 0) + result = -result; + + return result; +} + +int read_header(struct bs_data *data, uint8_t **buff_out_pos) +{ + assert(data); + assert(buff_out_pos); + + *buff_out_pos = data->buff_out; + + if (*buff_out_pos + sizeof(SSDIFF_MAGIC) > data->decompressed_pos || + memcmp(data->buff_out, SSDIFF_MAGIC, sizeof(SSDIFF_MAGIC) - 1) != 0) { + LOGE("Invalid patch file\n"); + return PF_ERROR_INVALID_PATCH_FILE; + } else { + LOGL(LOG_SSENGINE, "Looks like SSDIFF\n"); + } + + *buff_out_pos += sizeof(SSDIFF_MAGIC) - 1; + + if (*buff_out_pos + SSINT_LEN > data->decompressed_pos) { + decompress_bytes(data, data->decompressed_pos - *buff_out_pos); + *buff_out_pos = data->buff_out; + } + + size_t target_size = parse_ssint(*buff_out_pos); + LOGL(LOG_SSENGINE, "target_size: 0x%lx (%ld)\n", target_size, target_size); + + if (target_size != data->dest_len) { + LOGE("Declared target size differs from that read from the patch\n"); + return PF_ERROR_INVALID_PATCH_FILE; + } + + *buff_out_pos += SSINT_LEN; + + return PF_OK; +} + +int apply_patch_brotli(char *source_file, size_t source_size, char *dest_file, size_t dest_size, char *patch_file) +{ + assert(source_file); + assert(dest_file); + assert(patch_file); + + int result; + uint64_t blocks = 0; + struct bs_data data; + + init_data(&data); + + if ((result = open_files(&data, source_file, source_size, dest_file, dest_size, patch_file)) != PF_OK) + goto exit; + + if ((result = decompress_bytes(&data, 0)) != PF_OK) + goto exit; + + uint8_t *buff_out_pos; + + if ((result = read_header(&data, &buff_out_pos)) != PF_OK) + goto exit; + + uint64_t total_write = 0; + + while (total_write < data.dest_len) { + /* + * Make sure we can read the block header + */ + if (buff_out_pos + 4*8 > data.decompressed_pos) { + if ((result = decompress_bytes(&data, data.decompressed_pos - buff_out_pos)) != PF_OK) + goto exit; + buff_out_pos = data.buff_out; + } + + /* + * Read the block header + */ + int64_t diff_len = parse_ssint(buff_out_pos+0*8); + int64_t extra_len = parse_ssint(buff_out_pos+1*8); + int64_t old_pos = parse_ssint(buff_out_pos+2*8); + int64_t new_pos = parse_ssint(buff_out_pos+3*8); + buff_out_pos += 4*8; + + /* + * Prepare pointers + */ + data.dest_pos = data.dest_ptr + new_pos; + data.src_pos = data.src_ptr + old_pos; + /* + * Read diff data + */ + int64_t write = 0; + while (write < diff_len) { + if (buff_out_pos >= data.decompressed_pos) { + if ((result = decompress_bytes(&data, 0)) != PF_OK) + goto exit; + buff_out_pos = data.buff_out; + } + while (write < diff_len && buff_out_pos < data.decompressed_pos) { + *data.dest_pos = *(uint8_t*)(data.src_pos) + *(uint8_t*)buff_out_pos; + data.dest_pos++; + data.src_pos++; + buff_out_pos++; + write++; + } + } + total_write += write; + /* + * Read extra data + */ + write = 0; + while (write < extra_len) { + if (buff_out_pos >= data.decompressed_pos) { + if ((result = decompress_bytes(&data, 0)) != PF_OK) + goto exit; + buff_out_pos = data.buff_out; + } + int64_t chunk_size = extra_len - write; + if (buff_out_pos + chunk_size > data.decompressed_pos) { + chunk_size = data.decompressed_pos - buff_out_pos; + } + memcpy(data.dest_pos, buff_out_pos, chunk_size); + data.dest_pos += chunk_size; + buff_out_pos += chunk_size; + write += chunk_size; + } + total_write += write; + + blocks++; + if (blocks % BLOCK_COUNT_REPORT == 0) { + printf("Number of processed patch blocks: %lld\n", blocks); + } + } + + result = PF_OK; + +exit: + printf("Total processed blocks: %lld\n", blocks); + free_data(&data); + return result; +} diff --git a/bsdiff/ss_brotli_patch.h b/bsdiff/ss_brotli_patch.h new file mode 100644 index 0000000..47694b9 --- /dev/null +++ b/bsdiff/ss_brotli_patch.h @@ -0,0 +1,22 @@ +/* + * libtota + * + * Copyright (c) 2022 Samsung Electronics Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the License); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +extern int apply_patch_brotli(char *source_file, size_t source_size, char *dest_file, size_t dest_size, char *patch_file); diff --git a/bsdiff/ss_bsdiff.c b/bsdiff/ss_bsdiff.c new file mode 100644 index 0000000..76fbe41 --- /dev/null +++ b/bsdiff/ss_bsdiff.c @@ -0,0 +1,805 @@ +/*- + * Copyright 2003-2005 Colin Percival + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +// This file is a nearly copy of bsdiff.c from the +// bsdiff-4.3 distribution; the primary differences being how the +// input and output data are read, data post processing, +// search function modification and the error handling. + +#define _CRT_SECURE_NO_WARNINGS +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include <7zFile.h> +#include <7zVersion.h> +#include +#include +#include + +#define SUFSORT_MOD // Change suffix sorting algorithm from Qsufsort to Divsufsort +//#define ZLIB_MOD // Change compression algorithm +#define SEARCH_RECURSIVE // Change recursion of Search function to Iteration +//#define MAX_MATCH_SIZE // define ( MAX_MATCH_SIZE or CONST_MEMORY_USAGE ) or ( none of them ); use max_match memory for old file at patch side +#define CONST_MEMORY_USAGE 16384 // patch in m+O(1); m=size of new file; use only const memory for old file at patch side; +#define PATCH_FILE_FORMAT_MOD // no accumulation of diff and extra in db and eb; immediate write; also write all 3 parts of control stmt at same time +#define MULTI_THREADING 1 // only with #define CONST_MEMORY_USAGE or #define MAX_MATCH_SIZE +#define TIME_LIMIT_CHECK 4*60*60 // After TIME_LIMIT_CHECK seconds, new diff will be created with smaller score argument +#define TEMP_PATCH_NAME "temp_patch" +#define BROTLI_COMPRESSION_QUALITY 9 +#define ITERATIONS_COMPLETED 10 // After ITERATIONS_COMPLETED iterations elapsed time will be checked. Increasing it can cause the program to run longer than expected + // (the longer the program runs the longer the iterations) + +/* Take care : +1) Use either (MAX_MATCH_SIZE or CONST_MEMORY_USAGE) or (none of both). +2.) PATCH_FILE_FORMAT_MOD may or may not be used, independently of everything else. +3.) MULTI_THREADING can be used only with (CONST_MEMORY_USAGE or MAX_MATCH_SIZE). +*/ +#ifdef TIME_LIMIT_CHECK +long long outer_count = 0; +char ts1[256]; +void get_time_stamp(void) +{ + struct timeval tv; + int sec, msec; + + gettimeofday(&tv, NULL); + sec = (int)tv.tv_sec; + msec = (int)(tv.tv_usec / 1000); + snprintf(ts1, 256, "%06d.%03d", sec % 100000, msec); +} +#endif +#ifdef SUFSORT_MOD +//supporting only 32 bit divsufsort for now. +#include +#endif + +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + +#ifdef MULTI_THREADING + +struct data_thread { + unsigned num_threads; + off_t size_thread; + + int fd; + int rc; + u_char *old; + u_char **new; + off_t oldsize; + off_t newsize; + + saidx_t *I; + u_char buf[8]; + u_char header[16]; + u_char buf2[32]; + + off_t lenn; + off_t lenn2; + + FILE * pf; + int bz2err; + FILE * pfbz2; +}; + +enum compression_method { + CM_LZMA, + CM_BROTLI, +}; + +struct bsdiff_info { + const char *old_file; + const char *new_file; + const char *patch_file; + enum compression_method comp_method; +}; + +struct data_thread data; + +int Function(int); + + +#endif + +static off_t matchlen(u_char *old, off_t oldsize, u_char *new, off_t newsize) +{ + off_t i; + for (i = 0; (i < oldsize) && (i < newsize); i++) + if (old[i] != new[i]) + break; + return i; +} + +static off_t search(saidx_t *I, u_char *old, off_t oldsize, + u_char *new, off_t newsize, off_t st, off_t en, off_t *pos) +{ + off_t x, y; + while (en - st >= 2) { + x = st + (en - st) / 2; + if (memcmp(old + I[x], new, MIN(oldsize - I[x], newsize)) < 0) + st = x; + else + en = x; + } + + x = matchlen(old + I[st], oldsize - I[st], new, newsize); + y = matchlen(old + I[en], oldsize - I[en], new, newsize); + + if (x > y) { + *pos = I[st]; + return x; + } else { + *pos = I[en]; + return y; + } +} + +static void offtout(off_t x, u_char *buf) +{ + off_t y; + + if (x < 0) + y = -x; + else + y = x; + + buf[0] = y % 256; + y -= buf[0]; + y = y / 256; + buf[1] = y % 256; + y -= buf[1]; + y = y / 256; + buf[2] = y % 256; + y -= buf[2]; + y = y / 256; + buf[3] = y % 256; + y -= buf[3]; + y = y / 256; + buf[4] = y % 256; + y -= buf[4]; + y = y / 256; + buf[5] = y % 256; + y -= buf[5]; + y = y / 256; + buf[6] = y % 256; + y -= buf[6]; + y = y / 256; + buf[7] = y % 256; + + if (x < 0) + buf[7] |= 0x80; +} + +static off_t bulk_read(int fd, u_char * buf, off_t size) +{ + off_t bytes_read = 0; + int ret = 0; + + while(bytes_read < size) + { + ret = read(fd, buf + bytes_read, size - bytes_read); + if (ret > 0) + bytes_read += ret; + else + break; + } + return bytes_read; +} + +int create_patch(const char *old_file, const char *new_file, const char *temp_patch, int offset_oldscore) +{ + assert(old_file); + assert(new_file); + + data.num_threads = MULTI_THREADING; + data.new = (u_char **)malloc(sizeof(u_char *)*data.num_threads); + + /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure + that we never try to malloc(0) and get a NULL pointer */ + if (((data.fd = open(old_file, O_RDONLY, 0)) < 0) || + ((data.oldsize = lseek(data.fd, 0, SEEK_END)) == -1) || + ((data.old = malloc(data.oldsize + 1)) == NULL) || + (lseek(data.fd, 0, SEEK_SET) != 0) || + (bulk_read(data.fd, data.old, data.oldsize) != data.oldsize) || + (close(data.fd) == -1)) + err(1, "%s", old_file); + + data.I = malloc((data.oldsize + 1) * sizeof(saidx_t)); + if (!data.I) + err(1, "Memory allocation error: %s", old_file); + divsufsort(data.old, data.I, data.oldsize); + + /* Allocate newsize+1 bytes instead of newsize bytes to ensure + that we never try to malloc(0) and get a NULL pointer */ + if (((data.fd = open(new_file, O_RDONLY, 0)) < 0) || + ((data.newsize = lseek(data.fd, 0, SEEK_END)) == -1) || + (lseek(data.fd, 0, SEEK_SET) != 0)) + err(1, "%s", new_file); + data.size_thread = (data.newsize / data.num_threads); + + unsigned int j; + for (j = 0; j < data.num_threads; ++j) { + if (j != data.num_threads - 1) { + if (((data.new[j] = (u_char *)malloc(sizeof(u_char) * (data.size_thread + 1))) == NULL) || + (lseek(data.fd, 0, SEEK_CUR) != j * data.size_thread) || + (bulk_read(data.fd, data.new[j], data.size_thread) != data.size_thread)) + err(1, "%s", new_file); + } else { + if (((data.new[j] = (u_char *)malloc(sizeof(u_char) * (data.newsize - (j * data.size_thread) + 1))) == NULL) || + (lseek(data.fd, 0, SEEK_CUR) != j * data.size_thread) || + (bulk_read(data.fd, data.new[j], data.newsize - (j * data.size_thread)) != data.newsize - (j * data.size_thread))) + err(1, "here %s", new_file); + } + } + + if ((close(data.fd) == -1)) + err(1, "%s", new_file); + + /* Create the patch file */ + if ((data.pf = fopen(temp_patch, "w")) == NULL) + err(1, "%s", temp_patch); + + /* Header is + 0 8 "BSDIFF40" + 8 8 length of bzip2ed ctrl block + 16 8 length of bzip2ed diff block + 24 8 length of new file */ + /* File is + 0 32 Header + 32 ?? Bzip2ed ctrl block + ?? ?? Bzip2ed diff block + ?? ?? Bzip2ed extra block */ + memcpy(data.header, "SSDIFF40", 8); + + offtout(data.newsize, data.header + 8); + if (fwrite(data.header, 16, 1, data.pf) != 1) + err(1, "fwrite(%s)", temp_patch); + + /* Compute the differences, writing ctrl as we go */ + data.pfbz2 = data.pf; + //if ((data.pfbz2 = BZ2_bzWriteOpen(&data.bz2err, data.pf, 9, 0, 0)) == NULL) + //errx(1, "BZ2_bzWriteOpen, bz2err = %d", data.bz2err); + + + //BZ2_bzWriteClose(&data.bz2err, data.pfbz2, 0, NULL, NULL); + //if (data.bz2err != BZ_OK) + //errx(1, "BZ2_bzWriteClose, bz2err = %d", data.bz2err); + + int ret = Function(offset_oldscore); +#ifdef TIME_LIMIT_CHECK + if (ret != 0) { + printf("bsdiff fails to create delta with offset score %d\n", offset_oldscore); + printf("Old: [%s] -> New: [%s]\n", old_file, new_file); + } +#endif + /* Seek to the beginning, write the header, and close the file */ + if (fseeko(data.pf, 0, SEEK_SET)) + err(1, "fseeko"); + + if (fwrite(data.header, 16, 1, data.pf) != 1) + err(1, "fwrite(%s)", temp_patch); + if (fclose(data.pf)) + err(1, "fclose"); + /* Free the memory we used */ + free(data.I); + free(data.old); + free(data.new); + + return ret; +} + +int Function(int offset_oldscore) +{ + unsigned int thread_num = 0; + off_t end; + off_t scan = 0; + end = data.newsize; + int t1 = 0, t2 = 0; + get_time_stamp(); //total time capturing + t1 = atoi(ts1); + +#ifdef PATCH_FILE_FORMAT_MOD + u_char* db; + u_char* eb; + off_t dblen; + off_t eblen; +#endif + + off_t pos; + off_t len; + off_t lastscan; + off_t lastpos; + off_t lastoffset; + off_t oldscore; + off_t scsc; + off_t s; + off_t Sf; + off_t lenf; + off_t Sb; + off_t lenb; + off_t overlap; + off_t Ss; + off_t lens; + off_t i; + + pos = 0; + len = 0; + lastscan = 0; + lastpos = 0; + lastoffset = 0; + while (scan < end) { + oldscore = 0; + size_t prev_len; + uint64_t prev_pos, prev_oldscore; + int num_less_than_eight = 0; + for (scsc = scan += len; scan < end; scan++) { + prev_len = len; + prev_oldscore = oldscore; + prev_pos = pos; + len = search(data.I, data.old, data.oldsize, data.new[thread_num] + scan, end - scan, + len, data.oldsize, &pos); // Passing parameter as len instead of 0 for ramdisk.img etc taking long time + + for (; scsc < scan + len; scsc++) + if ((scsc + lastoffset < data.oldsize) && + (data.old[scsc + lastoffset] == data.new[thread_num][scsc])) + oldscore++; +#ifdef TIME_LIMIT_CHECK + if (offset_oldscore > 4) // when offset_oldscore is 4 and less we have to make sure diff is created no mater what, so we can't timeout + outer_count++; + if (outer_count > ITERATIONS_COMPLETED) { + outer_count = 0; + get_time_stamp(); //total time capturing + t2 = atoi(ts1); + //printf("\ntime diff = %d\n", (t2 - t1)); + if ((t2 - t1) > TIME_LIMIT_CHECK) + return 1; + } +#endif + if (((len == oldscore) && (len != 0)) || + (len > oldscore + offset_oldscore)) + break; + + if ((scan + lastoffset < data.oldsize) && + (data.old[scan + lastoffset] == data.new[thread_num][scan])) + oldscore--; + + /* + * Modification created by Thieu Le + * which speeds up patch generation time in situations + * where large blocks of data differ by less than 8 bytes. + * https://android.googlesource.com/platform/external/bsdiff/+/d172820cb8b4513478f0db5546d6e4d388adc1a7 + */ + const size_t fuzz = 8; + if (prev_len - fuzz <= len && len <= prev_len && + prev_oldscore - fuzz <= oldscore && + prev_pos <= pos && pos <= prev_pos + fuzz && + oldscore <= len && len <= oldscore + fuzz) { + num_less_than_eight++; + } else { + num_less_than_eight = 0; + } + if (num_less_than_eight > 100) break; + + }; + if ((len != oldscore) || (scan == end)) { + s = 0; + Sf = 0; + lenf = 0; + for (i = 0; (lastscan + i < scan) && (lastpos + i < data.oldsize); ) { + if (data.old[lastpos + i] == data.new[thread_num][lastscan + i]) + s++; + i++; + if (s * 2 - i > Sf * 2 - lenf) { + Sf = s; + lenf = i; + }; + }; + + lenb = 0; + if (scan < end) { + s = 0; + Sb = 0; + for (i = 1; (scan >= lastscan + i) && (pos >= i); i++) { + if (data.old[pos - i] == data.new[thread_num][scan - i]) + s++; + if (s * 2 - i > Sb * 2 - lenb) { + Sb = s; + lenb = i; + }; + }; + }; + + if (lastscan + lenf > scan - lenb) { + overlap = (lastscan + lenf) - (scan - lenb); + s = 0; + Ss = 0; + lens = 0; + for (i = 0; i < overlap; i++) { + if (data.new[thread_num][lastscan + lenf - overlap + i] == + data.old[lastpos + lenf - overlap + i]) + s++; + if (data.new[thread_num][scan - lenb + i] == + data.old[pos - lenb + i]) + s--; + if (s > Ss) { + Ss = s; + lens = i + 1; + }; + }; + + lenf += lens - overlap; + lenb -= lens; + }; + + if (((db = malloc(lenf + 1)) == NULL) || + ((eb = malloc((scan - lenb) - (lastscan + lenf) + 1)) == NULL)) + err(1, NULL); + + for (i = 0; i < lenf; i++) + db[i] = data.new[thread_num][lastscan + i] - data.old[lastpos + i]; + for (i = 0; i < (scan - lenb) - (lastscan + lenf); i++) + eb[i] = data.new[thread_num][lastscan + lenf + i]; + dblen = lenf; + eblen = (scan - lenb) - (lastscan + lenf); + offtout(lenf, data.buf2); + offtout((scan - lenb) - (lastscan + lenf), data.buf2 + 8); + offtout(lastpos, data.buf2 + 16); + offtout((data.size_thread * thread_num) + lastscan, data.buf2 + 24); + fwrite(data.buf2, 1, 32, data.pf); + //if (data.bz2err != BZ_OK) + //errx(1, "fwrite, bz2err = %d", data.bz2err); + + fwrite(db, 1, dblen, data.pf); + //if (data.bz2err != BZ_OK) + //errx(1, "fwrite, bz2err = %d", data.bz2err); + + fwrite(eb, 1, eblen, data.pf); + //if (data.bz2err != BZ_OK) + //errx(1, "fwrite, bz2err = %d", data.bz2err); + + free(db); + free(eb); + + lastscan = scan - lenb; + lastpos = pos - lenb; + lastoffset = pos - scan; + }; + }; + return 0; +} + +const char *kCantReadMessage = "Can not read input file"; +const char *kCantWriteMessage = "Can not write output file"; +const char *kCantAllocateMessage = "Can not allocate memory"; +const char *kDataErrorMessage = "Data error"; + +static void *SzAlloc(void *p, size_t size) +{ + p = p; + return MyAlloc(size); +} +static void SzFree(void *p, void *address) +{ + p = p; + MyFree(address); +} +static ISzAlloc g_Alloc = { SzAlloc, SzFree }; + +int PrintError(char *buffer, const char *message, int buf_size) +{ + snprintf(buffer + strlen(buffer), buf_size - strlen(buffer), + "\nError: %s\n", message); + return 1; +} + +int PrintErrorNumber(char *buffer, SRes val, int buf_size) +{ + snprintf(buffer + strlen(buffer), buf_size - strlen(buffer), + "\nError code: %x\n", (unsigned)val); + return 1; +} + +int PrintUserError(char *buffer, int buf_size) +{ + return PrintError(buffer, "Incorrect command", buf_size); +} + +#define IN_BUF_SIZE (1 << 16) +#define OUT_BUF_SIZE (1 << 16) + + +static SRes lzma_encode(ISeqOutStream *outStream, ISeqInStream *inStream, UInt64 fileSize, char *rs) +{ + CLzmaEncHandle enc; + SRes res; + CLzmaEncProps props; + + rs = rs; + + enc = LzmaEnc_Create(&g_Alloc); + if (enc == 0) + return SZ_ERROR_MEM; + + LzmaEncProps_Init(&props); + res = LzmaEnc_SetProps(enc, &props); + + if (res == SZ_OK) { + Byte header[LZMA_PROPS_SIZE + 8]; + size_t headerSize = LZMA_PROPS_SIZE; + int i; + + res = LzmaEnc_WriteProperties(enc, header, &headerSize); + for (i = 0; i < 8; i++) + header[headerSize++] = (Byte)(fileSize >> (8 * i)); + if (outStream->Write(outStream, header, headerSize) != headerSize) + res = SZ_ERROR_WRITE; + else { + if (res == SZ_OK) + res = LzmaEnc_Encode(enc, outStream, inStream, NULL, &g_Alloc, &g_Alloc); + } + } + LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); + return res; +} + +int lzma_compress(const char *input_file, const char *output_file, char *rs, int rs_size) +{ + assert(input_file); + assert(output_file); + assert(rs); + + CFileSeqInStream inStream; + CFileOutStream outStream; + int res; + + FileSeqInStream_CreateVTable(&inStream); + File_Construct(&inStream.file); + + FileOutStream_CreateVTable(&outStream); + File_Construct(&outStream.file); + + size_t t4 = sizeof(UInt32); + size_t t8 = sizeof(UInt64); + if (t4 != 4 || t8 != 8) + return PrintError(rs, "Incorrect UInt32 or UInt64", rs_size); + + if (InFile_Open(&inStream.file, input_file) != 0) + return PrintError(rs, "Can not open input file", rs_size); + + + if (OutFile_Open(&outStream.file, output_file) != 0) + return PrintError(rs, "Can not open output file", rs_size); + + + UInt64 fileSize; + File_GetLength(&inStream.file, &fileSize); + res = lzma_encode(&outStream.s, &inStream.s, fileSize, rs); + + File_Close(&outStream.file); + File_Close(&inStream.file); + + if (res != SZ_OK) { + if (res == SZ_ERROR_MEM) + return PrintError(rs, kCantAllocateMessage, rs_size); + else if (res == SZ_ERROR_DATA) + return PrintError(rs, kDataErrorMessage, rs_size); + else if (res == SZ_ERROR_WRITE) + return PrintError(rs, kCantWriteMessage, rs_size); + else if (res == SZ_ERROR_READ) + return PrintError(rs, kCantReadMessage, rs_size); + return PrintErrorNumber(rs, res, rs_size); + } + return 0; +} + +int brotli_compress_internal(int input_fd, int output_fd, int quality) +{ + int res = -1; + size_t input_size = lseek(input_fd, 0, SEEK_END); + lseek(input_fd, 0, SEEK_SET); + void *input_file_ptr = mmap(NULL, input_size, PROT_READ, MAP_PRIVATE, input_fd, 0); + + if (input_file_ptr == MAP_FAILED) { + printf("Can not mmap input file: %d - %m\n", errno); + goto exit; + } + + BrotliEncoderState *bstate = BrotliEncoderCreateInstance(NULL, NULL, NULL); + if (bstate == 0) { + printf("Can not create BrotliEncoder instance\n"); + goto exit; + } + size_t max_output_size = BrotliEncoderMaxCompressedSize(input_size); + + if (max_output_size == 0) { + printf("Brotli engine error\n"); + goto exit; + } + + if (ftruncate(output_fd, max_output_size) == -1) { + printf("Can not truncate output file: %d - %m\n", errno); + goto exit; + } + + void *output_file_ptr = mmap(NULL, max_output_size, PROT_WRITE, MAP_SHARED, output_fd, 0); + if (output_file_ptr == MAP_FAILED) { + printf("Can not mmap output file: %d - %m\n", errno); + goto exit; + } + + if(!BrotliEncoderCompress(quality, + BROTLI_DEFAULT_WINDOW, + BROTLI_DEFAULT_MODE, + input_size, + input_file_ptr, + &max_output_size, + output_file_ptr)) { + printf("Compression error\n"); + goto exit; + } + if (ftruncate(output_fd, max_output_size) == -1) { + printf("Can not truncate output file after compression: %d - %m\n", errno); + goto exit; + } + + res = 0; +exit: + if (input_file_ptr) + munmap(input_file_ptr, input_size); + if (output_file_ptr) + munmap(output_file_ptr, max_output_size); + + return res; +} + +int brotli_compress(const char *input_file, const char *output_file, int quality) +{ + assert(input_file); + assert(output_file); + int res = -1; + + int input_fd = open(input_file, O_RDONLY); + if (input_fd < 0) { + printf("Can not open file: %s for read\n", input_file); + return res; + } + int output_fd = open(output_file, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); + if (output_fd < 0) { + printf("Can not open file: %s for write (%d: %m)\n", output_file, errno); + close(input_fd); + return res; + } + + res = brotli_compress_internal(input_fd, output_fd, quality); + + close(input_fd); + close(output_fd); + + return res; +} + +void print_help(const char *arg0) +{ + assert(arg0); + errx(1, "ss_bsdiff Version 5.0\nUsage: %s [-c ] oldfile newfile patchfile\n", arg0); +} + +int parse_args(struct bsdiff_info *info, int argc, char *argv[]) +{ + assert(info); + assert(argv); + + info->comp_method = CM_LZMA; // default compression method + + struct option long_options[] = { + {"compression", optional_argument, NULL, 'c'}, + {0, 0 , 0, 0} + }; + + int opt; + + while ((opt = getopt_long(argc, argv, "c:", long_options, NULL)) != -1) { + switch (opt) { + case 'c': + if (strcmp("lzma", optarg) == 0) + info->comp_method = CM_LZMA; + else if (strcmp("brotli", optarg) == 0) + info->comp_method = CM_BROTLI; + else { + err(1, "Unknown compression method: %s", optarg); + return -1; + } + } + } + + if (optind + 2 >= argc) { + err(1, "Not enough parameters"); + print_help(argv[0]); + return -1; + } + + info->old_file = argv[optind]; + info->new_file = argv[optind+1]; + info->patch_file = argv[optind+2]; + + return 0; +} + +int MY_CDECL main(int argc, char *argv[]) +{ + char rs[800] = { 0 }; + + struct bsdiff_info info; + if (parse_args(&info, argc, argv) != 0) + return 1; + + int ret = create_patch(info.old_file, info.new_file, TEMP_PATCH_NAME, 8); +#ifdef TIME_LIMIT_CHECK + if (ret != 0) { + /* + * Creating a patch with an offset score equal to 8 may take too long + * Therefore after a certain amount of time, patch creation is aborted + * and we run again with the score equal to 4. + */ + int score = 4; + printf("Trying with offset score %d\n", score); + ret = create_patch(info.old_file, info.new_file, TEMP_PATCH_NAME, score); + } + if (ret != 0) { + if (remove(TEMP_PATCH_NAME) < 0) + printf("Failed to remove %s\n", TEMP_PATCH_NAME); + err(1, "bsdiff fails to create delta within timelimit"); + } +#endif + int res = 0; + switch(info.comp_method) { + case CM_LZMA: + res = lzma_compress(TEMP_PATCH_NAME, info.patch_file, rs, sizeof(rs)); + break; + case CM_BROTLI: + res = brotli_compress(TEMP_PATCH_NAME, info.patch_file, BROTLI_COMPRESSION_QUALITY); + break; + default: + printf("Unknown compression method\n"); + res = -1; + break; + } + + if (remove(TEMP_PATCH_NAME) < 0) + printf("Failed to remove %s\n", TEMP_PATCH_NAME); + fputs(rs, stdout); + return res; +} diff --git a/bsdiff/ss_bspatch.c b/bsdiff/ss_bspatch.c new file mode 100644 index 0000000..8b6117f --- /dev/null +++ b/bsdiff/ss_bspatch.c @@ -0,0 +1,155 @@ +/*- + * Copyright 2003-2005 Colin Percival + * Copyright 2012 Matthew Endsley + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Modifications are made in reimplementing suffix sort array generation + * and how the data is read and written to.Iterative part replaced the + * recursive implementation to avoid buffer overflow problems + */ +#define _CRT_SECURE_NO_WARNINGS +#include +#include +#include + +#include +#include +#include + +#include +#include <7zFile.h> +#include <7zVersion.h> +#include +#include + +#include "ss_bspatch_common.h" + +const char *kCantReadMessage = "Can not read input file"; +const char *kCantWriteMessage = "Can not write output file"; +const char *kCantAllocateMessage = "Can not allocate memory"; +const char *kDataErrorMessage = "Data error"; + +int PrintError(char *buffer, const char *message, int buf_size) +{ + snprintf(buffer + strlen(buffer), buf_size - strlen(buffer), + "\nError: %s\n", message); + return 1; +} + +int PrintErrorNumber(char *buffer, SRes val, int buf_size) +{ + snprintf(buffer + strlen(buffer), buf_size - strlen(buffer), + "\nError code: %x\n", (unsigned)val); + return 1; +} + +int PrintUserError(char *buffer, int buf_size) +{ + return PrintError(buffer, "Incorrect command", buf_size); +} + +int main2(int numArgs, const char *args[], char *rs, int rs_size) +{ + CFileSeqInStream inStream; + CFileOutStream outStream; + int res, fd; + int encodeMode; + unsigned char *buf_res = NULL; + unsigned char *new_data; + ssize_t new_size; + + FileSeqInStream_CreateVTable(&inStream); + File_Construct(&inStream.file); + + FileOutStream_CreateVTable(&outStream); + //File_Construct(&outStream.file); + + encodeMode = 0; + + size_t t4 = sizeof(UInt32); + size_t t8 = sizeof(UInt64); + if (t4 != 4 || t8 != 8) + return PrintError(rs, "Incorrect UInt32 or UInt64", rs_size); + + if (InFile_Open(&inStream.file, args[3]) != 0) + return PrintError(rs, "Can not open input file", rs_size); + + if (encodeMode) { + UInt64 fileSize; + File_GetLength(&inStream.file, &fileSize); + //res = Encode(&outStream.s, &inStream.s, fileSize, rs); + } else { + UInt64 unpackSize, i; + CLzmaDec state; + unsigned char header[LZMA_PROPS_SIZE + 8]; + RINOK(SeqInStream_Read(&inStream.s, header, sizeof(header))); + unpackSize = 0; + for (i = 0; i < 8; i++) + unpackSize += (UInt64)header[LZMA_PROPS_SIZE + i] << (i * 8); + buf_res = (unsigned char *)malloc(unpackSize); + if (!buf_res) + return PrintError(rs, "Failed to allocate memory", rs_size); + memset(buf_res, 0x0, unpackSize); + LzmaDec_Construct(&state); + RINOK(LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc)); + res = Decode2(&state, &outStream.s, &inStream.s, &unpackSize, buf_res); + LzmaDec_Free(&state, &g_Alloc); + File_Close(&inStream.file); + if (apply_patch(args[1], buf_res, &new_data, &new_size) != 0) { + if (new_data) + free(new_data); + return 1; + } + if (((fd = open(args[2], O_CREAT | O_TRUNC | O_WRONLY, 0666)) < 0) || + (write(fd, new_data, new_size) != new_size) || (close(fd) == -1)) + err(1, "%s", args[2]); + if (res != SZ_OK) { + free(new_data); + free(buf_res); + if (res == SZ_ERROR_MEM) + return PrintError(rs, kCantAllocateMessage, rs_size); + else if (res == SZ_ERROR_DATA) + return PrintError(rs, kDataErrorMessage, rs_size); + else if (res == SZ_ERROR_WRITE) + return PrintError(rs, kCantWriteMessage, rs_size); + else if (res == SZ_ERROR_READ) + return PrintError(rs, kCantReadMessage, rs_size); + return PrintErrorNumber(rs, res, rs_size); + } + free(new_data); + free(buf_res); + return 0; + } + return 1; +} + +int main(int numArgs, const char *args[]) +{ + char rs[800] = { 0 }; + if (numArgs != 4) + errx(1, "ss_bspatch Version 1.0\nUsage: ss_bspatch oldfile newfile patchfile\n"); + int res = main2(numArgs, args, rs, sizeof(rs)); + fputs(rs, stdout); + return res; +} diff --git a/bsdiff/ss_bspatch_common.c b/bsdiff/ss_bspatch_common.c new file mode 100644 index 0000000..e7bd45e --- /dev/null +++ b/bsdiff/ss_bspatch_common.c @@ -0,0 +1,291 @@ +/*- + * Copyright 2003-2005 Colin Percival + * Copyright 2012 Matthew Endsley + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Modifications are made in reimplementing suffix sort array generation + * and how the data is read and written to.Iterative part replaced the + * recursive implementation to avoid buffer overflow problems + */ +//#define ZLIB_MOD //not stable yet. +//#define MAX_MATCH_SIZE // define ( MAX_MATCH_SIZE or CONST_MEMORY_USAGE ) or ( none of them ) +#define CONST_MEMORY_USAGE (64*1024) //tests show smallest time when using 64 kb +#define PATCH_FILE_FORMAT_MOD +#define BSDIFF_HEADER "BSDIFF40" +#define SSDIFF_HEADER "SSDIFF40" +//#define MULTI_THREADING +#include +#include +#include + +#include +#include +#include + +#include +#include <7zFile.h> +#include <7zVersion.h> +#include +#include + +static void *SzAlloc(void *p, size_t size) +{ + p = p; + return MyAlloc(size); +} + +static void SzFree(void *p, void *address) +{ + p = p; + MyFree(address); +} +ISzAlloc g_Alloc = { SzAlloc, SzFree }; + +static off_t offtin(u_char *buf) +{ + off_t y; + + y = buf[7] & 0x7F; + y = y * 256; + y += buf[6]; + y = y * 256; + y += buf[5]; + y = y * 256; + y += buf[4]; + y = y * 256; + y += buf[3]; + y = y * 256; + y += buf[2]; + y = y * 256; + y += buf[1]; + y = y * 256; + y += buf[0]; + + if (buf[7] & 0x80) + y = -y; + + return y; +} + +#define IN_BUF_SIZE (1 << 16) +#define OUT_BUF_SIZE (1 << 16) + +SRes Decode2(CLzmaDec *state, ISeqOutStream *outStream, ISeqInStream *inStream, + UInt64 *unpackSize, unsigned char *dec_data) +{ + int thereIsSize = (*unpackSize != (UInt64)(Int64) - 1); + UInt64 offset = 0; + Byte inBuf[IN_BUF_SIZE]; + Byte outBuf[OUT_BUF_SIZE]; + size_t inPos = 0, inSize = 0, outPos = 0; + + LzmaDec_Init(state); + + offset = 0; + + for (;;) { + if (inPos == inSize) { + inSize = IN_BUF_SIZE; + RINOK(inStream->Read(inStream, inBuf, &inSize)); + inPos = 0; + } + + SRes res; + SizeT inProcessed = inSize - inPos; + SizeT outProcessed = OUT_BUF_SIZE - outPos; + ELzmaFinishMode finishMode = LZMA_FINISH_ANY; + ELzmaStatus status; + + if (thereIsSize && outProcessed > *unpackSize) { + outProcessed = (SizeT) * unpackSize; + finishMode = LZMA_FINISH_END; + } + + res = LzmaDec_DecodeToBuf(state, outBuf + outPos, &outProcessed, + inBuf + inPos, &inProcessed, finishMode, &status); + inPos += inProcessed; + outPos += outProcessed; + *unpackSize -= outProcessed; + memcpy(dec_data + offset, outBuf, outProcessed); + offset += outProcessed; + + outPos = 0; + + if ((res != SZ_OK) || (thereIsSize && *unpackSize == 0)) + return res; + + if (inProcessed == 0 && outProcessed == 0) { + if (thereIsSize || status != LZMA_STATUS_FINISHED_WITH_MARK) + return SZ_ERROR_DATA; + return res; + } + } +} + +int apply_patch(const char *oldfile, unsigned char *patch_buffer, unsigned char **dest_buf, ssize_t *dest_size) +{ + int fd = -1, result = 0; + off_t oldsize, newsize; + u_char header[16], buf[8]; + u_char *old = NULL; + off_t oldpos, newpos; + off_t ctrl[4]; /////////////////////////////////////THREAD + off_t total_write; /////////////////////////////////////////THREAD + off_t j; + off_t memory_usage = CONST_MEMORY_USAGE; + off_t match_size; + off_t patch_buffer_offset = 0; + bool flag; + + /* + File format: + 0 8 "BSDIFF40" + 8 8 X + 16 8 Y + 24 8 sizeof(newfile) + 32 X bzip2(control block) + 32+X Y bzip2(diff block) + 32+X+Y ??? bzip2(extra block) + with control block a set of triples (x,y,z) meaning "add x bytes + from oldfile to x bytes from the diff block; copy y bytes from the + extra block; seek forwards in oldfile by z bytes". + */ + // Read header + if (patch_buffer) + memcpy(header, patch_buffer, 16); + else { + printf("%s().%d Corrupt decoded patch buffer\n", __FUNCTION__, __LINE__); + return 1; + } + + /* Check for appropriate magic */ + if (memcmp(header, BSDIFF_HEADER, 8) != 0 && memcmp(header, SSDIFF_HEADER, 8) != 0) { + printf("%s().%d Patch buffer header corrupt\n", __FUNCTION__, __LINE__); + return 1; + } + + /* Read lengths from header */ + newsize = offtin(header + 8); + + if ((newsize < 0)) { + printf("%s().%d Patch buffer corrupt\n", __FUNCTION__, __LINE__); + return 1; + } + + /* Cset patch_buffer_offset at the right place */ + patch_buffer_offset += 16; + + if (((fd = open(oldfile, O_RDONLY, 0)) < 0) || + ((oldsize = lseek(fd, 0, SEEK_END)) == -1) || + ((old = malloc(memory_usage + 1)) == NULL) || + (lseek(fd, 0, SEEK_SET) != 0)) { + printf("Corruption in old file %s\n", oldfile); + result = 1; + goto Cleanup; + } + + if ((*dest_buf = malloc(newsize + 1)) == NULL) { + printf("Corruption in old file %s\n", oldfile); + result = 1; + goto Cleanup; + } + oldpos = 0; + newpos = 0; + + total_write = 0; + + while (total_write != newsize) { + /* Read control data */ + for (j = 0; j <= 3; j++) { + memcpy(buf, patch_buffer + patch_buffer_offset, 8); + patch_buffer_offset += 8; + ctrl[j] = offtin(buf); + }; + + total_write += (ctrl[0] + ctrl[1]); + newpos = ctrl[3]; + oldpos = ctrl[2]; + + ////////////////////////////////////////////////////////////////////////////////// + flag = true; + match_size = ctrl[0]; + while (flag == true) { + if (match_size <= memory_usage) { + if (pread(fd, old, match_size, oldpos) != match_size) { + printf("Corruption in old file %s\n", oldfile); + result = 1; + goto Cleanup; + } + if (newpos + match_size > newsize) { + printf("%s().%d Corrupt patch\n", __FUNCTION__, __LINE__); + result = 1; + goto Cleanup; + } + memcpy((*dest_buf) + newpos, patch_buffer + patch_buffer_offset, match_size); + patch_buffer_offset += match_size; + for (j = 0; j < match_size; j++) + (*dest_buf)[newpos + j] += old[j]; + newpos += match_size; + flag = false; + } else { + if (pread(fd, old, memory_usage, oldpos) != memory_usage) { + printf("%s().%d Corruption in old file %s\n", __FUNCTION__, __LINE__ , oldfile); + result = 1; + goto Cleanup; + } + if (newpos + memory_usage > newsize) { + printf("%s().%d Corrupt patch\n", __FUNCTION__, __LINE__); + result = 1; + goto Cleanup; + } + memcpy((*dest_buf) + newpos, patch_buffer + patch_buffer_offset, memory_usage); + patch_buffer_offset += memory_usage; + for (j = 0; j < memory_usage; j++) + (*dest_buf)[newpos + j] += old[j]; + match_size -= memory_usage; + oldpos += memory_usage; + newpos += memory_usage; + } + } + + //////////////////////////////////////////////////////////////////////////////////////// + /* Sanity-check */ + if (newpos + ctrl[1] > newsize) { + printf("%s().%d Corrupt patch\n", __FUNCTION__, __LINE__); + result = 1; + goto Cleanup; + } + /* Read extra string */ + memcpy((*dest_buf) + newpos, patch_buffer + patch_buffer_offset, ctrl[1]); + patch_buffer_offset += ctrl[1]; + }; + *dest_size = newsize; +Cleanup: + //close old file + if (fd >= 0) + close(fd); + if (old) + free(old); + return result; +} diff --git a/bsdiff/ss_bspatch_common.h b/bsdiff/ss_bspatch_common.h new file mode 100644 index 0000000..3075ea2 --- /dev/null +++ b/bsdiff/ss_bspatch_common.h @@ -0,0 +1,17 @@ +#ifndef _SS_BSPATCH_COMMON_H +#define _SS_BSPATCH_COMMON_H 1 + +#include +#include <7zFile.h> +#include <7zVersion.h> +#include +#include + +extern ISzAlloc g_Alloc; + +SRes Decode2(CLzmaDec *state, ISeqOutStream *outStream, ISeqInStream *inStream, + UInt64 *unpackSize, unsigned char *dec_data); + +int apply_patch(const char *oldfile, unsigned char *patch_buffer, unsigned char **dest_buf, ssize_t *dest_size); + +#endif /* _SS_BSPATCH_COMMON_H */ -- 2.7.4