2 * Copyright 2003-2005 Colin Percival
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
26 // This file is a nearly copy of bsdiff.c from the
27 // bsdiff-4.3 distribution; the primary differences being how the
28 // input and output data are read, data post processing,
29 // search function modification and the error handling.
31 #define _CRT_SECURE_NO_WARNINGS
33 #include <sys/types.h>
48 #include <7zVersion.h>
51 #include <brotli/encode.h>
53 #define SUFSORT_MOD // Change suffix sorting algorithm from Qsufsort to Divsufsort
54 //#define ZLIB_MOD // Change compression algorithm
55 #define SEARCH_RECURSIVE // Change recursion of Search function to Iteration
56 //#define MAX_MATCH_SIZE // define ( MAX_MATCH_SIZE or CONST_MEMORY_USAGE ) or ( none of them ); use max_match memory for old file at patch side
57 #define CONST_MEMORY_USAGE 16384 // patch in m+O(1); m=size of new file; use only const memory for old file at patch side;
58 #define PATCH_FILE_FORMAT_MOD // no accumulation of diff and extra in db and eb; immediate write; also write all 3 parts of control stmt at same time
59 #define MULTI_THREADING 1 // only with #define CONST_MEMORY_USAGE or #define MAX_MATCH_SIZE
60 #define TIME_LIMIT_CHECK 300
61 #define TEMP_PATCH_NAME "temp_patch"
62 #define BROTLI_COMPRESSION_QUALITY 9
65 1) Use either (MAX_MATCH_SIZE or CONST_MEMORY_USAGE) or (none of both).
66 2.) PATCH_FILE_FORMAT_MOD may or may not be used, independently of everything else.
67 3.) MULTI_THREADING can be used only with (CONST_MEMORY_USAGE or MAX_MATCH_SIZE).
69 #ifdef TIME_LIMIT_CHECK
70 long long outer_count = 0;
72 void get_time_stamp(void)
77 gettimeofday(&tv, NULL);
79 msec = (int)(tv.tv_usec / 1000);
80 snprintf(ts1, 256, "%06d.%03d", sec % 100000, msec);
84 //supporting only 32 bit divsufsort for now.
85 #include <divsufsort.h>
88 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
90 #ifdef MULTI_THREADING
116 enum compression_method {
122 const char *old_file;
123 const char *new_file;
124 const char *patch_file;
125 enum compression_method comp_method;
128 struct data_thread data;
135 static off_t matchlen(u_char *old, off_t oldsize, u_char *new, off_t newsize)
138 for (i = 0; (i < oldsize) && (i < newsize); i++)
139 if (old[i] != new[i])
144 static off_t search(saidx_t *I, u_char *old, off_t oldsize,
145 u_char *new, off_t newsize, off_t st, off_t en, off_t *pos)
148 while (en - st >= 2) {
149 x = st + (en - st) / 2;
150 if (memcmp(old + I[x], new, MIN(oldsize - I[x], newsize)) < 0)
156 x = matchlen(old + I[st], oldsize - I[st], new, newsize);
157 y = matchlen(old + I[en], oldsize - I[en], new, newsize);
168 static void offtout(off_t x, u_char *buf)
204 int create_patch(const char *old_file, const char *new_file, const char *temp_patch, int offset_oldscore)
209 data.num_threads = MULTI_THREADING;
210 data.new = (u_char **)malloc(sizeof(u_char *)*data.num_threads);
212 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
213 that we never try to malloc(0) and get a NULL pointer */
214 if (((data.fd = open(old_file, O_RDONLY, 0)) < 0) ||
215 ((data.oldsize = lseek(data.fd, 0, SEEK_END)) == -1) ||
216 ((data.old = malloc(data.oldsize + 1)) == NULL) ||
217 (lseek(data.fd, 0, SEEK_SET) != 0) ||
218 (read(data.fd, data.old, data.oldsize) != data.oldsize) ||
219 (close(data.fd) == -1))
220 err(1, "%s", old_file);
222 data.I = malloc((data.oldsize + 1) * sizeof(saidx_t));
223 divsufsort(data.old, data.I, data.oldsize);
225 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
226 that we never try to malloc(0) and get a NULL pointer */
227 if (((data.fd = open(new_file, O_RDONLY, 0)) < 0) ||
228 ((data.newsize = lseek(data.fd, 0, SEEK_END)) == -1) ||
229 (lseek(data.fd, 0, SEEK_SET) != 0))
230 err(1, "%s", new_file);
231 data.size_thread = (data.newsize / data.num_threads);
234 for (j = 0; j < data.num_threads; ++j) {
235 if (j != data.num_threads - 1) {
236 if (((data.new[j] = (u_char *)malloc(sizeof(u_char) * (data.size_thread + 1))) == NULL) ||
237 (lseek(data.fd, 0, SEEK_CUR) != j * data.size_thread) ||
238 (read(data.fd, data.new[j], data.size_thread) != data.size_thread))
239 err(1, "%s", new_file);
241 if (((data.new[j] = (u_char *)malloc(sizeof(u_char) * (data.newsize - (j * data.size_thread) + 1))) == NULL) ||
242 (lseek(data.fd, 0, SEEK_CUR) != j * data.size_thread) ||
243 (read(data.fd, data.new[j], data.newsize - (j * data.size_thread)) != data.newsize - (j * data.size_thread)))
244 err(1, "here %s", new_file);
248 if ((close(data.fd) == -1))
249 err(1, "%s", new_file);
251 /* Create the patch file */
252 if ((data.pf = fopen(temp_patch, "w")) == NULL)
253 err(1, "%s", temp_patch);
257 8 8 length of bzip2ed ctrl block
258 16 8 length of bzip2ed diff block
259 24 8 length of new file */
262 32 ?? Bzip2ed ctrl block
263 ?? ?? Bzip2ed diff block
264 ?? ?? Bzip2ed extra block */
265 memcpy(data.header, "SSDIFF40", 8);
267 offtout(data.newsize, data.header + 8);
268 if (fwrite(data.header, 16, 1, data.pf) != 1)
269 err(1, "fwrite(%s)", temp_patch);
271 /* Compute the differences, writing ctrl as we go */
272 data.pfbz2 = data.pf;
273 //if ((data.pfbz2 = BZ2_bzWriteOpen(&data.bz2err, data.pf, 9, 0, 0)) == NULL)
274 //errx(1, "BZ2_bzWriteOpen, bz2err = %d", data.bz2err);
277 //BZ2_bzWriteClose(&data.bz2err, data.pfbz2, 0, NULL, NULL);
278 //if (data.bz2err != BZ_OK)
279 //errx(1, "BZ2_bzWriteClose, bz2err = %d", data.bz2err);
281 int ret = Function(offset_oldscore);
282 #ifdef TIME_LIMIT_CHECK
284 printf("bsdiff fails to create delta with offset score %d\n", offset_oldscore);
285 printf("Old: [%s] -> New: [%s]\n", old_file, new_file);
288 /* Seek to the beginning, write the header, and close the file */
289 if (fseeko(data.pf, 0, SEEK_SET))
292 if (fwrite(data.header, 16, 1, data.pf) != 1)
293 err(1, "fwrite(%s)", temp_patch);
296 /* Free the memory we used */
304 int Function(int offset_oldscore)
306 unsigned int thread_num = 0;
311 get_time_stamp(); //total time capturing
314 #ifdef PATCH_FILE_FORMAT_MOD
345 for (scsc = scan += len; scan < end; scan++) {
346 len = search(data.I, data.old, data.oldsize, data.new[thread_num] + scan, end - scan,
347 len, data.oldsize, &pos); // Passing parameter as len instead of 0 for ramdisk.img etc taking long time
349 for (; scsc < scan + len; scsc++)
350 if ((scsc + lastoffset < data.oldsize) &&
351 (data.old[scsc + lastoffset] == data.new[thread_num][scsc]))
353 #ifdef TIME_LIMIT_CHECK
355 if (outer_count > 1000) {
357 get_time_stamp(); //total time capturing
359 //printf("\ntime diff = %d\n", (t2 - t1));
360 if ((t2 - t1) > TIME_LIMIT_CHECK)
364 if (((len == oldscore) && (len != 0)) ||
365 (len > oldscore + offset_oldscore))
368 if ((scan + lastoffset < data.oldsize) &&
369 (data.old[scan + lastoffset] == data.new[thread_num][scan]))
372 if ((len != oldscore) || (scan == end)) {
376 for (i = 0; (lastscan + i < scan) && (lastpos + i < data.oldsize); ) {
377 if (data.old[lastpos + i] == data.new[thread_num][lastscan + i])
380 if (s * 2 - i > Sf * 2 - lenf) {
390 for (i = 1; (scan >= lastscan + i) && (pos >= i); i++) {
391 if (data.old[pos - i] == data.new[thread_num][scan - i])
393 if (s * 2 - i > Sb * 2 - lenb) {
400 if (lastscan + lenf > scan - lenb) {
401 overlap = (lastscan + lenf) - (scan - lenb);
405 for (i = 0; i < overlap; i++) {
406 if (data.new[thread_num][lastscan + lenf - overlap + i] ==
407 data.old[lastpos + lenf - overlap + i])
409 if (data.new[thread_num][scan - lenb + i] ==
410 data.old[pos - lenb + i])
418 lenf += lens - overlap;
422 if (((db = malloc(lenf + 1)) == NULL) ||
423 ((eb = malloc((scan - lenb) - (lastscan + lenf) + 1)) == NULL))
426 for (i = 0; i < lenf; i++)
427 db[i] = data.new[thread_num][lastscan + i] - data.old[lastpos + i];
428 for (i = 0; i < (scan - lenb) - (lastscan + lenf); i++)
429 eb[i] = data.new[thread_num][lastscan + lenf + i];
431 eblen = (scan - lenb) - (lastscan + lenf);
432 offtout(lenf, data.buf2);
433 offtout((scan - lenb) - (lastscan + lenf), data.buf2 + 8);
434 offtout(lastpos, data.buf2 + 16);
435 offtout((data.size_thread * thread_num) + lastscan, data.buf2 + 24);
436 fwrite(data.buf2, 1, 32, data.pf);
437 //if (data.bz2err != BZ_OK)
438 //errx(1, "fwrite, bz2err = %d", data.bz2err);
440 fwrite(db, 1, dblen, data.pf);
441 //if (data.bz2err != BZ_OK)
442 //errx(1, "fwrite, bz2err = %d", data.bz2err);
444 fwrite(eb, 1, eblen, data.pf);
445 //if (data.bz2err != BZ_OK)
446 //errx(1, "fwrite, bz2err = %d", data.bz2err);
451 lastscan = scan - lenb;
452 lastpos = pos - lenb;
453 lastoffset = pos - scan;
459 const char *kCantReadMessage = "Can not read input file";
460 const char *kCantWriteMessage = "Can not write output file";
461 const char *kCantAllocateMessage = "Can not allocate memory";
462 const char *kDataErrorMessage = "Data error";
464 static void *SzAlloc(void *p, size_t size)
467 return MyAlloc(size);
469 static void SzFree(void *p, void *address)
474 static ISzAlloc g_Alloc = { SzAlloc, SzFree };
476 int PrintError(char *buffer, const char *message, int buf_size)
478 snprintf(buffer + strlen(buffer), buf_size - strlen(buffer),
479 "\nError: %s\n", message);
483 int PrintErrorNumber(char *buffer, SRes val, int buf_size)
485 snprintf(buffer + strlen(buffer), buf_size - strlen(buffer),
486 "\nError code: %x\n", (unsigned)val);
490 int PrintUserError(char *buffer, int buf_size)
492 return PrintError(buffer, "Incorrect command", buf_size);
495 #define IN_BUF_SIZE (1 << 16)
496 #define OUT_BUF_SIZE (1 << 16)
499 static SRes lzma_encode(ISeqOutStream *outStream, ISeqInStream *inStream, UInt64 fileSize, char *rs)
507 enc = LzmaEnc_Create(&g_Alloc);
511 LzmaEncProps_Init(&props);
512 res = LzmaEnc_SetProps(enc, &props);
515 Byte header[LZMA_PROPS_SIZE + 8];
516 size_t headerSize = LZMA_PROPS_SIZE;
519 res = LzmaEnc_WriteProperties(enc, header, &headerSize);
520 for (i = 0; i < 8; i++)
521 header[headerSize++] = (Byte)(fileSize >> (8 * i));
522 if (outStream->Write(outStream, header, headerSize) != headerSize)
523 res = SZ_ERROR_WRITE;
526 res = LzmaEnc_Encode(enc, outStream, inStream, NULL, &g_Alloc, &g_Alloc);
529 LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
533 int lzma_compress(const char *input_file, const char *output_file, char *rs, int rs_size)
539 CFileSeqInStream inStream;
540 CFileOutStream outStream;
543 FileSeqInStream_CreateVTable(&inStream);
544 File_Construct(&inStream.file);
546 FileOutStream_CreateVTable(&outStream);
547 File_Construct(&outStream.file);
549 size_t t4 = sizeof(UInt32);
550 size_t t8 = sizeof(UInt64);
551 if (t4 != 4 || t8 != 8)
552 return PrintError(rs, "Incorrect UInt32 or UInt64", rs_size);
554 if (InFile_Open(&inStream.file, input_file) != 0)
555 return PrintError(rs, "Can not open input file", rs_size);
558 if (OutFile_Open(&outStream.file, output_file) != 0)
559 return PrintError(rs, "Can not open output file", rs_size);
563 File_GetLength(&inStream.file, &fileSize);
564 res = lzma_encode(&outStream.s, &inStream.s, fileSize, rs);
566 File_Close(&outStream.file);
567 File_Close(&inStream.file);
570 if (res == SZ_ERROR_MEM)
571 return PrintError(rs, kCantAllocateMessage, rs_size);
572 else if (res == SZ_ERROR_DATA)
573 return PrintError(rs, kDataErrorMessage, rs_size);
574 else if (res == SZ_ERROR_WRITE)
575 return PrintError(rs, kCantWriteMessage, rs_size);
576 else if (res == SZ_ERROR_READ)
577 return PrintError(rs, kCantReadMessage, rs_size);
578 return PrintErrorNumber(rs, res, rs_size);
583 int brotli_compress_internal(int input_fd, int output_fd, int quality)
586 size_t input_size = lseek(input_fd, 0, SEEK_END);
587 lseek(input_fd, 0, SEEK_SET);
588 void *input_file_ptr = mmap(NULL, input_size, PROT_READ, MAP_PRIVATE, input_fd, 0);
590 if (input_file_ptr == MAP_FAILED) {
591 printf("Can not mmap input file: %d - %m\n", errno);
595 BrotliEncoderState *bstate = BrotliEncoderCreateInstance(NULL, NULL, NULL);
597 printf("Can not create BrotliEncoder instance\n");
600 size_t max_output_size = BrotliEncoderMaxCompressedSize(input_size);
602 if (max_output_size == 0) {
603 printf("Brotli engine error\n");
607 if (ftruncate(output_fd, max_output_size) == -1) {
608 printf("Can not truncate output file: %d - %m\n", errno);
612 void *output_file_ptr = mmap(NULL, max_output_size, PROT_WRITE, MAP_SHARED, output_fd, 0);
613 if (output_file_ptr == MAP_FAILED) {
614 printf("Can not mmap output file: %d - %m\n", errno);
618 if(!BrotliEncoderCompress(quality,
619 BROTLI_DEFAULT_WINDOW,
625 printf("Compression error\n");
628 if (ftruncate(output_fd, max_output_size) == -1) {
629 printf("Can not truncate output file after compression: %d - %m\n", errno);
636 munmap(input_file_ptr, input_size);
638 munmap(output_file_ptr, max_output_size);
643 int brotli_compress(const char *input_file, const char *output_file, int quality)
649 int input_fd = open(input_file, O_RDONLY);
651 printf("Can not open file: %s for read\n", input_file);
654 int output_fd = open(output_file, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
656 printf("Can not open file: %s for write (%d: %m)\n", output_file, errno);
661 res = brotli_compress_internal(input_fd, output_fd, quality);
669 void print_help(const char *arg0)
672 errx(1, "ss_bsdiff Version 5.0\nUsage: %s [-c <lzma|brotli>] oldfile newfile patchfile\n", arg0);
675 int parse_args(struct bsdiff_info *info, int argc, char *argv[])
680 info->comp_method = CM_LZMA; // default compression method
682 struct option long_options[] = {
683 {"compression", optional_argument, NULL, 'c'},
689 while ((opt = getopt_long(argc, argv, "c:", long_options, NULL)) != -1) {
692 if (strcmp("lzma", optarg) == 0)
693 info->comp_method = CM_LZMA;
694 else if (strcmp("brotli", optarg) == 0)
695 info->comp_method = CM_BROTLI;
697 err(1, "Unknown compression method: %s", optarg);
703 if (optind + 2 >= argc) {
704 err(1, "Not enough parameters");
709 info->old_file = argv[optind];
710 info->new_file = argv[optind+1];
711 info->patch_file = argv[optind+2];
716 int MY_CDECL main(int argc, char *argv[])
718 char rs[800] = { 0 };
720 struct bsdiff_info info;
721 if (parse_args(&info, argc, argv) != 0)
724 int ret = create_patch(info.old_file, info.new_file, TEMP_PATCH_NAME, 8);
725 #ifdef TIME_LIMIT_CHECK
727 printf("Trying with offset score 2\n");
728 ret = create_patch(info.old_file, info.new_file, TEMP_PATCH_NAME, 2);
731 printf("Trying with offset score 0\n");
732 ret = create_patch(info.old_file, info.new_file, TEMP_PATCH_NAME, 0);
735 err(1, "bsdiff fails to create delta within timelimit");
738 switch(info.comp_method) {
740 res = lzma_compress(TEMP_PATCH_NAME, info.patch_file, rs, sizeof(rs));
743 res = brotli_compress(TEMP_PATCH_NAME, info.patch_file, BROTLI_COMPRESSION_QUALITY);
746 printf("Unknown compression method\n");
751 if (remove(TEMP_PATCH_NAME) < 0)
752 printf("Failed to remove %s\n", TEMP_PATCH_NAME);