2 zipcmp.c -- compare zip files
3 Copyright (C) 2003-2022 Dieter Baron and Thomas Klausner
5 This file is part of libzip, a library to manipulate ZIP archives.
6 The authors can be contacted at <libzip@nih.at>
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in
15 the documentation and/or other materials provided with the
17 3. The names of the authors may not be used to endorse or promote
18 products derived from this software without specific prior
21 THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
22 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
25 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
27 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
29 IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
31 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58 #include "diff_output.h"
/* Members of the record types used below; see the note on each field. */
66 size_t comment_length; /* archive comment length; reset in compare_zip(), set from zip_get_archive_comment() in list_zip() */
74 const zip_uint8_t *data; /* extra field payload as returned by zip_file_extra_field_get() */
81 zip_uint32_t comp_method; /* compression method, copied from zip_stat's comp_method in list_zip() */
82 struct ef *extra_fields; /* array allocated and filled by ef_read() */
83 zip_uint16_t n_extra_fields; /* element count of extra_fields (local + central) */
85 zip_uint32_t comment_length; /* per-entry comment length, written by zip_file_get_comment() */
91 const char * const name; /* NOTE(review): presumably the display string of an enum_map_t row -- confirm */
/*
 * Display names for ZIP compression method numbers.
 * Looked up through map_enum() when entry_paranoia_checks() reports a
 * compression method mismatch.
 */
94 const enum_map_t comp_methods[] = {
95 { 0, "Stored (no compression)" },
97 { 2, "Reduced with compression factor 1" },
98 { 3, "Reduced with compression factor 2" },
99 { 4, "Reduced with compression factor 3" },
100 { 5, "Reduced with compression factor 4" },
102 { 7, "Reserved for Tokenizing compression algorithm" },
104 { 9, "Enhanced Deflating using Deflate64(tm)" },
105 { 10, "PKWARE Data Compression Library Imploding (old IBM TERSE)" },
106 { 11, "11 (Reserved by PKWARE)" },
108 { 13, "13 (Reserved by PKWARE)" },
109 { 14, "LZMA (EFS)" },
110 { 15, "15 (Reserved by PKWARE)" },
111 { 16, "16 (Reserved by PKWARE)" },
112 { 17, "17 (Reserved by PKWARE)" },
113 { 18, "IBM TERSE (new)" },
114 { 19, "IBM LZ77 z Architecture (PFS)" },
115 { 20, "Zstandard compressed data (obsolete)" },
116 { 93, "Zstandard compressed data" },
117 { 95, "XZ compressed data" },
118 { 97, "WavPack compressed data" },
119 { 98, "PPMd version I, Rev 1" },
120 { 99, "WinZIP AES Encryption" },
/*
 * Display names for extra field header IDs.
 * Looked up through map_enum() by ef_print() when an extra field is reported.
 */
124 const enum_map_t extra_fields[] = {
126 { 0x0001, "Zip64 extended information" },
127 { 0x0007, "AV Info" },
128 { 0x0008, "Reserved for extended language encoding data (PFS)" },
131 { 0x000c, "OpenVMS" },
133 { 0x000e, "Reserved for file stream and fork descriptors" },
134 { 0x000f, "Patch Descriptor" },
135 { 0x0014, "PKCS#7 Store for X.509 Certificates" },
136 { 0x0015, "X.509 Certificate ID and Signature for individual file" },
137 { 0x0016, "X.509 Certificate ID for Central Directory" },
138 { 0x0017, "Strong Encryption Header" },
139 { 0x0018, "Record Management Controls" },
140 { 0x0019, "PKCS#7 Encryption Recipient Certificate List" },
141 { 0x0065, "IBM S/390 (Z390), AS/400 (I400) attributes - uncompressed" },
142 { 0x0066, "Reserved for IBM S/390 (Z390), AS/400 (I400) attributes - compressed" },
143 { 0x4690, "POSZIP 4690 (reserved)" },
145 /* Third-Party defined; see InfoZIP unzip sources proginfo/extrafld.txt */
146 { 0x07c8, "Info-ZIP Macintosh (old)" },
147 { 0x2605, "ZipIt Macintosh (first version)" },
148 { 0x2705, "ZipIt Macintosh 1.3.5+ (w/o full filename)" },
149 { 0x2805, "ZipIt Macintosh 1.3.5+" },
150 { 0x334d, "Info-ZIP Macintosh (new)" },
151 { 0x4154, "Tandem NSK" },
152 { 0x4341, "Acorn/SparkFS" },
153 { 0x4453, "Windows NT security descriptor" },
154 { 0x4704, "VM/CMS" },
156 { 0x4854, "Theos, old unofficial port" },
157 { 0x4b46, "FWKCS MD5" },
158 { 0x4c41, "OS/2 access control list (text ACL)" },
159 { 0x4d49, "Info-ZIP OpenVMS (obsolete)" },
160 { 0x4d63, "Macintosh SmartZIP" },
161 { 0x4f4c, "Xceed original location extra field" },
162 { 0x5356, "AOS/VS (ACL)" },
163 { 0x5455, "extended timestamp" },
164 { 0x554e, "Xceed unicode extra field" },
165 { 0x5855, "Info-ZIP UNIX (original)" },
166 { 0x6375, "Info-ZIP UTF-8 comment field" },
167 { 0x6542, "BeOS (BeBox, PowerMac, etc.)" },
169 { 0x7075, "Info-ZIP UTF-8 name field" },
170 { 0x7441, "AtheOS (AtheOS/Syllable attributes)" },
171 { 0x756e, "ASi UNIX" },
172 { 0x7855, "Info-ZIP UNIX" },
173 { 0x7875, "Info-ZIP UNIX 3rd generation" },
174 { 0x9901, "WinZIP AES encryption" },
175 { 0xa220, "Microsoft Open Packaging Growth Hint" },
176 { 0xcafe, "executable Java JAR file" },
177 { 0xfb4a, "SMS/QDOS" }, /* per InfoZIP extrafld.txt */
178 { 0xfd4a, "SMS/QDOS" }, /* per appnote.txt */
/* Name used as the "%s: " prefix of every diagnostic and in the usage line. */
183 const char *progname;
/* Fixed tool name pasted into help_head and version_string. */
185 #define PROGRAM "zipcmp"
/*
 * Synopsis line; the single %s receives progname.
 * The bracketed letters list every option in OPTIONS, including -C and -s,
 * which the help text documents as well.
 */
187 #define USAGE "usage: %s [-ChipqstVv] archive1 archive2\n"
/* Banner written to stdout by main() immediately before the usage line. */
189 char help_head[] = PROGRAM " (" PACKAGE ") by Dieter Baron and Thomas Klausner\n\n";
192 -h display this help message\n\
193 -C check archive consistencies\n\
194 -i compare names ignoring case distinctions\n\
195 -p compare as many details as possible\n\
197 -s print a summary\n\
198 -t test zip files (compare file contents to checksum)\n\
199 -V display version number\n\
200 -v be verbose (print differences, default)\n\
202 Report bugs to <libzip@nih.at>.\n";
/* Version and copyright text written to stdout by main(). */
204 char version_string[] = PROGRAM " (" PACKAGE " " VERSION ")\n\
205 Copyright (C) 2003-2022 Dieter Baron and Thomas Klausner\n\
206 " PACKAGE " comes with ABSOLUTELY NO WARRANTY, to the extent permitted by law.\n";
/* getopt() option string used by main(); no letter is followed by ':', so no option takes an argument. */
208 #define OPTIONS "hVCipqstv"
/*
 * True when both elements of the two-element archive array a have a non-NULL
 * za handle.  The argument is parenthesized so that any expression yielding
 * the array (not only a plain identifier) expands correctly.
 */
211 #define BOTH_ARE_ZIPS(a) ((a)[0].za && (a)[1].za)
/*
 * Forward declarations of the file-local helpers.
 * compare_list() is the generic merge routine; the cmp / ignore / check /
 * print / start_file callbacks it takes are implemented by the ef_* and
 * entry_* functions declared here.
 */
213 static int comment_compare(const char *c1, size_t l1, const char *c2, size_t l2);
214 static int compare_list(char *const name[2], const void *list[2], const zip_uint64_t list_length[2], int element_size, int (*cmp)(const void *a, const void *b), int (*ignore)(const void *list, int last, const void *other), int (*check)(char *const name[2], const void *a, const void *b), void (*print)(char side, const void *element), void (*start_file)(const void *element));
215 static int compare_zip(char *const zn[]);
216 static int ef_compare(char *const name[2], const struct entry *e1, const struct entry *e2);
217 static int ef_order(const void *a, const void *b);
218 static void ef_print(char side, const void *p);
219 static int ef_read(zip_t *za, zip_uint64_t idx, struct entry *e);
220 static int entry_cmp(const void *p1, const void *p2);
221 static int entry_ignore(const void *p1, int last, const void *o);
222 static int entry_paranoia_checks(char *const name[2], const void *p1, const void *p2);
223 static void entry_print(char side, const void *p);
224 static void entry_start_file(const void *p);
225 static const char *map_enum(const enum_map_t *map, uint32_t value);
227 static int is_directory(const char *name);
229 static int list_directory(const char *name, struct archive *a);
231 static int list_zip(const char *name, struct archive *a);
232 static int test_file(zip_t *za, zip_uint64_t idx, const char *zipname, const char *filename, zip_uint64_t size, zip_uint32_t crc);
/*
 * Run-time switches shared by the whole program.
 *   check_consistency - makes list_zip() pass ZIP_CHECKCONS to zip_open()
 *   ignore_case       - entry_cmp() compares names with strcasecmp instead of strcmp
 *   paranoid          - selects entry_paranoia_checks in compare_zip(); cleared
 *                       there when an operand is listed as a directory
 *   have_directory    - selects entry_ignore in compare_zip()
 *   verbose           - forwarded to diff_output_init()
 *   test_files, summary - NOTE(review): their uses are outside the lines seen
 *                       here; presumably the -t and -s options -- confirm
 */
234 int ignore_case, test_files, paranoid, verbose, have_directory, check_consistency, summary;
/* Tallies of '+' and '-' elements reported by compare_list(); printed by compare_zip(). */
235 int plus_count = 0, minus_count = 0;
/* Shared diff writer state, initialised in compare_zip() through diff_output_init(). */
237 diff_output_t output;
/*
 * Program entry point.
 * Parses the command line with getopt() against OPTIONS, requires exactly two
 * operands after the options (otherwise USAGE goes to stderr), and exits with
 * status 0 when compare_zip() returns 0 and with status 1 for any other result.
 */
241 main(int argc, char *const argv[]) {
248 check_consistency = 0;
254 while ((c = getopt(argc, argv, OPTIONS)) != -1) {
257 check_consistency = 1;
/* help: banner followed by the usage line, both on stdout */
279 fputs(help_head, stdout);
280 printf(USAGE, progname);
284 fputs(version_string, stdout);
288 fprintf(stderr, USAGE, progname);
/* exactly two archive operands must remain */
293 if (argc != optind + 2) {
294 fprintf(stderr, USAGE, progname);
298 exit((compare_zip(argv + optind) == 0) ? 0 : 1);
/*
 * Compare the two inputs named zn[0] and zn[1].
 * Each input is listed through list_directory() when is_directory() says it
 * is one, otherwise through list_zip(); its entries are then sorted with
 * entry_cmp.  compare_list() merges the two sorted lists, using entry_ignore
 * only when have_directory is set and entry_paranoia_checks only when
 * paranoid is set.  If comment_compare() finds the archive comments different,
 * each non-empty comment is emitted through diff_output_data().
 * main() maps a return value of 0 to exit status 0 and anything else to 1.
 */
303 compare_zip(char *const zn[]) {
310 for (i = 0; i < 2; i++) {
316 a[i].comment_length = 0;
318 if (is_directory(zn[i])) {
320 fprintf(stderr, "%s: reading directories not supported\n", progname);
323 if (list_directory(zn[i], a + i) < 0)
326 paranoid = 0; /* paranoid checks make no sense for directories, since they compare zip metadata */
330 if (list_zip(zn[i], a + i) < 0)
/* compare_list() needs both lists in entry_cmp order */
334 qsort(a[i].entry, a[i].nentry, sizeof(a[i].entry[0]), entry_cmp);
337 diff_output_init(&output, verbose, zn);
343 res = compare_list(zn, (const void **)e, n, sizeof(e[i][0]), entry_cmp, have_directory ? entry_ignore : NULL, paranoid ? entry_paranoia_checks : NULL, entry_print, entry_start_file);
346 if (comment_compare(a[0].comment, a[0].comment_length, a[1].comment, a[1].comment_length) != 0) {
347 if (a[0].comment_length > 0) {
348 diff_output_data(&output, '-', (const zip_uint8_t *)a[0].comment, a[0].comment_length, "archive comment");
351 if (a[1].comment_length > 0) {
352 diff_output_data(&output, '+', (const zip_uint8_t *)a[1].comment, a[1].comment_length, "archive comment");
/* release the per-entry name strings of both lists */
359 for (i = 0; i < 2; i++) {
365 for (j = 0; j < a[i].nentry; j++) {
366 free(a[i].entry[j].name);
372 printf("%d files removed, %d files added\n", minus_count, plus_count);
/*
 * Compute the CRC-32 of the file fname with zlib's crc32(), reading it in
 * buffer-sized chunks.  Failure to open the file and a read error are both
 * reported on stderr.  The success path returns the CRC widened to
 * zip_int64_t; list_directory() treats a negative return value as failure.
 */
389 compute_crc(const char *fname) {
/* crc32(0L, Z_NULL, 0) yields the initial CRC value */
391 uLong crc = crc32(0L, Z_NULL, 0);
396 if ((f = fopen(fname, "rb")) == NULL) {
397 fprintf(stderr, "%s: can't open %s: %s\n", progname, fname, strerror(errno));
401 while ((n = fread(buffer, 1, sizeof(buffer), f)) > 0) {
402 crc = crc32(crc, buffer, (unsigned int)n);
406 fprintf(stderr, "%s: read error on %s: %s\n", progname, fname, strerror(errno));
413 return (zip_int64_t)crc;
/*
 * Report whether name refers to a directory: when stat() succeeds the result
 * is S_ISDIR() of the returned mode.  A failing stat() takes the early-exit
 * branch instead.
 */
419 is_directory(const char *name) {
422 if (stat(name, &st) < 0)
425 return S_ISDIR(st.st_mode);
/*
 * Fill a->entry by walking the directory tree rooted at name with fts
 * (FTS_NOCHDIR | FTS_LOGICAL).  Entry names are the fts paths with their
 * first strlen(name) + 1 bytes cut off.  Directories are recorded as
 * "<path>/" with size 0 and CRC 0; the other recorded entries take st_size
 * from fts_statp and a CRC from compute_crc().
 * compare_zip() treats a negative return value as failure.
 */
431 list_directory(const char *name, struct archive *a) {
435 size_t prefix_length;
437 char *const names[2] = {(char *)name, NULL};
440 if ((fts = fts_open(names, FTS_NOCHDIR | FTS_LOGICAL, NULL)) == NULL) {
441 fprintf(stderr, "%s: can't open directory '%s': %s\n", progname, name, strerror(errno));
/* number of leading bytes stripped from every fts path */
444 prefix_length = strlen(name) + 1;
448 while ((ent = fts_read(fts))) {
451 switch (ent->fts_info) {
/* entry array is full: grow it */
470 if (a->nentry >= nalloc) {
/* refuse sizes whose byte count would overflow size_t */
472 if (nalloc > SIZE_MAX / sizeof(a->entry[0])) {
473 fprintf(stderr, "%s: malloc failure\n", progname);
/* NOTE(review): assigning realloc()'s result straight to a->entry drops the old block if it fails */
476 a->entry = realloc(a->entry, sizeof(a->entry[0]) * nalloc);
477 if (a->entry == NULL) {
478 fprintf(stderr, "%s: malloc failure\n", progname);
483 if (ent->fts_info == FTS_D) {
/* true when fts_path ends where the root name ends -- presumably the root directory itself */
486 if (ent->fts_path[prefix_length - 1] == '\0') {
/* +2: room for the appended '/' and the terminating NUL */
490 dir_name = malloc(strlen(ent->fts_path + prefix_length) + 2);
491 if (dir_name == NULL) {
492 fprintf(stderr, "%s: malloc failure\n", progname);
495 sprintf(dir_name, "%s/", ent->fts_path + prefix_length);
496 a->entry[a->nentry].name = dir_name;
497 a->entry[a->nentry].size = 0;
498 a->entry[a->nentry].crc = 0;
/* NOTE(review): the strdup() result is not checked before the following statements */
501 a->entry[a->nentry].name = strdup(ent->fts_path + prefix_length);
502 a->entry[a->nentry].size = (zip_uint64_t)ent->fts_statp->st_size;
503 if ((crc = compute_crc(ent->fts_accpath)) < 0) {
508 a->entry[a->nentry].crc = (zip_uint32_t)crc;
515 if (fts_close(fts)) {
516 fprintf(stderr, "%s: error closing directory '%s': %s\n", progname, a->name, strerror(errno));
/*
 * Open the zip archive name (with ZIP_CHECKCONS when check_consistency is
 * set) and record one struct entry per archive member in a->entry: name,
 * size and CRC from zip_stat_index(), plus compression method, extra fields
 * and comment in one branch, or zeroed compression method and extra field
 * count in the other.  The archive comment and its length are stored in a.
 * compare_zip() treats a negative return value as failure.
 */
526 list_zip(const char *name, struct archive *a) {
532 if ((za = zip_open(name, check_consistency ? ZIP_CHECKCONS : 0, &err)) == NULL) {
534 zip_error_init_with_code(&error, err);
535 fprintf(stderr, "%s: cannot open zip archive '%s': %s\n", progname, name, zip_error_strerror(&error));
536 zip_error_fini(&error);
541 a->nentry = (zip_uint64_t)zip_get_num_entries(za, 0);
/* the first test rejects entry counts whose byte size would overflow size_t */
546 if ((a->nentry > SIZE_MAX / sizeof(a->entry[0])) || (a->entry = (struct entry *)malloc(sizeof(a->entry[0]) * a->nentry)) == NULL) {
547 fprintf(stderr, "%s: malloc failure\n", progname);
551 for (i = 0; i < a->nentry; i++) {
/* NOTE(review): neither zip_stat_index()'s return value nor strdup()'s result is checked here */
552 zip_stat_index(za, i, 0, &st);
553 a->entry[i].name = strdup(st.name);
554 a->entry[i].size = st.size;
555 a->entry[i].crc = st.crc;
/* the result of test_file() is discarded at this call site */
557 test_file(za, i, name, st.name, st.size, st.crc);
559 a->entry[i].comp_method = st.comp_method;
/* the result of ef_read() is discarded at this call site */
560 ef_read(za, i, a->entry + i);
561 a->entry[i].comment = zip_file_get_comment(za, i, &a->entry[i].comment_length, 0);
564 a->entry[i].comp_method = 0;
565 a->entry[i].n_extra_fields = 0;
571 a->comment = zip_get_archive_comment(za, &length, 0);
572 a->comment_length = (size_t)length;
576 a->comment_length = 0;
/*
 * Compare two comments given as pointer/length pairs.  The final step is a
 * memcmp() over l2 bytes; callers treat any nonzero result as "different".
 * NOTE(review): using l2 alone is only safe if the lengths were found equal
 * beforehand -- that check is not among the lines seen here; confirm.
 */
585 comment_compare(const char *c1, size_t l1, const char *c2, size_t l2) {
592 if (c1 == NULL || c2 == NULL)
595 return memcmp(c1, c2, (size_t)l2);
/*
 * Generic merge-style comparison of two lists of element_size-byte records.
 *   cmp        - ordering function applied to the current element of each list
 *   ignore     - optional; called with the element, a flag that is nonzero
 *                when it is the last element of its list, and the current
 *                element of the other list (NULL once that list is exhausted)
 *   check      - its result is OR-ed into the difference flag
 *   print      - called with '+' for elements of list 1 and '-' for elements
 *                of list 0; plus_count / minus_count are bumped alongside
 *   start_file - passed by callers as a function or NULL; its use is outside
 *                the lines seen here
 * INC(k) advances both the index and the element pointer of list k.  After
 * the loop that runs while both lists have elements, a second loop walks the
 * remainder of each list.
 */
599 static int compare_list(char *const name[2], const void *list[2], const zip_uint64_t list_length[2], int element_size, int (*cmp)(const void *a, const void *b), int (*ignore)(const void *list, int last, const void *other), int (*check)(char *const name[2], const void *a, const void *b), void (*print)(char side, const void *element), void (*start_file)(const void *element)) {
604 #define INC(k) (i[k]++, list[k] = ((const char *)list[k]) + element_size)
607 if (ignore && ignore(list[k], i[k] >= list_length[k] - 1, i[1-k] < list_length[1-k] ? list[1-k] : NULL)) { \
610 print((k) ? '+' : '-', list[k]); \
611 (k) ? plus_count++ : minus_count++; \
617 while (i[0] < list_length[0] && i[1] < list_length[1]) {
618 int c = cmp(list[0], list[1]);
625 diff |= check(name, list[0], list[1]);
627 diff_output_end_file(&output);
643 for (j = 0; j < 2; j++) {
644 while (i[j] < list_length[j]) {
/*
 * Collect the extra fields of archive member idx into e->extra_fields.
 * Local-header fields come first and are tagged ZIP_FL_LOCAL, central
 * directory fields follow and are tagged ZIP_FL_CENTRAL; every element's
 * name aliases e->name.  The array is finally sorted with ef_order.
 * NOTE(review): the data pointers come straight from
 * zip_file_extra_field_get(); their lifetime is not established here.
 */
655 ef_read(zip_t *za, zip_uint64_t idx, struct entry *e) {
656 zip_int16_t n_local, n_central;
/* a negative count from either query takes the failure branch */
659 if ((n_local = zip_file_extra_fields_count(za, idx, ZIP_FL_LOCAL)) < 0 || (n_central = zip_file_extra_fields_count(za, idx, ZIP_FL_CENTRAL)) < 0) {
663 e->n_extra_fields = (zip_uint16_t)(n_local + n_central);
/* NOTE(review): with zero extra fields this is malloc(0), which may legitimately return NULL */
665 if ((e->extra_fields = (struct ef *)malloc(sizeof(e->extra_fields[0]) * e->n_extra_fields)) == NULL)
668 for (i = 0; i < n_local; i++) {
669 e->extra_fields[i].name = e->name;
670 e->extra_fields[i].data = zip_file_extra_field_get(za, idx, i, &e->extra_fields[i].id, &e->extra_fields[i].size, ZIP_FL_LOCAL);
671 if (e->extra_fields[i].data == NULL)
673 e->extra_fields[i].flags = ZIP_FL_LOCAL;
/* i keeps counting; central field numbers restart at 0, hence i - n_local */
675 for (; i < e->n_extra_fields; i++) {
676 e->extra_fields[i].name = e->name;
677 e->extra_fields[i].data = zip_file_extra_field_get(za, idx, (zip_uint16_t)(i - n_local), &e->extra_fields[i].id, &e->extra_fields[i].size, ZIP_FL_CENTRAL);
678 if (e->extra_fields[i].data == NULL)
680 e->extra_fields[i].flags = ZIP_FL_CENTRAL;
683 qsort(e->extra_fields, e->n_extra_fields, sizeof(e->extra_fields[0]), ef_order);
/*
 * Compare the extra field arrays of two entries by handing them to
 * compare_list() with ef_order as ordering and ef_print as printer; no
 * ignore, check or start_file callback is supplied.  Returns whatever
 * compare_list() returns.
 */
690 ef_compare(char *const name[2], const struct entry *e1, const struct entry *e2) {
694 ef[0] = e1->extra_fields;
695 ef[1] = e2->extra_fields;
696 n[0] = e1->n_extra_fields;
697 n[1] = e2->n_extra_fields;
699 return compare_list(name, (const void **)ef, n, sizeof(struct ef), ef_order, NULL, NULL, ef_print, NULL);
/*
 * Ordering function for struct ef, used with qsort() in ef_read() and as the
 * cmp callback in ef_compare().  Keys, in order: flags, id, size, and finally
 * the payload bytes compared with memcmp() over a->size bytes (the sizes are
 * known to be equal at that point).
 */
704 ef_order(const void *ap, const void *bp) {
705 const struct ef *a, *b;
710 if (a->flags != b->flags)
711 return a->flags - b->flags;
713 return a->id - b->id;
714 if (a->size != b->size)
715 return a->size - b->size;
716 return memcmp(a->data, b->data, a->size);
/*
 * compare_list() print callback for extra fields: emits the field's payload
 * through diff_output_data() on the given side, labelled "local" or "central"
 * according to its flags and with the ID's display name from map_enum().
 */
721 ef_print(char side, const void *p) {
722 const struct ef *ef = (struct ef *)p;
724 diff_output_data(&output, side, ef->data, ef->size, " %s extra field %s", ef->flags == ZIP_FL_LOCAL ? "local" : "central", map_enum(extra_fields, ef->id));
/*
 * Ordering function for struct entry, used with qsort() and as the cmp
 * callback of compare_list() in compare_zip().  Keys, in order: name
 * (strcasecmp when ignore_case is set, strcmp otherwise), size, CRC.
 */
729 entry_cmp(const void *p1, const void *p2) {
730 const struct entry *e1, *e2;
733 e1 = (struct entry *)p1;
734 e2 = (struct entry *)p2;
736 if ((c = (ignore_case ? strcasecmp : strcmp)(e1->name, e2->name)) != 0)
738 if (e1->size != e2->size) {
739 if (e1->size > e2->size)
744 if (e1->crc != e2->crc)
/*
 * Compare rather than subtract: the difference of two CRCs converted to int
 * can overflow, which is undefined behaviour and can make the ordering
 * inconsistent.  The casts are kept so the sign matches the old result
 * wherever the subtraction was well defined.
 */
745 return ((int)e1->crc > (int)e2->crc) ? 1 : -1;
/*
 * compare_list() "ignore" callback, installed by compare_zip() only when
 * have_directory is set.  p is the current element of one list, last is
 * nonzero when p is the final element of that list, and o is the current
 * element of the other list or NULL once that list is exhausted.
 * The tests classify p as: not a directory (name is empty or lacks a
 * trailing '/'), a directory that has content in the other list, or a
 * directory that is empty / not empty in its own list, judged from the
 * element that follows it (e[1]).
 */
752 entry_ignore(const void *p, int last, const void *o) {
753 const struct entry *e = (const struct entry *)p;
754 const struct entry *other = (const struct entry *)o;
756 size_t length = strlen(e[0].name);
758 if (length == 0 || e[0].name[length - 1] != '/') {
759 /* not a directory */
763 if (other != NULL && strlen(other->name) > length && strncmp(other->name, e[0].name, length) == 0) {
764 /* not empty in other archive */
/* last is tested first, so e[1] is only read when a following element exists */
768 if (last || (strlen(e[1].name) < length || strncmp(e[0].name, e[1].name, length) != 0)) {
769 /* empty in this archive */
773 /* not empty in this archive */
/*
 * compare_list() "check" callback, installed by compare_zip() when paranoid
 * is set.  For two entries it compares the compression method, the extra
 * fields (through ef_compare()) and the entry comments, emitting '-' / '+'
 * lines through diff_output() and diff_output_data() for the first and last
 * of these when they differ.  compare_list() ORs the return value into its
 * difference flag.
 */
779 entry_paranoia_checks(char *const name[2], const void *p1, const void *p2) {
780 const struct entry *e1, *e2;
783 e1 = (struct entry *)p1;
784 e2 = (struct entry *)p2;
788 if (e1->comp_method != e2->comp_method) {
789 diff_output(&output, '-', " compression method %s", map_enum(comp_methods, e1->comp_method));
790 diff_output(&output, '+', " compression method %s", map_enum(comp_methods, e2->comp_method));
794 if (ef_compare(name, e1, e2) != 0) {
798 if (comment_compare(e1->comment, e1->comment_length, e2->comment, e2->comment_length) != 0) {
799 diff_output_data(&output, '-', (const zip_uint8_t *)e1->comment, e1->comment_length, " comment");
800 diff_output_data(&output, '+', (const zip_uint8_t *)e2->comment, e2->comment_length, " comment");
/*
 * compare_list() print callback for entries: forwards the entry's name, size
 * and CRC to diff_output_file() for the given side ('+' or '-').
 */
808 static void entry_print(char side, const void *p) {
809 const struct entry *e = (struct entry *)p;
811 diff_output_file(&output, side, e->name, e->size, e->crc);
/*
 * compare_list() start_file callback for entries: forwards the entry's name,
 * size and CRC to diff_output_start_file().
 */
815 static void entry_start_file(const void *p) {
816 const struct entry *e = (struct entry *)p;
818 diff_output_start_file(&output, e->name, e->size, e->crc);
/*
 * Read archive member idx of za in full and verify it against the expected
 * size and crc, accumulating the byte count and a zlib CRC-32 on the way.
 * zipname and filename are used only in diagnostics, which go to stderr for
 * an open failure, a read error, a length mismatch and a CRC mismatch.
 */
823 test_file(zip_t *za, zip_uint64_t idx, const char *zipname, const char *filename, zip_uint64_t size, zip_uint32_t crc) {
830 if ((zf = zip_fopen_index(za, idx, 0)) == NULL) {
831 fprintf(stderr, "%s: %s: cannot open file %s (index %" PRIu64 "): %s\n", progname, zipname, filename, idx, zip_strerror(za));
/* crc32(0, NULL, 0) yields the initial CRC value */
835 ncrc = (zip_uint32_t)crc32(0, NULL, 0);
838 while ((n = zip_fread(zf, buf, sizeof(buf))) > 0) {
839 nsize += (zip_uint64_t)n;
840 ncrc = (zip_uint32_t)crc32(ncrc, (const Bytef *)buf, (unsigned int)n);
844 fprintf(stderr, "%s: %s: error reading file %s (index %" PRIu64 "): %s\n", progname, zipname, filename, idx, zip_file_strerror(zf));
/* both lengths are unsigned 64-bit values, so they are printed with PRIu64 */
852 fprintf(stderr, "%s: %s: file %s (index %" PRIu64 "): unexpected length %" PRIu64 " (should be %" PRIu64 ")\n", progname, zipname, filename, idx, nsize, size);
/* this branch reports the checksum, so the message names the CRC rather than the length */
856 fprintf(stderr, "%s: %s: file %s (index %" PRIu64 "): unexpected CRC %x (should be %x)\n", progname, zipname, filename, idx, ncrc, crc);
/*
 * Translate value into its display name by scanning map up to the row whose
 * value is UINT32_MAX (the end marker).  Values without a row are formatted
 * as "unknown (<value>)" into a function-static buffer, so that text is
 * overwritten by the next unknown lookup.
 */
864 static const char *map_enum(const enum_map_t *map, uint32_t value) {
/* "unknown (" + up to 10 digits + ")" + NUL needs 21 bytes; 16 truncated large values */
865 static char unknown[32];
868 while (map[i].value < UINT32_MAX) {
869 if (map[i].value == value) {
875 snprintf(unknown, sizeof(unknown), "unknown (%u)", value);
876 unknown[sizeof(unknown) - 1] = '\0';