unzip: handle "central directory"
[platform/upstream/busybox.git] / archival / unzip.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * Mini unzip implementation for busybox
4  *
5  * Copyright (C) 2004 by Ed Clark
6  *
7  * Loosely based on original busybox unzip applet by Laurence Anderson.
8  * All options and features should work in this version.
9  *
10  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
11  */
12
13 /* For reference see
14  * http://www.pkware.com/company/standards/appnote/
15  * http://www.info-zip.org/pub/infozip/doc/appnote-iz-latest.zip
16  */
17
18 /* TODO
19  * Zip64 + other methods
20  */
21
22 #include "libbb.h"
23 #include "unarchive.h"
24
/*
 * Record signatures.  The main loop reads the 4 signature bytes straight
 * into a uint32_t and compares that value against these constants without
 * any byte swapping, so each constant is spelled the way the on-disk bytes
 * "PK" + two type bytes appear in a host integer for the given endianness.
 */
enum {
#if BB_BIG_ENDIAN
	ZIP_FILEHEADER_MAGIC = 0x504b0304, /* local file header */
	ZIP_CDS_MAGIC        = 0x504b0102, /* central directory entry */
	ZIP_CDE_MAGIC        = 0x504b0506, /* end of central directory */
	ZIP_DD_MAGIC         = 0x504b0708, /* data descriptor */
#else
	ZIP_FILEHEADER_MAGIC = 0x04034b50, /* local file header */
	ZIP_CDS_MAGIC        = 0x02014b50, /* central directory entry */
	ZIP_CDE_MAGIC        = 0x06054b50, /* end of central directory */
	ZIP_DD_MAGIC         = 0x08074b50, /* data descriptor */
#endif
};
38
/* Number of bytes read for a local file header; the 4-byte signature
 * is read separately and is not part of this count.
 */
#define ZIP_HEADER_LEN 26

/*
 * Local file header.  'raw' is the buffer the bytes are read into and
 * 'formatted' overlays named fields on those same bytes.  The numbers in
 * the field comments are byte offsets within 'raw'.  Multi-byte fields
 * are only meaningful on the host after FIX_ENDIANNESS_ZIP() has run.
 */
typedef union {
	uint8_t raw[ZIP_HEADER_LEN];
	struct {
		uint16_t version;               /* 0-1 */
		uint16_t flags;                 /* 2-3 */
		uint16_t method;                /* 4-5 */
		uint16_t modtime;               /* 6-7 */
		uint16_t moddate;               /* 8-9 */
		uint32_t crc32 PACKED;          /* 10-13 */
		uint32_t cmpsize PACKED;        /* 14-17 */
		uint32_t ucmpsize PACKED;       /* 18-21 */
		uint16_t filename_len;          /* 22-23 */
		uint16_t extra_len;             /* 24-25 */
	} formatted PACKED;
} zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */

/* Check the offset of the last element, not the length.  This leniency
 * allows for poor packing, whereby the overall struct may be too long,
 * even though the elements are all in the right place.
 *
 * The array size evaluates to -1 (a compile error) when the layout is
 * wrong; the struct itself is never instantiated.
 */
struct BUG_zip_header_must_be_26_bytes {
	char BUG_zip_header_must_be_26_bytes[
		offsetof(zip_header_t, formatted.extra_len) + 2
			== ZIP_HEADER_LEN ? 1 : -1];
};
66
/* Per-field helpers: rewrite one member of (hdr).formatted in place,
 * passing it through SWAP_LE16 / SWAP_LE32 respectively.
 */
#define ZIP_FIX_LE16(hdr, field) \
	((hdr).formatted.field = SWAP_LE16((hdr).formatted.field))
#define ZIP_FIX_LE32(hdr, field) \
	((hdr).formatted.field = SWAP_LE32((hdr).formatted.field))

/* Convert every field of a freshly read zip_header_t, in place.
 * The argument is evaluated more than once: pass a plain lvalue.
 */
#define FIX_ENDIANNESS_ZIP(zip_header) do { \
	ZIP_FIX_LE16(zip_header, version); \
	ZIP_FIX_LE16(zip_header, flags); \
	ZIP_FIX_LE16(zip_header, method); \
	ZIP_FIX_LE16(zip_header, modtime); \
	ZIP_FIX_LE16(zip_header, moddate); \
	ZIP_FIX_LE32(zip_header, crc32); \
	ZIP_FIX_LE32(zip_header, cmpsize); \
	ZIP_FIX_LE32(zip_header, ucmpsize); \
	ZIP_FIX_LE16(zip_header, filename_len); \
	ZIP_FIX_LE16(zip_header, extra_len); \
} while (0)
79
/* Number of bytes read for one central directory entry; the 4-byte
 * signature is skipped by the reader and is not part of this count.
 */
#define CDS_HEADER_LEN 42

/*
 * Central directory entry.  'raw' receives the bytes as read and
 * 'formatted' names them; field comments give byte offsets within 'raw'.
 * FIX_ENDIANNESS_CDS() converts only crc32, cmpsize, ucmpsize and the
 * three *_length fields; every other member keeps its on-disk byte order.
 */
typedef union {
	uint8_t raw[CDS_HEADER_LEN];
	struct {
		/* uint32_t signature; 50 4b 01 02 */
		uint16_t version_made_by;       /* 0-1 */
		uint16_t version_needed;        /* 2-3 */
		uint16_t cds_flags;             /* 4-5 */
		uint16_t method;                /* 6-7 */
		uint16_t mtime;                 /* 8-9 */
		uint16_t mdate;                 /* 10-11 */
		uint32_t crc32;                 /* 12-15 */
		uint32_t cmpsize;               /* 16-19 */
		uint32_t ucmpsize;              /* 20-23 */
		uint16_t file_name_length;      /* 24-25 */
		uint16_t extra_field_length;    /* 26-27 */
		uint16_t file_comment_length;   /* 28-29 */
		uint16_t disk_number_start;     /* 30-31 */
		uint16_t internal_file_attributes; /* 32-33 */
		uint32_t external_file_attributes PACKED; /* 34-37 */
		uint32_t relative_offset_of_local_header PACKED; /* 38-41 */
	} formatted PACKED;
} cds_header_t;

/* Compile-time layout check: the array size becomes -1 (an error) unless
 * the last field ends exactly at CDS_HEADER_LEN.  Never instantiated.
 */
struct BUG_cds_header_must_be_42_bytes {
	char BUG_cds_header_must_be_42_bytes[
		offsetof(cds_header_t, formatted.relative_offset_of_local_header) + 4
			== CDS_HEADER_LEN ? 1 : -1];
};
110
/* Per-field helpers: rewrite one member of (hdr).formatted in place,
 * passing it through SWAP_LE16 / SWAP_LE32 respectively.
 */
#define CDS_FIX_LE16(hdr, field) \
	((hdr).formatted.field = SWAP_LE16((hdr).formatted.field))
#define CDS_FIX_LE32(hdr, field) \
	((hdr).formatted.field = SWAP_LE32((hdr).formatted.field))

/* Convert, in place, the cds_header_t members listed below: the crc,
 * both sizes and the three variable-part lengths.  All other members
 * are left exactly as read.  The argument is evaluated more than once.
 */
#define FIX_ENDIANNESS_CDS(cds_header) do { \
	CDS_FIX_LE32(cds_header, crc32); \
	CDS_FIX_LE32(cds_header, cmpsize); \
	CDS_FIX_LE32(cds_header, ucmpsize); \
	CDS_FIX_LE16(cds_header, file_name_length); \
	CDS_FIX_LE16(cds_header, extra_field_length); \
	CDS_FIX_LE16(cds_header, file_comment_length); \
} while (0)
119
/* Number of bytes copied for the "end of central directory" record,
 * starting right after its 4-byte signature.
 */
#define CDE_HEADER_LEN 16

/*
 * End-of-central-directory record.  'raw' receives the bytes that follow
 * the signature and 'formatted' names them.  FIX_ENDIANNESS_CDE() converts
 * cds_offset only; the other members keep their on-disk byte order.
 */
typedef union {
	uint8_t raw[CDE_HEADER_LEN];
	struct {
		/* uint32_t signature; 50 4b 05 06 */
		uint16_t this_disk_no;              /* 0-1 */
		uint16_t disk_with_cds_no;          /* 2-3 */
		uint16_t cds_entries_on_this_disk;  /* 4-5 */
		uint16_t cds_entries_total;         /* 6-7 */
		uint32_t cds_size;                  /* 8-11 */
		uint32_t cds_offset;                /* 12-15 */
		/* uint16_t file_comment_length; */
		/* .ZIP file comment (variable size) */
	} formatted PACKED;
} cde_header_t;

/* Compile-time size check: the array size becomes -1 (an error) unless
 * the union is exactly CDE_HEADER_LEN bytes.  Never instantiated.
 */
struct BUG_cde_header_must_be_16_bytes {
	char BUG_cde_header_must_be_16_bytes[
		sizeof(cde_header_t) == CDE_HEADER_LEN ? 1 : -1];
};
141
/* Convert the central directory offset of a cde_header_t in place.
 * cds_offset is a 32-bit field, so it needs the 32-bit conversion:
 * a 16-bit swap would drop the upper half of the offset on hosts
 * where the swap is not a no-op.
 * The argument is evaluated more than once: pass a plain lvalue.
 */
#define FIX_ENDIANNESS_CDE(cde_header) do { \
	(cde_header).formatted.cds_offset = SWAP_LE32((cde_header).formatted.cds_offset); \
} while (0)
145
/* Fixed descriptor number the archive is always read from: unzip_main()
 * dups stdin onto it or moves the opened file to it, so every helper in
 * this file can use the constant instead of taking an fd parameter.
 */
enum { zip_fd = 3 };
147
148
149 #if ENABLE_DESKTOP
150 /* NB: does not preserve file position! */
151 static uint32_t find_cds_offset(void)
152 {
153         unsigned char buf[1024];
154         cde_header_t cde_header;
155         unsigned char *p;
156         off_t end;
157
158         end = xlseek(zip_fd, 0, SEEK_END);
159         if (end < 1024)
160                 end = 1024;
161         end -= 1024;
162         xlseek(zip_fd, end, SEEK_SET);
163         full_read(zip_fd, buf, 1024);
164
165         p = buf;
166         while (p <= buf + 1024 - CDE_HEADER_LEN - 4) {
167                 if (*p != 'P') {
168                         p++;
169                         continue;
170                 }
171                 if (*++p != 'K')
172                         continue;
173                 if (*++p != 5)
174                         continue;
175                 if (*++p != 6)
176                         continue;
177                 /* we found CDE! */
178                 memcpy(cde_header.raw, p + 1, CDE_HEADER_LEN);
179                 FIX_ENDIANNESS_CDE(cde_header);
180                 return cde_header.formatted.cds_offset;
181         }
182         bb_error_msg_and_die("can't find file table");
183 };
184
185 static uint32_t read_next_cds(int count_m1, uint32_t cds_offset, cds_header_t *cds_ptr)
186 {
187         off_t org;
188
189         org = xlseek(zip_fd, 0, SEEK_CUR);
190
191         if (!cds_offset)
192                 cds_offset = find_cds_offset();
193
194         while (count_m1-- >= 0) {
195                 xlseek(zip_fd, cds_offset + 4, SEEK_SET);
196                 xread(zip_fd, cds_ptr->raw, CDS_HEADER_LEN);
197                 FIX_ENDIANNESS_CDS(*cds_ptr);
198                 cds_offset += 4 + CDS_HEADER_LEN
199                         + cds_ptr->formatted.file_name_length
200                         + cds_ptr->formatted.extra_field_length
201                         + cds_ptr->formatted.file_comment_length;
202         }
203
204         xlseek(zip_fd, org, SEEK_SET);
205         return cds_offset;
206 };
207 #endif
208
209 static void unzip_skip(off_t skip)
210 {
211         bb_copyfd_exact_size(zip_fd, -1, skip);
212 }
213
214 static void unzip_create_leading_dirs(const char *fn)
215 {
216         /* Create all leading directories */
217         char *name = xstrdup(fn);
218         if (bb_make_directory(dirname(name), 0777, FILEUTILS_RECUR)) {
219                 bb_error_msg_and_die("exiting"); /* bb_make_directory is noisy */
220         }
221         free(name);
222 }
223
224 static void unzip_extract(zip_header_t *zip_header, int dst_fd)
225 {
226         if (zip_header->formatted.method == 0) {
227                 /* Method 0 - stored (not compressed) */
228                 off_t size = zip_header->formatted.ucmpsize;
229                 if (size)
230                         bb_copyfd_exact_size(zip_fd, dst_fd, size);
231         } else {
232                 /* Method 8 - inflate */
233                 inflate_unzip_result res;
234                 if (inflate_unzip(&res, zip_header->formatted.cmpsize, zip_fd, dst_fd) < 0)
235                         bb_error_msg_and_die("inflate error");
236                 /* Validate decompression - crc */
237                 if (zip_header->formatted.crc32 != (res.crc ^ 0xffffffffL)) {
238                         bb_error_msg_and_die("crc error");
239                 }
240                 /* Validate decompression - size */
241                 if (zip_header->formatted.ucmpsize != res.bytes_out) {
242                         /* Don't die. Who knows, maybe len calculation
243                          * was botched somewhere. After all, crc matched! */
244                         bb_error_msg("bad length");
245                 }
246         }
247 }
248
249 int unzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
250 int unzip_main(int argc, char **argv)
251 {
252         enum { O_PROMPT, O_NEVER, O_ALWAYS };
253
254         zip_header_t zip_header;
255         smallint verbose = 1;
256         smallint listing = 0;
257         smallint overwrite = O_PROMPT;
258 #if ENABLE_DESKTOP
259         uint32_t cds_offset;
260         unsigned cds_entries;
261 #endif
262         unsigned total_size;
263         unsigned total_entries;
264         int dst_fd = -1;
265         char *src_fn = NULL;
266         char *dst_fn = NULL;
267         llist_t *zaccept = NULL;
268         llist_t *zreject = NULL;
269         char *base_dir = NULL;
270         int i, opt;
271         int opt_range = 0;
272         char key_buf[80];
273         struct stat stat_buf;
274
275         /* '-' makes getopt return 1 for non-options */
276         while ((opt = getopt(argc, argv, "-d:lnopqx")) != -1) {
277                 switch (opt_range) {
278                 case 0: /* Options */
279                         switch (opt) {
280                         case 'l': /* List */
281                                 listing = 1;
282                                 break;
283
284                         case 'n': /* Never overwrite existing files */
285                                 overwrite = O_NEVER;
286                                 break;
287
288                         case 'o': /* Always overwrite existing files */
289                                 overwrite = O_ALWAYS;
290                                 break;
291
292                         case 'p': /* Extract files to stdout and fall through to set verbosity */
293                                 dst_fd = STDOUT_FILENO;
294
295                         case 'q': /* Be quiet */
296                                 verbose = 0;
297                                 break;
298
299                         case 1: /* The zip file */
300                                 /* +5: space for ".zip" and NUL */
301                                 src_fn = xmalloc(strlen(optarg) + 5);
302                                 strcpy(src_fn, optarg);
303                                 opt_range++;
304                                 break;
305
306                         default:
307                                 bb_show_usage();
308
309                         }
310                         break;
311
312                 case 1: /* Include files */
313                         if (opt == 1) {
314                                 llist_add_to(&zaccept, optarg);
315                                 break;
316                         }
317                         if (opt == 'd') {
318                                 base_dir = optarg;
319                                 opt_range += 2;
320                                 break;
321                         }
322                         if (opt == 'x') {
323                                 opt_range++;
324                                 break;
325                         }
326                         bb_show_usage();
327
328                 case 2 : /* Exclude files */
329                         if (opt == 1) {
330                                 llist_add_to(&zreject, optarg);
331                                 break;
332                         }
333                         if (opt == 'd') { /* Extract to base directory */
334                                 base_dir = optarg;
335                                 opt_range++;
336                                 break;
337                         }
338                         /* fall through */
339
340                 default:
341                         bb_show_usage();
342                 }
343         }
344
345         if (src_fn == NULL) {
346                 bb_show_usage();
347         }
348
349         /* Open input file */
350         if (LONE_DASH(src_fn)) {
351                 xdup2(STDIN_FILENO, zip_fd);
352                 /* Cannot use prompt mode since zip data is arriving on STDIN */
353                 if (overwrite == O_PROMPT)
354                         overwrite = O_NEVER;
355         } else {
356                 static const char extn[][5] = {"", ".zip", ".ZIP"};
357                 int orig_src_fn_len = strlen(src_fn);
358                 int src_fd = -1;
359
360                 for (i = 0; (i < 3) && (src_fd == -1); i++) {
361                         strcpy(src_fn + orig_src_fn_len, extn[i]);
362                         src_fd = open(src_fn, O_RDONLY);
363                 }
364                 if (src_fd == -1) {
365                         src_fn[orig_src_fn_len] = '\0';
366                         bb_error_msg_and_die("can't open %s, %s.zip, %s.ZIP", src_fn, src_fn, src_fn);
367                 }
368                 xmove_fd(src_fd, zip_fd);
369         }
370
371         /* Change dir if necessary */
372         if (base_dir)
373                 xchdir(base_dir);
374
375         if (verbose) {
376                 printf("Archive:  %s\n", src_fn);
377                 if (listing){
378                         puts("  Length     Date   Time    Name\n"
379                              " --------    ----   ----    ----");
380                 }
381         }
382
383         total_size = 0;
384         total_entries = 0;
385 #if ENABLE_DESKTOP
386         cds_entries = 0;
387         cds_offset = 0;
388 #endif
389         while (1) {
390                 uint32_t magic;
391
392                 /* Check magic number */
393                 xread(zip_fd, &magic, 4);
394                 /* Central directory? It's at the end, so exit */
395                 if (magic == ZIP_CDS_MAGIC)
396                         break;
397 #if ENABLE_DESKTOP
398                 /* Data descriptor? It was a streaming file, go on */
399                 if (magic == ZIP_DD_MAGIC) {
400                         /* skip over duplicate crc32, cmpsize and ucmpsize */
401                         unzip_skip(3 * 4);
402                         continue;
403                 }
404 #endif
405                 if (magic != ZIP_FILEHEADER_MAGIC)
406                         bb_error_msg_and_die("invalid zip magic %08X", (int)magic);
407
408                 /* Read the file header */
409                 xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN);
410                 FIX_ENDIANNESS_ZIP(zip_header);
411                 if ((zip_header.formatted.method != 0) && (zip_header.formatted.method != 8)) {
412                         bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method);
413                 }
414 #if !ENABLE_DESKTOP
415                 if (zip_header.formatted.flags & 0x0009) {
416                         bb_error_msg_and_die("zip flags 1 and 8 are not supported");
417                 }
418 #else
419                 if (zip_header.formatted.flags & 0x0001) {
420                         /* 0x0001 - encrypted */
421                         bb_error_msg_and_die("zip flag 1 (encryption) is not supported");
422                 }
423                 if (zip_header.formatted.flags & 0x0008) {
424                         cds_header_t cds_header;
425                         /* 0x0008 - streaming. [u]cmpsize can be reliably gotten
426                          * only from Central Directory. See unzip_doc.txt */
427                         cds_offset = read_next_cds(total_entries - cds_entries, cds_offset, &cds_header);
428                         cds_entries = total_entries + 1;
429                         zip_header.formatted.crc32    = cds_header.formatted.crc32;
430                         zip_header.formatted.cmpsize  = cds_header.formatted.cmpsize;
431                         zip_header.formatted.ucmpsize = cds_header.formatted.ucmpsize;
432                 }
433 #endif
434
435                 /* Read filename */
436                 free(dst_fn);
437                 dst_fn = xzalloc(zip_header.formatted.filename_len + 1);
438                 xread(zip_fd, dst_fn, zip_header.formatted.filename_len);
439
440                 /* Skip extra header bytes */
441                 unzip_skip(zip_header.formatted.extra_len);
442
443                 /* Filter zip entries */
444                 if (find_list_entry(zreject, dst_fn)
445                  || (zaccept && !find_list_entry(zaccept, dst_fn))
446                 ) { /* Skip entry */
447                         i = 'n';
448
449                 } else { /* Extract entry */
450                         if (listing) { /* List entry */
451                                 if (verbose) {
452                                         unsigned dostime = zip_header.formatted.modtime | (zip_header.formatted.moddate << 16);
453                                         printf("%9u  %02u-%02u-%02u %02u:%02u   %s\n",
454                                            zip_header.formatted.ucmpsize,
455                                            (dostime & 0x01e00000) >> 21,
456                                            (dostime & 0x001f0000) >> 16,
457                                            (((dostime & 0xfe000000) >> 25) + 1980) % 100,
458                                            (dostime & 0x0000f800) >> 11,
459                                            (dostime & 0x000007e0) >> 5,
460                                            dst_fn);
461                                         total_size += zip_header.formatted.ucmpsize;
462                                 } else {
463                                         /* short listing -- filenames only */
464                                         puts(dst_fn);
465                                 }
466                                 i = 'n';
467                         } else if (dst_fd == STDOUT_FILENO) { /* Extracting to STDOUT */
468                                 i = -1;
469                         } else if (last_char_is(dst_fn, '/')) { /* Extract directory */
470                                 if (stat(dst_fn, &stat_buf) == -1) {
471                                         if (errno != ENOENT) {
472                                                 bb_perror_msg_and_die("can't stat '%s'", dst_fn);
473                                         }
474                                         if (verbose) {
475                                                 printf("   creating: %s\n", dst_fn);
476                                         }
477                                         unzip_create_leading_dirs(dst_fn);
478                                         if (bb_make_directory(dst_fn, 0777, 0)) {
479                                                 bb_error_msg_and_die("exiting");
480                                         }
481                                 } else {
482                                         if (!S_ISDIR(stat_buf.st_mode)) {
483                                                 bb_error_msg_and_die("'%s' exists but is not directory", dst_fn);
484                                         }
485                                 }
486                                 i = 'n';
487
488                         } else {  /* Extract file */
489  check_file:
490                                 if (stat(dst_fn, &stat_buf) == -1) { /* File does not exist */
491                                         if (errno != ENOENT) {
492                                                 bb_perror_msg_and_die("can't stat '%s'", dst_fn);
493                                         }
494                                         i = 'y';
495                                 } else { /* File already exists */
496                                         if (overwrite == O_NEVER) {
497                                                 i = 'n';
498                                         } else if (S_ISREG(stat_buf.st_mode)) { /* File is regular file */
499                                                 if (overwrite == O_ALWAYS) {
500                                                         i = 'y';
501                                                 } else {
502                                                         printf("replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ", dst_fn);
503                                                         if (!fgets(key_buf, sizeof(key_buf), stdin)) {
504                                                                 bb_perror_msg_and_die("can't read input");
505                                                         }
506                                                         i = key_buf[0];
507                                                 }
508                                         } else { /* File is not regular file */
509                                                 bb_error_msg_and_die("'%s' exists but is not regular file", dst_fn);
510                                         }
511                                 }
512                         }
513                 }
514
515                 switch (i) {
516                 case 'A':
517                         overwrite = O_ALWAYS;
518                 case 'y': /* Open file and fall into unzip */
519                         unzip_create_leading_dirs(dst_fn);
520                         dst_fd = xopen(dst_fn, O_WRONLY | O_CREAT | O_TRUNC);
521                 case -1: /* Unzip */
522                         if (verbose) {
523                                 printf("  inflating: %s\n", dst_fn);
524                         }
525                         unzip_extract(&zip_header, dst_fd);
526                         if (dst_fd != STDOUT_FILENO) {
527                                 /* closing STDOUT is potentially bad for future business */
528                                 close(dst_fd);
529                         }
530                         break;
531
532                 case 'N':
533                         overwrite = O_NEVER;
534                 case 'n':
535                         /* Skip entry data */
536                         unzip_skip(zip_header.formatted.cmpsize);
537                         break;
538
539                 case 'r':
540                         /* Prompt for new name */
541                         printf("new name: ");
542                         if (!fgets(key_buf, sizeof(key_buf), stdin)) {
543                                 bb_perror_msg_and_die("can't read input");
544                         }
545                         free(dst_fn);
546                         dst_fn = xstrdup(key_buf);
547                         chomp(dst_fn);
548                         goto check_file;
549
550                 default:
551                         printf("error: invalid response [%c]\n",(char)i);
552                         goto check_file;
553                 }
554
555                 total_entries++;
556         }
557
558         if (listing && verbose) {
559                 printf(" --------                   -------\n"
560                        "%9d                   %d files\n",
561                        total_size, total_entries);
562         }
563
564         return 0;
565 }