2 source_hole.c -- source for handling huge files that are mostly NULs
3 Copyright (C) 2014-2016 Dieter Baron and Thomas Klausner
5 This file is part of libzip, a library to manipulate ZIP archives.
6 The authors can be contacted at <libzip@nih.at>
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in
15 the documentation and/or other materials provided with the
17 3. The names of the authors may not be used to endorse or promote
18 products derived from this software without specific prior
21 THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
22 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
25 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
27 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
29 IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
31 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44 zip_source_t *source_hole_create(const char *, int flags, zip_error_t *);
52 #define MY_MIN(a, b) ((a) < (b) ? (a) : (b))
54 #define FRAGMENT_SIZE (8*1024)
56 #define MARK_BEGIN "NiH0"
57 #define MARK_DATA "NiH1"
58 #define MARK_NUL "NiH2"
61 typedef struct buffer {
62 zip_uint64_t fragment_size;
63 zip_uint8_t **fragment;
64 zip_uint64_t nfragments;
69 static void buffer_free(buffer_t *buffer);
70 static buffer_t *buffer_from_file(const char *fname, int flags, zip_error_t *error);
71 static buffer_t *buffer_new(void);
72 static zip_int64_t buffer_read(buffer_t *buffer, zip_uint8_t *data, zip_uint64_t length, zip_error_t *error);
73 static int buffer_read_file(buffer_t *buffer, FILE *f, zip_error_t *error);
74 static zip_int64_t buffer_seek(buffer_t *buffer, void *data, zip_uint64_t length, zip_error_t *error);
75 static int buffer_to_file(buffer_t *buffer, const char *fname, zip_error_t *error);
76 static zip_int64_t buffer_write(buffer_t *buffer, const zip_uint8_t *data, zip_uint64_t length, zip_error_t *error);
77 static zip_uint64_t get_u64(const zip_uint8_t *b);
78 static int only_nul(const zip_uint8_t *data, zip_uint64_t length);
79 static int write_nuls(zip_uint64_t n, FILE *f);
80 static int write_u64(zip_uint64_t u64, FILE *f);
90 static hole_t *hole_new(const char *fname, int flags, zip_error_t *error);
91 static zip_int64_t source_hole_cb(void *ud, void *data, zip_uint64_t length, zip_source_cmd_t command);
94 zip_source_t *source_hole_create(const char *fname, int flags, zip_error_t *error)
96 hole_t *ud = hole_new(fname, flags, error);
101 return zip_source_function_create(source_hole_cb, ud, error);
106 buffer_free(buffer_t *buffer)
110 if (buffer == NULL) {
114 if (buffer->fragment) {
115 for (i=0; i<buffer->nfragments; i++) {
116 free(buffer->fragment[i]);
118 free(buffer->fragment);
125 buffer_from_file(const char *fname, int flags, zip_error_t *error)
130 if ((buffer = buffer_new()) == NULL) {
131 zip_error_set(error, ZIP_ER_MEMORY, 0);
136 if ((flags & ZIP_TRUNCATE) == 0) {
137 if ((f = fopen(fname, "rb")) == NULL) {
138 if (!(errno == ENOENT && (flags & ZIP_CREATE))) {
144 if (buffer_read_file(buffer, f, error) < 0) {
162 if ((buffer = (buffer_t *)malloc(sizeof(*buffer))) == NULL) {
166 buffer->fragment = NULL;
167 buffer->nfragments = 0;
168 buffer->fragment_size = FRAGMENT_SIZE;
177 buffer_read(buffer_t *buffer, zip_uint8_t *data, zip_uint64_t length, zip_error_t *error)
179 zip_uint64_t n, i, fragment_offset;
181 length = MY_MIN(length, buffer->size - buffer->offset);
186 if (length > ZIP_INT64_MAX) {
190 i = buffer->offset / buffer->fragment_size;
191 fragment_offset = buffer->offset % buffer->fragment_size;
194 zip_uint64_t left = MY_MIN(length - n, buffer->fragment_size - fragment_offset);
196 if (buffer->fragment[i]) {
197 memcpy(data + n, buffer->fragment[i] + fragment_offset, left);
200 memset(data + n, 0, left);
209 return (zip_int64_t)n;
214 buffer_read_file(buffer_t *buffer, FILE *f, zip_error_t *error)
219 if (fread(b, 20, 1, f) != 1) {
220 zip_error_set(error, ZIP_ER_READ, errno);
224 if (memcmp(b, MARK_BEGIN, 4) != 0) {
225 zip_error_set(error, ZIP_ER_READ, EFTYPE);
229 buffer->fragment_size = get_u64(b+4);
230 buffer->size = get_u64(b+12);
232 if (buffer->size + buffer->fragment_size < buffer->size) {
233 zip_error_set(error, ZIP_ER_MEMORY, 0);
236 buffer->nfragments = (buffer->size + buffer->fragment_size - 1) / buffer->fragment_size;
237 if ((buffer->nfragments > SIZE_MAX/sizeof(buffer->fragment[0]))
238 || ((buffer->fragment = (zip_uint8_t **)malloc(sizeof(buffer->fragment[0]) * buffer->nfragments)) == NULL)) {
239 zip_error_set(error, ZIP_ER_MEMORY, 0);
243 for (i = 0; i < buffer->nfragments; i++) {
244 buffer->fragment[i] = NULL;
248 while (i < buffer->nfragments) {
249 if (fread(b, 4, 1, f) != 1) {
250 zip_error_set(error, ZIP_ER_READ, errno);
254 if (memcmp(b, MARK_DATA, 4) == 0) {
255 if (buffer->fragment_size > SIZE_MAX) {
256 zip_error_set(error, ZIP_ER_MEMORY, 0);
259 if ((buffer->fragment[i] = (zip_uint8_t *)malloc(buffer->fragment_size)) == NULL) {
260 zip_error_set(error, ZIP_ER_MEMORY, 0);
263 if (fread(buffer->fragment[i], buffer->fragment_size, 1, f) != 1) {
264 zip_error_set(error, ZIP_ER_READ, errno);
269 else if (memcmp(b, MARK_NUL, 4) == 0) {
270 if (fread(b, 8, 1, f) != 1) {
271 zip_error_set(error, ZIP_ER_READ, errno);
277 zip_error_set(error, ZIP_ER_READ, EFTYPE);
286 buffer_seek(buffer_t *buffer, void *data, zip_uint64_t length, zip_error_t *error)
288 zip_int64_t new_offset = zip_source_seek_compute_offset(buffer->offset, buffer->size, data, length, error);
290 if (new_offset < 0) {
294 buffer->offset = (zip_uint64_t)new_offset;
300 buffer_to_file(buffer_t *buffer, const char *fname, zip_error_t *error)
302 FILE *f = fopen(fname, "wb");
304 zip_uint64_t nul_run;
307 zip_error_set(error, ZIP_ER_OPEN, errno);
311 fwrite(MARK_BEGIN, 4, 1, f);
312 write_u64(buffer->fragment_size, f);
313 write_u64(buffer->size, f);
316 for (i=0; i * buffer->fragment_size <buffer->size; i++) {
317 if (buffer->fragment[i] == NULL || only_nul(buffer->fragment[i], buffer->fragment_size)) {
322 write_nuls(nul_run, f);
325 fwrite(MARK_DATA, 4, 1, f);
327 fwrite(buffer->fragment[i], 1, buffer->fragment_size, f);
332 write_nuls(nul_run, f);
335 if (fclose(f) != 0) {
336 zip_error_set(error, ZIP_ER_WRITE, errno);
345 buffer_write(buffer_t *buffer, const zip_uint8_t *data, zip_uint64_t length, zip_error_t *error)
347 zip_uint8_t **fragment;
348 if (buffer->offset + length > buffer->nfragments * buffer->fragment_size) {
349 zip_uint64_t needed_fragments = (buffer->offset + length + buffer->fragment_size - 1) / buffer->fragment_size;
350 zip_uint64_t new_capacity = buffer->nfragments;
353 if (new_capacity == 0) {
356 while (new_capacity < needed_fragments) {
360 fragment = realloc(buffer->fragment, new_capacity * sizeof(*fragment));
362 if (fragment == NULL) {
363 zip_error_set(error, ZIP_ER_MEMORY, 0);
367 for (i = buffer->nfragments; i < new_capacity; i++) {
371 buffer->fragment = fragment;
372 buffer->nfragments = new_capacity;
375 if (!only_nul(data, length)) {
376 zip_uint64_t idx, n, fragment_offset;
378 idx = buffer->offset / buffer->fragment_size;
379 fragment_offset = buffer->offset % buffer->fragment_size;
383 zip_uint64_t left = MY_MIN(length - n, buffer->fragment_size - fragment_offset);
385 if (buffer->fragment[idx] == NULL) {
386 if ((buffer->fragment[idx] = (zip_uint8_t *)malloc(buffer->fragment_size)) == NULL) {
387 zip_error_set(error, ZIP_ER_MEMORY, 0);
390 memset(buffer->fragment[idx], 0, buffer->fragment_size);
392 memcpy(buffer->fragment[idx] + fragment_offset, data + n, left);
400 buffer->offset += length;
401 if (buffer->offset > buffer->size) {
402 buffer->size = buffer->offset;
405 return (zip_int64_t)length;
410 get_u64(const zip_uint8_t *b)
414 i = (zip_uint64_t)b[0] << 56 | (zip_uint64_t)b[1] << 48 | (zip_uint64_t)b[2] << 40 | (zip_uint64_t)b[3] << 32 | (zip_uint64_t)b[4] << 24 | (zip_uint64_t)b[5] << 16 | (zip_uint64_t)b[6] << 8 | (zip_uint64_t)b[7];
421 only_nul(const zip_uint8_t *data, zip_uint64_t length)
425 for (i=0; i< length; i++) {
426 if (data[i] != '\0') {
436 write_nuls(zip_uint64_t n, FILE *f)
438 if (fwrite(MARK_NUL, 4, 1, f) != 1) {
441 return write_u64(n, f);
446 write_u64(zip_uint64_t u64, FILE *f)
450 b[0] = (zip_uint8_t)((u64 >> 56) & 0xff);
451 b[1] = (zip_uint8_t)((u64 >> 48) & 0xff);
452 b[2] = (zip_uint8_t)((u64 >> 40) & 0xff);
453 b[3] = (zip_uint8_t)((u64 >> 32) & 0xff);
454 b[4] = (zip_uint8_t)((u64 >> 24) & 0xff);
455 b[5] = (zip_uint8_t)((u64 >> 16) & 0xff);
456 b[6] = (zip_uint8_t)((u64 >> 8) & 0xff);
457 b[7] = (zip_uint8_t)(u64 & 0xff);
459 return fwrite(b, 8, 1, f) == 1 ? 0 : -1;
464 hole_free(hole_t *hole) {
468 zip_error_fini(&hole->error);
469 buffer_free(hole->in);
470 buffer_free(hole->out);
477 hole_new(const char *fname, int flags, zip_error_t *error)
479 hole_t *ctx = (hole_t *)malloc(sizeof(*ctx));
482 zip_error_set(error, ZIP_ER_MEMORY, 0);
486 if ((ctx->fname = strdup(fname)) == NULL) {
488 zip_error_set(error, ZIP_ER_MEMORY, 0);
492 if ((ctx->in = buffer_from_file(fname, flags, error)) == NULL) {
497 zip_error_init(&ctx->error);
505 source_hole_cb(void *ud, void *data, zip_uint64_t length, zip_source_cmd_t command)
507 hole_t *ctx = (hole_t *)ud;
510 case ZIP_SOURCE_BEGIN_WRITE:
511 ctx->out = buffer_new();
514 case ZIP_SOURCE_CLOSE:
517 case ZIP_SOURCE_COMMIT_WRITE:
518 if (buffer_to_file(ctx->out, ctx->fname, &ctx->error) < 0) {
521 buffer_free(ctx->in);
526 case ZIP_SOURCE_ERROR:
527 return zip_error_to_data(&ctx->error, data, length);
529 case ZIP_SOURCE_FREE:
533 case ZIP_SOURCE_OPEN:
537 case ZIP_SOURCE_READ:
538 return buffer_read(ctx->in, data, length, &ctx->error);
540 case ZIP_SOURCE_REMOVE:
541 buffer_free(ctx->in);
542 ctx->in = buffer_new();
543 buffer_free(ctx->out);
545 (void)remove(ctx->fname);
548 case ZIP_SOURCE_ROLLBACK_WRITE:
549 buffer_free(ctx->out);
553 case ZIP_SOURCE_SEEK:
554 return buffer_seek(ctx->in, data, length, &ctx->error);
556 case ZIP_SOURCE_SEEK_WRITE:
557 return buffer_seek(ctx->out, data, length, &ctx->error);
559 case ZIP_SOURCE_STAT: {
560 zip_stat_t *st = ZIP_SOURCE_GET_ARGS(zip_stat_t, data, length, &ctx->error);
566 /* TODO: return ENOENT if fname doesn't exist */
568 st->valid |= ZIP_STAT_SIZE;
569 st->size = ctx->in->size;
573 case ZIP_SOURCE_TELL:
574 return (zip_int64_t)ctx->in->offset;
576 case ZIP_SOURCE_TELL_WRITE:
577 return (zip_int64_t)ctx->out->offset;
579 case ZIP_SOURCE_WRITE:
580 return buffer_write(ctx->out, data, length, &ctx->error);
582 case ZIP_SOURCE_SUPPORTS:
583 return zip_source_make_command_bitmap(ZIP_SOURCE_BEGIN_WRITE, ZIP_SOURCE_COMMIT_WRITE, ZIP_SOURCE_CLOSE, ZIP_SOURCE_ERROR, ZIP_SOURCE_FREE, ZIP_SOURCE_OPEN, ZIP_SOURCE_READ, ZIP_SOURCE_REMOVE, ZIP_SOURCE_ROLLBACK_WRITE, ZIP_SOURCE_SEEK, ZIP_SOURCE_SEEK_WRITE, ZIP_SOURCE_STAT, ZIP_SOURCE_TELL, ZIP_SOURCE_TELL_WRITE, ZIP_SOURCE_WRITE, -1);
586 zip_error_set(&ctx->error, ZIP_ER_OPNOTSUPP, 0);