#include <fcntl.h>
#include <time.h>
+#ifdef _WIN32
+ #include <windows.h>
+ #include <fileapi.h>
+ #include <io.h>
+#endif
+
#include "repo.h"
#include "repopage.h"
/*
The format is tailored for fast decompression (i.e. only byte based),
and skewed to ASCII content (highest bit often not set):
-
+
a 0LLLLLLL
- self-describing ASCII character hex L
b 100lllll <l+1 bytes>
unsigned int litofs = 0;
memset(htab, -1, sizeof (htab));
memset(hnext, -1, sizeof (hnext));
+ if (in_len > BLOCK_SIZE)
+ return 0; /* Hey! */
while (io + 2 < in_len)
{
/* Search for a match of the string starting at IN, we have at
mlen = 0;
mofs = 0;
- for (tries = 0; try != -1 && tries < 12; tries++)
+ for (tries = 0; try != (Ref)-1 && tries < 12; tries++, try = hnext[try])
{
- if (try < io
- && in[try] == in[io] && in[try + 1] == in[io + 1])
+ if (in[try] == in[io] && in[try + 1] == in[io + 1])
{
mlen = 2;
mofs = (io - try) - 1;
break;
}
- try = hnext[try];
}
- for (; try != -1 && tries < 12; tries++)
+ for (; try != (Ref)-1 && tries < 12; tries++, try = hnext[try])
{
- /* assert(mlen >= 2); */
/* assert(io + mlen < in_len); */
/* Try a match starting from [io] with the strings at [try].
- That's only sensible if TRY actually is before IO (can happen
- with uninit hash table). If we have a previous match already
- we're only going to take the new one if it's longer, hence
- check the potentially last character. */
- if (try < io && in[try + mlen] == in[io + mlen])
+ If we have a previous match already we're only going to take
+ the new one if it's longer, hence check the potentially last
+ character first. */
+ if (in[try + mlen] == in[io + mlen] && !memcmp(in + try, in + io, mlen))
{
- unsigned int this_len, this_ofs;
- if (memcmp(in + try, in + io, mlen))
- goto no_match;
- this_len = mlen + 1;
+ /* Found a longer match */
+ mlen++;
/* Now try extending the match by more characters. */
- for (;
- io + this_len < in_len
- && in[try + this_len] == in[io + this_len]; this_len++)
- ;
-#if 0
- unsigned int testi;
- for (testi = 0; testi < this_len; testi++)
- assert(in[try + testi] == in[io + testi]);
-#endif
- this_ofs = (io - try) - 1;
- /*if (this_ofs > 65535)
- goto no_match; */
-#if 0
- assert(this_len >= 2);
- assert(this_len >= mlen);
- assert(this_len > mlen || (this_len == mlen && this_ofs > mofs));
-#endif
- mlen = this_len, mofs = this_ofs;
+ while (io + mlen < in_len && in[try + mlen] == in[io + mlen])
+ mlen++;
+ mofs = (io - try) - 1;
/* If our match extends up to the end of input, no next
match can become better. This is not just an
- optimization, it establishes a loop invariant
+ optimization, it establishes the loop invariant
(io + mlen < in_len). */
if (io + mlen >= in_len)
- goto match_done;
+ break;
}
- no_match:
- try = hnext[try];
- /*if (io - try - 1 >= 65536)
- break;*/
}
-
-match_done:
+ if (mlen < 3)
+ mlen = 0;
if (mlen)
{
/*fprintf(stderr, "%d %d\n", mlen, mofs);*/
- if (mlen == 2 && (litofs || mofs >= 1024))
- mlen = 0;
- /*else if (mofs >= 65536)
- mlen = 0;*/
- else if (mofs >= 65536)
+#if BLOCK_SIZE > 65536
+ if (mofs >= 65536)
{
if (mlen >= 2048 + 5)
mlen = 2047 + 5;
else if (mlen < 5)
mlen = 0;
}
- else if (mlen < 3)
- mlen = 0;
- /*else if (mlen >= 4096 + 19)
- mlen = 4095 + 19;*/
- else if (mlen >= 2048 + 19)
+#endif
+ if (mlen >= 2048 + 19)
mlen = 2047 + 19;
/* Skip this match if the next character would deliver a better one,
but only do this if we have the chance to really extend the
hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
hval = hval & (HS - 1);
try = htab[hval];
- if (try < io + 1
- && in[try] == in[io + 1] && in[try + 1] == in[io + 2])
+ if (try != (Ref)-1 && in[try] == in[io + 1] && in[try + 1] == in[io + 2])
{
- unsigned int this_len;
- this_len = 2;
- for (;
- io + 1 + this_len < in_len
- && in[try + this_len] == in[io + 1 + this_len];
- this_len++)
- ;
+ unsigned int this_len = 2;
+ while (io + 1 + this_len < in_len && in[try + this_len] == in[io + 1 + this_len])
+ this_len++;
if (this_len >= mlen)
mlen = 0;
}
}
if (!mlen)
{
+ /* Found no match, start/extend literal */
if (!litofs)
litofs = io + 1;
io++;
}
else
{
+ /* Found a match. First dump literals */
if (litofs)
{
unsigned litlen;
}
else if (mofs >= 65536)
{
+#if BLOCK_SIZE <= 65536
+ return 0;
+#else
assert(mlen >= 5 && mlen < 2048 + 5);
if (oo + 5 >= out_len)
return 0;
out[oo++] = mofs & 0xff;
out[oo++] = (mofs >> 8) & 0xff;
out[oo++] = mofs >> 16;
+#endif
}
else if (mlen >= 3 && mlen <= 18)
{
htab[hval] = io;
}
io++;
- };
+ }
}
}
/* We might have some characters left. */
litlen -= 32;
}
}
- litofs = 0;
}
return oo;
}
{
o = in[0] | (in[1] << 8);
in += 2;
- first = first & 31;
- first += 3;
+ first = (first & 15) + 3;
break;
}
case 15:
- /* f1 1111llll <8o> <8o> <8l> */
- /* f2 11110lll <8o> <8o> <8l> */
- /* g 11111lll <8o> <8o> <8o> <8l> */
+ /* f2 11110lll <8l> <8o> <8o> */
+ /* g 11111lll <8l> <8o> <8o> <8o> */
{
first = first & 15;
if (first >= 8)
return out - orig_out;
}
+/*
+ * Dry-run validation of a compressed buffer.
+ *
+ * Walks the opcode stream in IN (IN_LEN bytes) and adds up the lengths
+ * each opcode stands for, without producing any output.
+ *
+ * Returns the number of bytes the stream expands to, or 0 if the stream
+ * is malformed: an opcode's operand or literal bytes run past the end of
+ * the input, or a back reference points before the start of the output.
+ * An empty input also yields 0.
+ */
+static unsigned int
+check_decompress_buf(const unsigned char *in, unsigned int in_len)
+{
+  unsigned int out_len = 0;
+  const unsigned char *in_end = in + in_len;
+  while (in < in_end)
+    {
+      unsigned int first = *in++;
+      /* Bytes still available after the opcode byte.  Every operand
+         access below is checked against this, so truncated input is
+         rejected instead of being read out of bounds. */
+      unsigned int avail = (unsigned int)(in_end - in);
+      unsigned int o;
+      switch (first >> 4)
+        {
+        default:
+          /* This default case can't happen, but GCC's VRP is not strong
+             enough to see this, so make this explicitly not fall to
+             the end of the switch, so that we don't have to initialize
+             o above. */
+          continue;
+        case 0: case 1:
+        case 2: case 3:
+        case 4: case 5:
+        case 6: case 7:
+          /* a 0LLLLLLL: a single literal byte */
+          out_len++;
+          continue;
+        case 8: case 9:
+          /* b 100lllll <l+1 bytes> */
+          first = (first & 31) + 1;
+          if (first > avail)
+            return 0;           /* literal run is cut off */
+          in += first;
+          out_len += first;
+          continue;
+        case 10: case 11:
+          /* c 101oolll <8o> */
+          if (avail < 1)
+            return 0;
+          o = (first & (3 << 3)) << 5 | *in++;
+          first = (first & 7) + 2;
+          break;
+        case 12: case 13:
+          /* d 110lllll <8o> */
+          if (avail < 1)
+            return 0;
+          o = *in++;
+          first = (first & 31) + 10;
+          break;
+        case 14:
+          /* e 1110llll <8o> <8o> */
+          if (avail < 2)
+            return 0;
+          o = in[0] | ((unsigned int)in[1] << 8);
+          in += 2;
+          first = (first & 15) + 3;
+          break;
+        case 15:
+          /* f2 11110lll <8l> <8o> <8o> */
+          /* g  11111lll <8l> <8o> <8o> <8o> */
+          first = first & 15;
+          if (first >= 8)
+            {
+              if (avail < 4)
+                return 0;
+              first = (((first - 8) << 8) | in[0]) + 5;
+              o = in[1] | ((unsigned int)in[2] << 8) | ((unsigned int)in[3] << 16);
+              in += 4;
+            }
+          else
+            {
+              if (avail < 3)
+                return 0;
+              first = ((first << 8) | in[0]) + 19;
+              o = in[1] | ((unsigned int)in[2] << 8);
+              in += 3;
+            }
+          break;
+        }
+      /* A back reference with offset O copies from O + 1 bytes behind the
+         current output position, so it must satisfy O < OUT_LEN. */
+      if (o >= out_len)
+        return 0;
+      out_len += first;
+    }
+  return out_len;
+}
+
+
/**********************************************************************/
void repopagestore_init(Repopagestore *store)
else
{
/* Quick check in case all pages are already mapped and consecutive. */
- for (i = pstart; i <= pend; i++)
- if (store->mapped_at[i] == -1
- || (i > pstart
- && store->mapped_at[i]
- != store->mapped_at[i-1] + REPOPAGE_BLOBSIZE))
+ for (pnum = pstart; pnum <= pend; pnum++)
+ if (store->mapped_at[pnum] == -1
+ || (pnum > pstart
+ && store->mapped_at[pnum]
+ != store->mapped_at[pnum-1] + REPOPAGE_BLOBSIZE))
break;
- if (i > pend)
+ if (pnum > pend)
return store->blob_store + store->mapped_at[pstart];
}
if (store->nmapped < 4)
store->nmapped = 4;
store->mapped = solv_realloc2(store->mapped, store->nmapped, sizeof(store->mapped[0]));
- memset(store->mapped + oldcan, 0, (store->nmapped - oldcan) * sizeof (store->mapped[0]));
+ for (i = oldcan; i < store->nmapped; i++)
+ store->mapped[i] = -1;
store->blob_store = solv_realloc2(store->blob_store, store->nmapped, REPOPAGE_BLOBSIZE);
#ifdef DEBUG_PAGING
fprintf(stderr, "PAGE: can map %d pages\n", store->nmapped);
{
unsigned int pnum_mapped_at;
unsigned int oldpnum = store->mapped[i];
- if (oldpnum)
+ if (oldpnum != -1)
{
- if (--oldpnum == pnum)
+ if (oldpnum == pnum)
continue; /* already have the correct page */
/* Evict this page. */
#ifdef DEBUG_PAGING
fprintf(stderr, "PAGE: evict page %d from %d\n", oldpnum, i);
#endif
- store->mapped[i] = 0;
+ store->mapped[i] = -1;
store->mapped_at[oldpnum] = -1;
}
/* check if we can copy the correct content (before it gets evicted) */
fprintf(stderr, "PAGECOPY: %d from %d to %d\n", pnum, pnum_mapped_at / REPOPAGE_BLOBSIZE, i);
#endif
memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE);
- store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = 0; /* slot is now empty */
- store->mapped[i] = pnum + 1;
+ store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = -1;
+ store->mapped[i] = pnum;
store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE;
}
}
#endif
/* Still mapped somewhere else, so just copy it from there. */
memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE);
- store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = 0;
+ store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = -1;
}
}
else
#ifdef DEBUG_PAGING
fprintf(stderr, "PAGEIN: %d to %d", pnum, i);
#endif
+#ifndef _WIN32
if (pread(store->pagefd, compressed ? buf : dest, in_len, store->file_offset + p->page_offset) != in_len)
{
perror("mapping pread");
return 0;
}
+#else
+ DWORD read_len;
+ OVERLAPPED ovlp = {0};
+ ovlp.Offset = store->file_offset + p->page_offset;
+ if (!ReadFile((HANDLE) _get_osfhandle(store->pagefd), compressed ? buf : dest, in_len, &read_len, &ovlp) || read_len != in_len)
+ {
+ perror("mapping ReadFile");
+ return 0;
+ }
+#endif
if (compressed)
{
unsigned int out_len;
#endif
}
store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE;
- store->mapped[i] = pnum + 1;
+ store->mapped[i] = pnum;
}
return store->blob_store + best * REPOPAGE_BLOBSIZE;
}
return compress_buf(page, len, cpage, max);
}
+/*
+ * Decompress the LEN bytes at CPAGE into PAGE, which has room for MAX
+ * bytes.  The input is first run through check_decompress_buf(); when
+ * that reports a malformed stream (0) or an expanded size larger than
+ * MAX, nothing is decompressed and 0 is returned.  Otherwise the result
+ * of unchecked_decompress_buf() is passed through (presumably the number
+ * of bytes written -- confirm against its definition).
+ */
+unsigned int
+repopagestore_decompress_page(const unsigned char *cpage, unsigned int len, unsigned char *page, unsigned int max)
+{
+  unsigned int expanded = check_decompress_buf(cpage, len);
+
+  if (expanded != 0 && expanded <= max)
+    return unchecked_decompress_buf(cpage, len, page, max);
+  return 0;
+}
+
+
#define SOLV_ERROR_EOF 3
#define SOLV_ERROR_CORRUPT 6
read_u32(FILE *fp)
{
int c, i;
- unsigned int x = 0;
+ unsigned int x = 0;
- for (i = 0; i < 4; i++)
- {
+ for (i = 0; i < 4; i++)
+ {
c = getc(fp);
- if (c == EOF)
+ if (c == EOF)
return 0;
- x = (x << 8) | c;
- }
+ x = (x << 8) | c;
+ }
return x;
}
if (store->pagefd == -1)
can_seek = 0;
else
- fcntl(store->pagefd, F_SETFD, FD_CLOEXEC);
+ solv_setcloexec(store->pagefd, 1);
#ifdef DEBUG_PAGING
fprintf(stderr, "can %sseek\n", can_seek ? "" : "NOT ");