/*
- * Copyright (c) 2007-2008, Novell Inc.
+ * Copyright (c) 2007-2012, Novell Inc.
*
* This program is licensed under the BSD license, read LICENSE.BSD
* for further information
#define _XOPEN_SOURCE 500
#include <sys/types.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BLOCK_SIZE (65536*1)
#if BLOCK_SIZE <= 65536
-typedef __uint16_t Ref;
+typedef uint16_t Ref;
#else
-typedef __uint32_t Ref;
+typedef uint32_t Ref;
#endif
/*
The format is tailored for fast decompression (i.e. only byte based),
and skewed to ASCII content (highest bit often not set):
-
+
a 0LLLLLLL
- self-describing ASCII character hex L
b 100lllll <l+1 bytes>
compress_buf(const unsigned char *in, unsigned int in_len,
unsigned char *out, unsigned int out_len)
{
- unsigned int oo = 0; //out-offset
- unsigned int io = 0; //in-offset
+ unsigned int oo = 0; /* out-offset */
+ unsigned int io = 0; /* in-offset */
#define HS (65536)
Ref htab[HS];
Ref hnext[BLOCK_SIZE];
+ unsigned int litofs = 0;
memset(htab, -1, sizeof (htab));
memset(hnext, -1, sizeof (hnext));
- unsigned int litofs = 0;
while (io + 2 < in_len)
{
/* Search for a match of the string starting at IN, we have at
}
for (; try != -1 && tries < 12; tries++)
{
- //assert(mlen >= 2);
- //assert(io + mlen < in_len);
+ /* assert(mlen >= 2); */
+ /* assert(io + mlen < in_len); */
/* Try a match starting from [io] with the strings at [try].
That's only sensible if TRY actually is before IO (can happen
with uninit hash table). If we have a previous match already
{
if (litofs)
{
+ unsigned litlen;
litofs--;
- unsigned litlen = io - litofs;
- //fprintf(stderr, "lit: %d\n", litlen);
+ litlen = io - litofs;
+ /* fprintf(stderr, "lit: %d\n", litlen); */
while (litlen)
{
unsigned int easy_sz;
litofs = 0;
}
- //fprintf(stderr, "ref: %d @ %d\n", mlen, mofs);
+ /* fprintf(stderr, "ref: %d @ %d\n", mlen, mofs); */
if (mlen >= 2 && mlen <= 9 && mofs < 1024)
{
io = in_len;
if (litofs)
{
+ unsigned litlen;
litofs--;
- unsigned litlen = io - litofs;
- //fprintf(stderr, "lit: %d\n", litlen);
+ litlen = io - litofs;
+ /* fprintf(stderr, "lit: %d\n", litlen); */
while (litlen)
{
unsigned int easy_sz;
case 2: case 3:
case 4: case 5:
case 6: case 7:
- //a 0LLLLLLL
- //fprintf (stderr, "lit: 1\n");
+ /* a 0LLLLLLL */
+ /* fprintf (stderr, "lit: 1\n"); */
*out++ = first;
continue;
case 8: case 9:
- //b 100lllll <l+1 bytes>
+ /* b 100lllll <l+1 bytes> */
{
unsigned int l = first & 31;
- //fprintf (stderr, "lit: %d\n", l);
+ /* fprintf (stderr, "lit: %d\n", l); */
do
*out++ = *in++;
while (l--);
continue;
}
case 10: case 11:
- //c 101oolll <8o>
+ /* c 101oolll <8o> */
{
o = first & (3 << 3);
o = (o << 5) | *in++;
break;
}
case 12: case 13:
- //d 110lllll <8o>
+ /* d 110lllll <8o> */
{
o = *in++;
first = (first & 31) + 10;
break;
}
case 14:
- // e 1110llll <8o> <8o>
+ /* e 1110llll <8o> <8o> */
{
o = in[0] | (in[1] << 8);
in += 2;
break;
}
case 15:
- //f1 1111llll <8o> <8o> <8l>
- //f2 11110lll <8o> <8o> <8l>
- // g 11111lll <8o> <8o> <8o> <8l>
+ /* f1 1111llll <8o> <8o> <8l> */
+ /* f2 11110lll <8o> <8o> <8l> */
+ /* g 11111lll <8o> <8o> <8o> <8l> */
{
first = first & 15;
if (first >= 8)
break;
}
}
- //fprintf(stderr, "ref: %d @ %d\n", first, o);
+ /* fprintf(stderr, "ref: %d @ %d\n", first, o); */
o++;
o = -o;
#if 0
void repopagestore_free(Repopagestore *store)
{
- sat_free(store->blob_store);
- sat_free(store->pages);
- sat_free(store->mapped);
+ store->blob_store = solv_free(store->blob_store);
+ store->file_pages = solv_free(store->file_pages);
+ store->mapped_at = solv_free(store->mapped_at);
+ store->mapped = solv_free(store->mapped);
if (store->pagefd != -1)
close(store->pagefd);
+ store->pagefd = -1;
}
{
/* Make sure all pages from PSTART to PEND (inclusive) are loaded,
and are consecutive. Return a pointer to the mapping of PSTART. */
- unsigned char buf[BLOB_PAGESIZE];
- unsigned int i;
+ unsigned char buf[REPOPAGE_BLOBSIZE];
+ unsigned int i, best, pnum;
+
+ if (pstart == pend)
+ {
+ /* Quick check in case the requested page is already mapped */
+ if (store->mapped_at[pstart] != -1)
+ return store->blob_store + store->mapped_at[pstart];
+ }
+ else
+ {
+ /* Quick check in case all pages are already mapped and consecutive. */
+ for (pnum = pstart; pnum <= pend; pnum++)
+ if (store->mapped_at[pnum] == -1
+ || (pnum > pstart
+ && store->mapped_at[pnum]
+ != store->mapped_at[pnum-1] + REPOPAGE_BLOBSIZE))
+ break;
+ if (pnum > pend)
+ return store->blob_store + store->mapped_at[pstart];
+ }
- /* Quick check in case all pages are there already and consecutive. */
- for (i = pstart; i <= pend; i++)
- if (store->pages[i].mapped_at == -1
- || (i > pstart
- && store->pages[i].mapped_at
- != store->pages[i-1].mapped_at + BLOB_PAGESIZE))
- break;
- if (i > pend)
- return store->blob_store + store->pages[pstart].mapped_at;
+ if (store->pagefd == -1 || !store->file_pages)
+ return 0; /* no backing file */
- if (store->pagefd == -1)
- return 0;
+#ifdef DEBUG_PAGING
+ fprintf(stderr, "PAGE: want %d pages starting at %d\n", pend - pstart + 1, pstart);
+#endif
/* Ensure that we can map the numbers of pages we need at all. */
- if (pend - pstart + 1 > store->ncanmap)
+ if (pend - pstart + 1 > store->nmapped)
{
- unsigned int oldcan = store->ncanmap;
- store->ncanmap = pend - pstart + 1;
- if (store->ncanmap < 4)
- store->ncanmap = 4;
- store->mapped = sat_realloc2(store->mapped, store->ncanmap, sizeof(store->mapped[0]));
- memset(store->mapped + oldcan, 0, (store->ncanmap - oldcan) * sizeof (store->mapped[0]));
- store->blob_store = sat_realloc2(store->blob_store, store->ncanmap, BLOB_PAGESIZE);
+ unsigned int oldcan = store->nmapped;
+ store->nmapped = pend - pstart + 1;
+ if (store->nmapped < 4)
+ store->nmapped = 4;
+ store->mapped = solv_realloc2(store->mapped, store->nmapped, sizeof(store->mapped[0]));
+ for (i = oldcan; i < store->nmapped; i++)
+ store->mapped[i] = -1;
+ store->blob_store = solv_realloc2(store->blob_store, store->nmapped, REPOPAGE_BLOBSIZE);
#ifdef DEBUG_PAGING
- fprintf(stderr, "PAGE: can map %d pages\n", store->ncanmap);
+ fprintf(stderr, "PAGE: can map %d pages\n", store->nmapped);
#endif
}
- /* Now search for "cheap" space in our store. Space is cheap if it's either
- free (very cheap) or contains pages we search for anyway. */
-
- /* Setup cost array. */
- unsigned int cost[store->ncanmap];
- for (i = 0; i < store->ncanmap; i++)
+ if (store->mapped_at[pstart] != -1)
{
- unsigned int pnum = store->mapped[i];
- if (pnum == 0)
- cost[i] = 0;
- else
- {
- pnum--;
- Attrblobpage *p = store->pages + pnum;
- assert(p->mapped_at != -1);
- if (pnum >= pstart && pnum <= pend)
- cost[i] = 1;
- else
- cost[i] = 3;
- }
+ /* assume forward search */
+ best = store->mapped_at[pstart] / REPOPAGE_BLOBSIZE;
+ if (best + (pend - pstart) >= store->nmapped)
+ best = 0;
}
-
- /* And search for cheapest space. */
- unsigned int best_cost = -1;
- unsigned int best = 0;
- unsigned int same_cost = 0;
- for (i = 0; i + pend - pstart < store->ncanmap; i++)
+ else if (store->mapped_at[pend] != -1)
{
- unsigned int c = cost[i];
- unsigned int j;
- for (j = 0; j < pend - pstart + 1; j++)
- c += cost[i+j];
- if (c < best_cost)
- best_cost = c, best = i;
- else if (c == best_cost)
- same_cost++;
- /* A null cost won't become better. */
- if (c == 0)
- break;
+ /* assume backward search */
+ best = store->mapped_at[pend] / REPOPAGE_BLOBSIZE;
+ if (best < pend - pstart)
+ best = store->nmapped - 1;
+ best -= pend - pstart;
+ }
+ else
+ {
+ /* choose some "random" location to avoid thrashing */
+ best = (pstart + store->rr_counter++) % (store->nmapped - pend + pstart);
}
- /* If all places have the same cost we would thrash on slot 0. Avoid
- this by doing a round-robin strategy in this case. */
- if (same_cost == store->ncanmap - pend + pstart - 1)
- best = store->rr_counter++ % (store->ncanmap - pend + pstart);
/* So we want to map our pages from [best] to [best+pend-pstart].
Use a very simple strategy, which doesn't make the best use of
our resources, but works. Throw away all pages in that range
- (even ours) then copy around ours (in case they were outside the
- range) or read them in. */
- for (i = best; i < best + pend - pstart + 1; i++)
+ (even ours) then copy around ours or read them in. */
+ for (i = best, pnum = pstart; pnum <= pend; i++, pnum++)
{
- unsigned int pnum = store->mapped[i];
- if (pnum--
- /* If this page is exactly at the right place already,
- no need to evict it. */
- && pnum != pstart + i - best)
+ unsigned int pnum_mapped_at;
+ unsigned int oldpnum = store->mapped[i];
+ if (oldpnum != -1)
{
+ if (oldpnum == pnum)
+ continue; /* already have the correct page */
/* Evict this page. */
#ifdef DEBUG_PAGING
- fprintf(stderr, "PAGE: evict page %d from %d\n", pnum, i);
+ fprintf(stderr, "PAGE: evict page %d from %d\n", oldpnum, i);
#endif
- cost[i] = 0;
- store->mapped[i] = 0;
- store->pages[pnum].mapped_at = -1;
+ store->mapped[i] = -1;
+ store->mapped_at[oldpnum] = -1;
+ }
+ /* check if we can copy the correct content (before it gets evicted) */
+ pnum_mapped_at = store->mapped_at[pnum];
+ if (pnum_mapped_at != -1 && pnum_mapped_at != i * REPOPAGE_BLOBSIZE)
+ {
+ void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE;
+#ifdef DEBUG_PAGING
+ fprintf(stderr, "PAGECOPY: %d from %d to %d\n", pnum, pnum_mapped_at / REPOPAGE_BLOBSIZE, i);
+#endif
+ memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE);
+ store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = -1;
+ store->mapped[i] = pnum;
+ store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE;
}
}
- /* Everything is free now. Read in the pages we want. */
- for (i = pstart; i <= pend; i++)
+ /* Everything is free now. Read in or copy the pages we want. */
+ for (i = best, pnum = pstart; pnum <= pend; i++, pnum++)
{
- Attrblobpage *p = store->pages + i;
- unsigned int pnum = i - pstart + best;
- void *dest = store->blob_store + pnum * BLOB_PAGESIZE;
- if (p->mapped_at != -1)
+ void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE;
+ if (store->mapped_at[pnum] != -1)
{
- if (p->mapped_at != pnum * BLOB_PAGESIZE)
+ unsigned int pnum_mapped_at = store->mapped_at[pnum];
+ if (pnum_mapped_at != i * REPOPAGE_BLOBSIZE)
{
#ifdef DEBUG_PAGING
- fprintf(stderr, "PAGECOPY: %d to %d\n", i, pnum);
+ fprintf(stderr, "PAGECOPY: %d from %d to %d\n", pnum, pnum_mapped_at / REPOPAGE_BLOBSIZE, i);
#endif
/* Still mapped somewhere else, so just copy it from there. */
- memcpy(dest, store->blob_store + p->mapped_at, BLOB_PAGESIZE);
- store->mapped[p->mapped_at / BLOB_PAGESIZE] = 0;
+ memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE);
+ store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = -1;
}
}
else
{
- unsigned int in_len = p->file_size;
+ Attrblobpage *p = store->file_pages + pnum;
+ unsigned int in_len = p->page_size;
unsigned int compressed = in_len & 1;
in_len >>= 1;
#ifdef DEBUG_PAGING
- fprintf(stderr, "PAGEIN: %d to %d", i, pnum);
+ fprintf(stderr, "PAGEIN: %d to %d", pnum, i);
#endif
- if (pread(store->pagefd, compressed ? buf : dest, in_len, p->file_offset) != in_len)
+ if (pread(store->pagefd, compressed ? buf : dest, in_len, store->file_offset + p->page_offset) != in_len)
{
perror("mapping pread");
return 0;
if (compressed)
{
unsigned int out_len;
- out_len = unchecked_decompress_buf(buf, in_len,
- dest, BLOB_PAGESIZE);
- if (out_len != BLOB_PAGESIZE && i < store->num_pages - 1)
+ out_len = unchecked_decompress_buf(buf, in_len, dest, REPOPAGE_BLOBSIZE);
+ if (out_len != REPOPAGE_BLOBSIZE && pnum < store->num_pages - 1)
{
#ifdef DEBUG_PAGING
fprintf(stderr, "can't decompress\n");
fprintf(stderr, "\n");
#endif
}
- p->mapped_at = pnum * BLOB_PAGESIZE;
- store->mapped[pnum] = i + 1;
+ store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE;
+ store->mapped[i] = pnum;
}
- return store->blob_store + best * BLOB_PAGESIZE;
+ return store->blob_store + best * REPOPAGE_BLOBSIZE;
}
unsigned int
read_u32(FILE *fp)
{
int c, i;
- unsigned int x = 0;
+ unsigned int x = 0;
- for (i = 0; i < 4; i++)
- {
+ for (i = 0; i < 4; i++)
+ {
c = getc(fp);
- if (c == EOF)
+ if (c == EOF)
return 0;
- x = (x << 8) | c;
- }
+ x = (x << 8) | c;
+ }
return x;
}
unsigned int npages;
unsigned int i;
unsigned int can_seek;
- long cur_file_ofs;
- unsigned char buf[BLOB_PAGESIZE];
+ unsigned int cur_page_ofs;
+ unsigned char buf[REPOPAGE_BLOBSIZE];
- if (pagesz != BLOB_PAGESIZE)
+ if (pagesz != REPOPAGE_BLOBSIZE)
{
/* We could handle this by slurping in everything. */
return SOLV_ERROR_CORRUPT;
}
can_seek = 1;
- if ((cur_file_ofs = ftell(fp)) < 0)
+ if ((store->file_offset = ftell(fp)) < 0)
can_seek = 0;
clearerr(fp);
if (can_seek)
#ifdef DEBUG_PAGING
fprintf(stderr, "can %sseek\n", can_seek ? "" : "NOT ");
#endif
- npages = (blobsz + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
+ npages = (blobsz + REPOPAGE_BLOBSIZE - 1) / REPOPAGE_BLOBSIZE;
store->num_pages = npages;
- store->pages = sat_malloc2(npages, sizeof(store->pages[0]));
+ store->mapped_at = solv_malloc2(npages, sizeof(*store->mapped_at));
- /* If we can't seek on our input we have to slurp in everything. */
- if (!can_seek)
- store->blob_store = sat_malloc2(npages, BLOB_PAGESIZE);
+ /* If we can't seek on our input we have to slurp in everything.
+ * Otherwise set up file_pages containing offest/length of the
+ * pages */
+ if (can_seek)
+ store->file_pages = solv_malloc2(npages, sizeof(*store->file_pages));
+ else
+ store->blob_store = solv_malloc2(npages, REPOPAGE_BLOBSIZE);
+ cur_page_ofs = 0;
for (i = 0; i < npages; i++)
{
unsigned int in_len = read_u32(fp);
unsigned int compressed = in_len & 1;
- Attrblobpage *p = store->pages + i;
in_len >>= 1;
#ifdef DEBUG_PAGING
fprintf(stderr, "page %d: len %d (%scompressed)\n",
#endif
if (can_seek)
{
- cur_file_ofs += 4;
- p->mapped_at = -1;
- p->file_offset = cur_file_ofs;
- p->file_size = in_len * 2 + compressed;
+ Attrblobpage *p = store->file_pages + i;
+ cur_page_ofs += 4;
+ store->mapped_at[i] = -1; /* not mapped yet */
+ p->page_offset = cur_page_ofs;
+ p->page_size = in_len * 2 + compressed;
if (fseek(fp, in_len, SEEK_CUR) < 0)
{
/* We can't fall back to non-seeking behaviour as we already
store->pagefd = -1;
return SOLV_ERROR_EOF;
}
- cur_file_ofs += in_len;
+ cur_page_ofs += in_len;
}
else
{
unsigned int out_len;
- void *dest = store->blob_store + i * BLOB_PAGESIZE;
- p->mapped_at = i * BLOB_PAGESIZE;
- p->file_offset = 0;
- p->file_size = 0;
+ void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE;
+ store->mapped_at[i] = i * REPOPAGE_BLOBSIZE;
/* We can't seek, so suck everything in. */
if (fread(compressed ? buf : dest, in_len, 1, fp) != 1)
{
}
if (compressed)
{
- out_len = unchecked_decompress_buf(buf, in_len, dest, BLOB_PAGESIZE);
- if (out_len != BLOB_PAGESIZE && i < npages - 1)
+ out_len = unchecked_decompress_buf(buf, in_len, dest, REPOPAGE_BLOBSIZE);
+ if (out_len != REPOPAGE_BLOBSIZE && i < npages - 1)
{
return SOLV_ERROR_CORRUPT;
}