2 * Copyright (c) 2007-2008, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
11 * Pageing and compression functions for the vertical repository data
15 #define _XOPEN_SOURCE 500
17 #include <sys/types.h>
30 #define BLOCK_SIZE (65536*1)
31 #if BLOCK_SIZE <= 65536
32 typedef __uint16_t Ref;
34 typedef __uint32_t Ref;
38 The format is tailored for fast decompression (i.e. only byte based),
39 and skewed to ASCII content (highest bit often not set):
42 - self-describing ASCII character hex L
43 b 100lllll <l+1 bytes>
44 - literal run of length l+1
46 - back ref of length l+2, at offset -(o+1) (o < 1 << 10)
48 - back ref of length l+2+8, at offset -(o+1) (o < 1 << 8)
50 - back ref of length l+3, at offset -(o+1) (o < 1 << 16)
51 f1 1111llll <8l> <8o> <8o>
52 - back ref, length l+19 (l < 1<<12), offset -(o+1) (o < 1<<16)
53 f2 11110lll <8l> <8o> <8o>
54 - back ref, length l+19 (l < 1<<11), offset -(o+1) (o < 1<<16)
55 g 11111lll <8l> <8o> <8o> <8o>
56 - back ref, length l+5 (l < 1<<11), offset -(o+1) (o < 1<<24)
58 Generally for a literal of length L we need L+1 bytes, hence it is
59 better to encode also very short backrefs (2 chars) as backrefs if
60 their offset is small, as that only needs two bytes. Except if we
61 already have a literal run, in that case it's better to append there,
62 instead of breaking it for a backref. So given a potential backref
63 at offset O, length L the strategy is as follows:
65 L < 2 : encode as 1-literal
66 L == 2, O > 1024 : encode as 1-literal
67 L == 2, have already literals: encode as 1-literal
69 L >= 2, L <= 9, O < 1024 : encode as c
70 L >= 10, L <= 41, O < 256 : encode as d
71 else we have either O >= 1024, or L >= 42:
72 L < 3 : encode as 1-literal
73 L >= 3, L <= 18, O < 65536 : encode as e
74 L >= 19, L <= 4095+18, O < 65536 : encode as f
75 else we have either L >= 4096+18 or O >= 65536.
76 O >= 65536: encode as 1-literal, too bad
77 (with the current block size this can't happen)
78 L >= 4096+18, so reduce to 4095+18 : encode as f
83 compress_buf(const unsigned char *in, unsigned int in_len,
84 unsigned char *out, unsigned int out_len)
86 unsigned int oo = 0; //out-offset
87 unsigned int io = 0; //in-offset
90 Ref hnext[BLOCK_SIZE];
91 memset (htab, -1, sizeof (htab));
92 memset (hnext, -1, sizeof (hnext));
93 unsigned int litofs = 0;
94 while (io + 2 < in_len)
96 /* Search for a match of the string starting at IN, we have at
97 least three characters. */
98 unsigned int hval = in[io] | in[io + 1] << 8 | in[io + 2] << 16;
99 unsigned int try, mlen, mofs, tries;
100 hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
101 hval = hval & (HS - 1);
103 hnext[io] = htab[hval];
108 for (tries = 0; try != -1 && tries < 12; tries++)
111 && in[try] == in[io] && in[try + 1] == in[io + 1])
114 mofs = (io - try) - 1;
119 for (; try != -1 && tries < 12; tries++)
121 //assert (mlen >= 2);
122 //assert (io + mlen < in_len);
123 /* Try a match starting from [io] with the strings at [try].
124 That's only sensible if TRY actually is before IO (can happen
125 with uninit hash table). If we have a previous match already
126 we're only going to take the new one if it's longer, hence
127 check the potentially last character. */
128 if (try < io && in[try + mlen] == in[io + mlen])
130 unsigned int this_len, this_ofs;
131 if (memcmp (in + try, in + io, mlen))
134 /* Now try extending the match by more characters. */
136 io + this_len < in_len
137 && in[try + this_len] == in[io + this_len]; this_len++)
141 for (testi = 0; testi < this_len; testi++)
142 assert (in[try + testi] == in[io + testi]);
144 this_ofs = (io - try) - 1;
145 /*if (this_ofs > 65535)
148 assert (this_len >= 2);
149 assert (this_len >= mlen);
150 assert (this_len > mlen || (this_len == mlen && this_ofs > mofs));
152 mlen = this_len, mofs = this_ofs;
153 /* If our match extends up to the end of input, no next
154 match can become better. This is not just an
155 optimization, it establishes a loop invariant
156 (io + mlen < in_len). */
157 if (io + mlen >= in_len)
162 /*if (io - try - 1 >= 65536)
169 //fprintf (stderr, "%d %d\n", mlen, mofs);
170 if (mlen == 2 && (litofs || mofs >= 1024))
172 /*else if (mofs >= 65536)
174 else if (mofs >= 65536)
176 if (mlen >= 2048 + 5)
183 /*else if (mlen >= 4096 + 19)
185 else if (mlen >= 2048 + 19)
187 /* Skip this match if the next character would deliver a better one,
188 but only do this if we have the chance to really extend the
189 length (i.e. our current length isn't yet the (conservative)
191 if (mlen && mlen < (2048 + 5) && io + 3 < in_len)
194 in[io + 1] | in[io + 2] << 8 | in[io + 3] << 16;
196 hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
197 hval = hval & (HS - 1);
200 && in[try] == in[io + 1] && in[try + 1] == in[io + 2])
202 unsigned int this_len;
205 io + 1 + this_len < in_len
206 && in[try + this_len] == in[io + 1 + this_len];
209 if (this_len >= mlen)
225 unsigned litlen = io - litofs;
226 //fprintf (stderr, "lit: %d\n", litlen);
229 unsigned int easy_sz;
230 /* Emit everything we can as self-describers. As soon as
231 we hit a byte we can't emit as such we're going to emit
232 a length descriptor anyway, so we can as well include
233 bytes < 0x80 which might follow afterwards in that run. */
235 easy_sz < litlen && in[litofs + easy_sz] < 0x80;
240 if (oo + easy_sz >= out_len)
242 memcpy (out + oo, in + litofs, easy_sz);
251 if (oo + 1 + litlen >= out_len)
253 out[oo++] = 0x80 | (litlen - 1);
255 out[oo++] = in[litofs++];
260 /* Literal length > 32, so chunk it. */
261 if (oo + 1 + 32 >= out_len)
263 out[oo++] = 0x80 | 31;
264 memcpy (out + oo, in + litofs, 32);
273 //fprintf (stderr, "ref: %d @ %d\n", mlen, mofs);
275 if (mlen >= 2 && mlen <= 9 && mofs < 1024)
277 if (oo + 2 >= out_len)
279 out[oo++] = 0xa0 | ((mofs & 0x300) >> 5) | (mlen - 2);
280 out[oo++] = mofs & 0xff;
282 else if (mlen >= 10 && mlen <= 41 && mofs < 256)
284 if (oo + 2 >= out_len)
286 out[oo++] = 0xc0 | (mlen - 10);
289 else if (mofs >= 65536)
291 assert (mlen >= 5 && mlen < 2048 + 5);
292 if (oo + 5 >= out_len)
294 out[oo++] = 0xf8 | ((mlen - 5) >> 8);
295 out[oo++] = (mlen - 5) & 0xff;
296 out[oo++] = mofs & 0xff;
297 out[oo++] = (mofs >> 8) & 0xff;
298 out[oo++] = mofs >> 16;
300 else if (mlen >= 3 && mlen <= 18)
302 assert (mofs < 65536);
303 if (oo + 3 >= out_len)
305 out[oo++] = 0xe0 | (mlen - 3);
306 out[oo++] = mofs & 0xff;
307 out[oo++] = mofs >> 8;
311 assert (mlen >= 19 && mlen <= 4095 + 19 && mofs < 65536);
312 if (oo + 4 >= out_len)
314 out[oo++] = 0xf0 | ((mlen - 19) >> 8);
315 out[oo++] = (mlen - 19) & 0xff;
316 out[oo++] = mofs & 0xff;
317 out[oo++] = mofs >> 8;
319 /* Insert the hashes for the compressed run [io..io+mlen-1].
320 For [io] we have it already done at the start of the loop.
321 So it's from [io+1..io+mlen-1], and we need three chars per
322 hash, so the accessed characters will be [io+1..io+mlen-1+2],
323 ergo io+mlen+1 < in_len. */
331 in[io] | in[io + 1] << 8 | in[io + 2] << 16;
332 hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
333 hval = hval & (HS - 1);
334 hnext[io] = htab[hval];
341 /* We might have some characters left. */
342 if (io < in_len && !litofs)
348 unsigned litlen = io - litofs;
349 //fprintf (stderr, "lit: %d\n", litlen);
352 unsigned int easy_sz;
353 /* Emit everything we can as self-describers. As soon as we hit a
354 byte we can't emit as such we're going to emit a length
355 descriptor anyway, so we can as well include bytes < 0x80 which
356 might follow afterwards in that run. */
357 for (easy_sz = 0; easy_sz < litlen && in[litofs + easy_sz] < 0x80;
362 if (oo + easy_sz >= out_len)
364 memcpy (out + oo, in + litofs, easy_sz);
373 if (oo + 1 + litlen >= out_len)
375 out[oo++] = 0x80 | (litlen - 1);
377 out[oo++] = in[litofs++];
382 /* Literal length > 32, so chunk it. */
383 if (oo + 1 + 32 >= out_len)
385 out[oo++] = 0x80 | 31;
386 memcpy (out + oo, in + litofs, 32);
398 unchecked_decompress_buf(const unsigned char *in, unsigned int in_len,
400 unsigned int out_len __attribute__((unused)))
402 unsigned char *orig_out = out;
403 const unsigned char *in_end = in + in_len;
406 unsigned int first = *in++;
411 /* This default case can't happen, but GCCs VRP is not strong
412 enough to see this, so make this explicitely not fall to
413 the end of the switch, so that we don't have to initialize
421 //fprintf (stderr, "lit: 1\n");
425 //b 100lllll <l+1 bytes>
427 unsigned int l = first & 31;
428 //fprintf (stderr, "lit: %d\n", l);
437 o = first & (3 << 3);
438 o = (o << 5) | *in++;
439 first = (first & 7) + 2;
446 first = (first & 31) + 10;
450 // e 1110llll <8o> <8o>
452 o = in[0] | (in[1] << 8);
459 //f1 1111llll <8o> <8o> <8l>
460 //f2 11110lll <8o> <8o> <8l>
461 // g 11111lll <8o> <8o> <8o> <8l>
466 first = (((first - 8) << 8) | in[0]) + 5;
467 o = in[1] | (in[2] << 8) | (in[3] << 16);
472 first = ((first << 8) | in[0]) + 19;
473 o = in[1] | (in[2] << 8);
479 //fprintf (stderr, "ref: %d @ %d\n", first, o);
483 /* We know that first will not be zero, and this loop structure is
484 better optimizable. */
494 case 18: *out = *(out + o); out++;
495 case 17: *out = *(out + o); out++;
496 case 16: *out = *(out + o); out++;
497 case 15: *out = *(out + o); out++;
498 case 14: *out = *(out + o); out++;
499 case 13: *out = *(out + o); out++;
500 case 12: *out = *(out + o); out++;
501 case 11: *out = *(out + o); out++;
502 case 10: *out = *(out + o); out++;
503 case 9: *out = *(out + o); out++;
504 case 8: *out = *(out + o); out++;
505 case 7: *out = *(out + o); out++;
506 case 6: *out = *(out + o); out++;
507 case 5: *out = *(out + o); out++;
508 case 4: *out = *(out + o); out++;
509 case 3: *out = *(out + o); out++;
510 case 2: *out = *(out + o); out++;
511 case 1: *out = *(out + o); out++;
519 case 0: *out = *(out + o); out++;
520 case 15: *out = *(out + o); out++;
521 case 14: *out = *(out + o); out++;
522 case 13: *out = *(out + o); out++;
523 case 12: *out = *(out + o); out++;
524 case 11: *out = *(out + o); out++;
525 case 10: *out = *(out + o); out++;
526 case 9: *out = *(out + o); out++;
527 case 8: *out = *(out + o); out++;
528 case 7: *out = *(out + o); out++;
529 case 6: *out = *(out + o); out++;
530 case 5: *out = *(out + o); out++;
531 case 4: *out = *(out + o); out++;
532 case 3: *out = *(out + o); out++;
533 case 2: *out = *(out + o); out++;
534 case 1: *out = *(out + o); out++;
536 while ((int)(first -= 16) > 0);
542 return out - orig_out;
545 /**********************************************************************/
547 void repopagestore_init(Repopagestore *store)
549 memset(store, 0, sizeof(*store));
553 void repopagestore_free(Repopagestore *store)
555 sat_free(store->blob_store);
556 sat_free(store->pages);
557 sat_free(store->mapped);
558 if (store->pagefd != -1)
559 close(store->pagefd);
563 /**********************************************************************/
566 repopagestore_load_page_range(Repopagestore *store, unsigned int pstart, unsigned int pend)
568 /* Make sure all pages from PSTART to PEND (inclusive) are loaded,
569 and are consecutive. Return a pointer to the mapping of PSTART. */
570 unsigned char buf[BLOB_PAGESIZE];
573 /* Quick check in case all pages are there already and consecutive. */
574 for (i = pstart; i <= pend; i++)
575 if (store->pages[i].mapped_at == -1
577 && store->pages[i].mapped_at
578 != store->pages[i-1].mapped_at + BLOB_PAGESIZE))
581 return store->blob_store + store->pages[pstart].mapped_at;
583 if (store->pagefd == -1)
586 /* Ensure that we can map the numbers of pages we need at all. */
587 if (pend - pstart + 1 > store->ncanmap)
589 unsigned int oldcan = store->ncanmap;
590 store->ncanmap = pend - pstart + 1;
591 if (store->ncanmap < 4)
593 store->mapped = sat_realloc2(store->mapped, store->ncanmap, sizeof(store->mapped[0]));
594 memset (store->mapped + oldcan, 0, (store->ncanmap - oldcan) * sizeof (store->mapped[0]));
595 store->blob_store = sat_realloc2(store->blob_store, store->ncanmap, BLOB_PAGESIZE);
597 fprintf (stderr, "PAGE: can map %d pages\n", store->ncanmap);
601 /* Now search for "cheap" space in our store. Space is cheap if it's either
602 free (very cheap) or contains pages we search for anyway. */
604 /* Setup cost array. */
605 unsigned int cost[store->ncanmap];
606 for (i = 0; i < store->ncanmap; i++)
608 unsigned int pnum = store->mapped[i];
614 Attrblobpage *p = store->pages + pnum;
615 assert (p->mapped_at != -1);
616 if (pnum >= pstart && pnum <= pend)
623 /* And search for cheapest space. */
624 unsigned int best_cost = -1;
625 unsigned int best = 0;
626 unsigned int same_cost = 0;
627 for (i = 0; i + pend - pstart < store->ncanmap; i++)
629 unsigned int c = cost[i];
631 for (j = 0; j < pend - pstart + 1; j++)
634 best_cost = c, best = i;
635 else if (c == best_cost)
637 /* A null cost won't become better. */
641 /* If all places have the same cost we would thrash on slot 0. Avoid
642 this by doing a round-robin strategy in this case. */
643 if (same_cost == store->ncanmap - pend + pstart - 1)
644 best = store->rr_counter++ % (store->ncanmap - pend + pstart);
646 /* So we want to map our pages from [best] to [best+pend-pstart].
647 Use a very simple strategy, which doesn't make the best use of
648 our resources, but works. Throw away all pages in that range
649 (even ours) then copy around ours (in case they were outside the
650 range) or read them in. */
651 for (i = best; i < best + pend - pstart + 1; i++)
653 unsigned int pnum = store->mapped[i];
655 /* If this page is exactly at the right place already,
656 no need to evict it. */
657 && pnum != pstart + i - best)
659 /* Evict this page. */
661 fprintf (stderr, "PAGE: evict page %d from %d\n", pnum, i);
664 store->mapped[i] = 0;
665 store->pages[pnum].mapped_at = -1;
669 /* Everything is free now. Read in the pages we want. */
670 for (i = pstart; i <= pend; i++)
672 Attrblobpage *p = store->pages + i;
673 unsigned int pnum = i - pstart + best;
674 void *dest = store->blob_store + pnum * BLOB_PAGESIZE;
675 if (p->mapped_at != -1)
677 if (p->mapped_at != pnum * BLOB_PAGESIZE)
680 fprintf (stderr, "PAGECOPY: %d to %d\n", i, pnum);
682 /* Still mapped somewhere else, so just copy it from there. */
683 memcpy (dest, store->blob_store + p->mapped_at, BLOB_PAGESIZE);
684 store->mapped[p->mapped_at / BLOB_PAGESIZE] = 0;
689 unsigned int in_len = p->file_size;
690 unsigned int compressed = in_len & 1;
693 fprintf (stderr, "PAGEIN: %d to %d", i, pnum);
695 if (pread(store->pagefd, compressed ? buf : dest, in_len, p->file_offset) != in_len)
697 perror ("mapping pread");
702 unsigned int out_len;
703 out_len = unchecked_decompress_buf(buf, in_len,
704 dest, BLOB_PAGESIZE);
705 if (out_len != BLOB_PAGESIZE && i < store->num_pages - 1)
707 fprintf(stderr, "can't decompress\n");
711 fprintf (stderr, " (expand %d to %d)", in_len, out_len);
715 fprintf (stderr, "\n");
718 p->mapped_at = pnum * BLOB_PAGESIZE;
719 store->mapped[pnum] = i + 1;
721 return store->blob_store + best * BLOB_PAGESIZE;
725 repopagestore_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, unsigned int max)
727 return compress_buf(page, len, cpage, max);
730 #define SOLV_ERROR_EOF 3
731 #define SOLV_ERROR_CORRUPT 6
733 static inline unsigned int
739 for (i = 0; i < 4; i++)
749 /* Try to either setup on-demand paging (using FP as backing
750 file), or in case that doesn't work (FP not seekable) slurps in
751 all pages and deactivates paging. */
753 repopagestore_read_or_setup_pages(Repopagestore *store, FILE *fp, unsigned int pagesz, unsigned int blobsz)
757 unsigned int can_seek;
759 unsigned char buf[BLOB_PAGESIZE];
761 if (pagesz != BLOB_PAGESIZE)
763 /* We could handle this by slurping in everything. */
764 return SOLV_ERROR_CORRUPT;
767 if ((cur_file_ofs = ftell(fp)) < 0)
771 store->pagefd = dup(fileno(fp));
772 if (store->pagefd == -1)
776 fprintf (stderr, "can %sseek\n", can_seek ? "" : "NOT ");
778 npages = (blobsz + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
780 store->num_pages = npages;
781 store->pages = sat_malloc2(npages, sizeof(store->pages[0]));
783 /* If we can't seek on our input we have to slurp in everything. */
785 store->blob_store = sat_malloc2(npages, BLOB_PAGESIZE);
786 for (i = 0; i < npages; i++)
788 unsigned int in_len = read_u32(fp);
789 unsigned int compressed = in_len & 1;
790 Attrblobpage *p = store->pages + i;
793 fprintf (stderr, "page %d: len %d (%scompressed)\n",
794 i, in_len, compressed ? "" : "not ");
800 p->file_offset = cur_file_ofs;
801 p->file_size = in_len * 2 + compressed;
802 if (fseek(fp, in_len, SEEK_CUR) < 0)
805 fprintf (stderr, "can't seek after we thought we can\n");
806 /* We can't fall back to non-seeking behaviour as we already
807 read over some data pages without storing them away. */
808 close(store->pagefd);
810 return SOLV_ERROR_EOF;
812 cur_file_ofs += in_len;
816 unsigned int out_len;
817 void *dest = store->blob_store + i * BLOB_PAGESIZE;
818 p->mapped_at = i * BLOB_PAGESIZE;
821 /* We can't seek, so suck everything in. */
822 if (fread(compressed ? buf : dest, in_len, 1, fp) != 1)
825 return SOLV_ERROR_EOF;
829 out_len = unchecked_decompress_buf(buf, in_len, dest, BLOB_PAGESIZE);
830 if (out_len != BLOB_PAGESIZE && i < npages - 1)
832 return SOLV_ERROR_CORRUPT;
841 repopagestore_disable_paging(Repopagestore *store)
843 if (store->num_pages)
844 repopagestore_load_page_range(store, 0, store->num_pages - 1);
850 transfer_file (FILE * from, FILE * to, int compress)
852 unsigned char inb[BLOCK_SIZE];
853 unsigned char outb[BLOCK_SIZE];
854 while (!feof (from) && !ferror (from))
856 unsigned int in_len, out_len;
859 in_len = fread (inb, 1, BLOCK_SIZE, from);
862 unsigned char *b = outb;
863 out_len = compress_buf (inb, in_len, outb, sizeof (outb));
865 b = inb, out_len = in_len;
866 if (fwrite (&out_len, sizeof (out_len), 1, to) != 1)
868 perror ("write size");
871 if (fwrite (b, out_len, 1, to) != 1)
873 perror ("write data");
880 if (fread (&in_len, sizeof (in_len), 1, from) != 1)
884 perror ("can't read size");
887 if (fread (inb, in_len, 1, from) != 1)
889 perror ("can't read data");
893 unchecked_decompress_buf (inb, in_len, outb, sizeof (outb));
894 if (fwrite (outb, out_len, 1, to) != 1)
896 perror ("can't write output");
903 /* Just for benchmarking purposes. */
905 dumb_memcpy (void *dest, const void *src, unsigned int len)
914 benchmark (FILE * from)
916 unsigned char inb[BLOCK_SIZE];
917 unsigned char outb[BLOCK_SIZE];
918 unsigned int in_len = fread (inb, 1, BLOCK_SIZE, from);
919 unsigned int out_len;
922 perror ("can't read from input");
926 unsigned int calib_loop;
927 unsigned int per_loop;
936 while ((clock () - start) < CLOCKS_PER_SEC / 4)
939 for (i = 0; i < calib_loop; i++)
940 dumb_memcpy (outb, inb, in_len);
941 per_loop += calib_loop;
944 fprintf (stderr, "memcpy:\nCalibrated to %d iterations per loop\n",
948 for (i = 0; i < 10; i++)
949 for (j = 0; j < per_loop; j++)
950 dumb_memcpy (outb, inb, in_len);
952 seconds = (end - start) / (float) CLOCKS_PER_SEC;
953 fprintf (stderr, "%.2f seconds == %.2f MB/s\n", seconds,
954 ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
960 while ((clock () - start) < CLOCKS_PER_SEC / 4)
963 for (i = 0; i < calib_loop; i++)
964 compress_buf(inb, in_len, outb, sizeof (outb));
965 per_loop += calib_loop;
968 fprintf(stderr, "compression:\nCalibrated to %d iterations per loop\n",
972 for (i = 0; i < 10; i++)
973 for (j = 0; j < per_loop; j++)
974 compress_buf (inb, in_len, outb, sizeof (outb));
976 seconds = (end - start) / (float) CLOCKS_PER_SEC;
977 fprintf(stderr, "%.2f seconds == %.2f MB/s\n", seconds,
978 ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
980 out_len = compress_buf(inb, in_len, outb, sizeof (outb));
985 while ((clock () - start) < CLOCKS_PER_SEC / 4)
988 for (i = 0; i < calib_loop; i++)
989 unchecked_decompress_buf(outb, out_len, inb, sizeof (inb));
990 per_loop += calib_loop;
993 fprintf(stderr, "decompression:\nCalibrated to %d iterations per loop\n",
997 for (i = 0; i < 10; i++)
998 for (j = 0; j < per_loop; j++)
999 unchecked_decompress_buf(outb, out_len, inb, sizeof (inb));
1001 seconds = (end - start) / (float) CLOCKS_PER_SEC;
1002 fprintf(stderr, "%.2f seconds == %.2f MB/s\n", seconds,
1003 ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
1007 main (int argc, char *argv[])
1010 if (argc > 1 && !strcmp (argv[1], "-d"))
1012 if (argc > 1 && !strcmp (argv[1], "-b"))
1015 transfer_file (stdin, stdout, compress);