2 * Copyright (c) 2007, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
8 /* We need FNM_CASEFOLD and strcasestr. */
13 #include <sys/types.h>
24 #include "attr_store.h"
29 #include "attr_store_p.h"
33 /* #define DEBUG_PAGING */
35 #define BLOB_BLOCK 65535
37 #define STRINGSPACE_BLOCK 1023
38 #define STRING_BLOCK 127
39 #define LOCALID_NULL 0
40 #define LOCALID_EMPTY 1
42 static Id add_key (Attrstore *s, Id name, unsigned type, unsigned size);
/* Allocate and zero-initialize a fresh Attrstore and seed its local string
   pool with the predefined strings.
   NOTE(review): listing is fragmentary; interior lines are missing. */
45 new_store (Pool *pool)
47 static const char *predef_strings[] = {
/* calloc gives an all-zero store; counters/pointers start NULL/0. */
52 Attrstore *s = calloc (1, sizeof (Attrstore));
54 stringpool_init (&s->ss, predef_strings);
/* Map a C string to this store's local string Id; CREATE non-zero interns
   the string if it is not present yet (delegates to stringpool_str2id). */
61 str2localid (Attrstore *s, const char *str, int create)
63 return stringpool_str2id (&s->ss, str, create);
/* Inverse of str2localid: return the NUL-terminated string for a LocalId.
   The pointer aliases the store's string space — do not free or cache it
   across operations that may realloc the pool. */
67 localid2str(Attrstore *s, LocalId id)
69 return s->ss.stringspace + s->ss.strings[id];
/* Lazily initialize the directory tree: dir slot 0 is the sentinel/null
   entry, slot 1 is the root ("/", name STRID_EMPTY).  A small dir-stack
   caches the most recent lookup path (used by dir_lookup below).
   NOTE(review): listing is fragmentary; interior lines are missing. */
73 setup_dirs (Attrstore *s)
75 static const char *ss_init_strs[] =
82 s->dirtree.dirs = calloc (1024, sizeof (s->dirtree.dirs[0]));
/* Slot 0: unused sentinel. */
84 s->dirtree.dirs[0].child = 0;
85 s->dirtree.dirs[0].sibling = 0;
86 s->dirtree.dirs[0].name = 0;
/* Slot 1: the root directory. */
87 s->dirtree.dirs[1].child = 0;
88 s->dirtree.dirs[1].sibling = 0;
89 s->dirtree.dirs[1].name = STRID_EMPTY;
91 s->dirtree.dirstack_size = 16;
92 s->dirtree.dirstack = malloc (s->dirtree.dirstack_size * sizeof (s->dirtree.dirstack[0]));
93 s->dirtree.ndirstack = 0;
94 s->dirtree.dirstack[s->dirtree.ndirstack++] = 1; //dir-id of /
/* Separate string pool for path components, independent of s->ss. */
96 stringpool_init (&s->dirtree.ss, ss_init_strs);
/* Recursive worker for dir_lookup: resolve one path component of NAME under
   directory DIR, then recurse on the remainder (after the '/').  With INSERT
   set, a missing component is appended as a new child of DIR; every resolved
   component is pushed on the dirstack cache.
   NOTE(review): listing is fragmentary; interior lines are missing. */
100 dir_lookup_1 (Attrstore *s, unsigned dir, const char *name, unsigned insert)
/* Component is [name, end); strchrnul also stops at the terminating NUL. */
107 const char *end = strchrnul (name, '/');
108 nameid = stringpool_strn2id (&s->dirtree.ss, name, end - name, 1);
/* Linear scan of DIR's children for this component name. */
110 Dir *dirs = s->dirtree.dirs;
111 for (c = dirs[dir].child; c; c = dirs[c].sibling, num++)
112 if (nameid == dirs[c].name)
/* Not found: allocate a new dir entry (array grown in 1024 steps) and link
   it at the head of DIR's child list. */
118 c = s->dirtree.ndirs++;
120 dirs = realloc (dirs, (c + 1024) * sizeof (dirs[0]));
122 dirs[c].sibling = dirs[dir].child;
123 dirs[c].name = nameid;
124 dirs[c].parent = dir;
126 s->dirtree.dirs = dirs;
/* Grow the lookup-path cache in steps of 16. */
128 if (!(s->dirtree.ndirstack & 15))
130 s->dirtree.dirstack_size += 16;
131 s->dirtree.dirstack = realloc (s->dirtree.dirstack, s->dirtree.dirstack_size * sizeof (s->dirtree.dirstack[0]));
133 s->dirtree.dirstack[s->dirtree.ndirstack++] = c;
136 unsigned ret = dir_lookup_1 (s, c, end + 1, insert);
/* Resolve NAME (a '/'-separated path) to a directory id, optionally creating
   missing components (INSERT).  Optimizes for locality: the components shared
   with the previous lookup (cached on dirstack) are skipped, then dir_lookup_1
   handles the divergent tail.  Multiple consecutive '/' are accepted.
   NOTE(review): listing is fragmentary; interior lines are missing. */
141 dir_lookup (Attrstore *s, const char *name, unsigned insert)
143 if (!s->dirtree.ndirs)
146 /* Detect number of common path components. Accept multiple // . */
147 const char *new_start;
149 for (components = 1, new_start = name; components < s->dirtree.ndirstack; )
153 while (*new_start == '/')
155 dirname = stringpool_id2str (&s->dirtree.ss, s->dirtree.dirs[s->dirtree.dirstack[components]].name);
/* Compare the next input component against the cached one, char by char. */
156 for (ofs = 0;; ofs++)
158 char n = new_start[ofs];
159 char d = dirname[ofs];
160 if (d == 0 && (n == 0 || n == '/'))
173 while (*new_start == '/')
177 /* We have always / on the stack. */
178 //assert (ndirstack);
179 //assert (ndirstack >= components);
/* Truncate the cache to the matched prefix; its top is the starting dir. */
180 s->dirtree.ndirstack = components;
181 unsigned ret = s->dirtree.dirstack[s->dirtree.ndirstack - 1];
183 ret = dir_lookup_1 (s, ret, new_start, insert);
184 //assert (ret == dirstack[ndirstack - 1]);
/* Return the parent directory id of DIR (root's parent relation is as stored
   in the dirs array). */
189 dir_parent (Attrstore *s, unsigned dir)
191 return s->dirtree.dirs[dir].parent;
/* Reconstruct the full path string for directory id DIR into *STR (buffer of
   length *LEN): walk parent links collecting component name ids, then emit
   them '/'-separated.  Uses a VLA bounded by the dirstack size.
   NOTE(review): listing is fragmentary; interior lines (buffer sizing, the
   emit loop header, reversal) are missing. */
195 dir2str (Attrstore *s, unsigned dir, char **str, unsigned *len)
198 Id ids[s->dirtree.dirstack_size + 1];
/* Collect name ids from DIR up to (but excluding) the root (dir 1). */
200 for (i = 0; dir > 1; dir = dir_parent (s, dir), i++)
201 ids[i] = s->dirtree.dirs[dir].name;
/* First pass computes the required length: '/' plus each component. */
204 for (i = 0; i < ii; i++)
205 l += 1 + strlen (stringpool_id2str (&s->dirtree.ss, ids[i]));
216 const char *name = stringpool_id2str (&s->dirtree.ss, ids[i]);
217 dest = mempcpy (dest, name, strlen (name));
/* Grow the unpacked attrs array so that index ENTRY is valid; new slots are
   zeroed.  Capacity is managed in blocks of 128 (rounding via (+127) & ~127).
   NOTE(review): listing is fragmentary; interior lines are missing. */
224 ensure_entry (Attrstore *s, unsigned int entry)
226 unsigned int old_num = s->entries;
/* Already large enough — presumably returns here (body line missing). */
227 if (entry < s->entries)
229 s->entries = entry + 1;
/* Only realloc when crossing a 128-entry block boundary. */
230 if (((old_num + 127) & ~127) != ((s->entries + 127) & ~127))
233 s->attrs = realloc (s->attrs, (((s->entries+127) & ~127) * sizeof (s->attrs[0])));
235 s->attrs = malloc (((s->entries+127) & ~127) * sizeof (s->attrs[0]));
/* Zero the newly exposed slots so they read as "no attributes". */
237 memset (s->attrs + old_num, 0, (s->entries - old_num) * sizeof (s->attrs[0]));
/* Append a fresh, empty entry and return its index.  The attrs array grows
   in blocks of 128; a new entry starts with a NULL attribute list. */
241 new_entry (Attrstore *s)
243 if ((s->entries & 127) == 0)
246 s->attrs = realloc (s->attrs, ((s->entries+128) * sizeof (s->attrs[0])));
248 s->attrs = malloc ((s->entries+128) * sizeof (s->attrs[0]));
250 s->attrs[s->entries++] = 0;
251 return s->entries - 1;
/* Find the LongNV with the given (pool) NAME in ENTRY's attribute list, or
   NULL-equivalent if absent/out of range.  The list is terminated by an
   element with key == 0.
   NOTE(review): listing is fragmentary; return statements are missing. */
255 find_attr (Attrstore *s, unsigned int entry, Id name)
258 if (entry >= s->entries)
260 nv = s->attrs[entry];
/* Walk until terminator or matching key name. */
263 while (nv->key && s->keys[nv->key].name != name)
/* Insert or overwrite ATTR in ENTRY's LongNV list.  The list is scanned for
   an existing element with the same key; otherwise the array is grown by one
   (keeping the key==0 terminator) and ATTR stored before it.
   NOTE(review): listing is fragmentary; interior lines are missing. */
272 add_attr (Attrstore *s, unsigned int entry, LongNV attr)
276 ensure_entry (s, entry);
/* Reject keys not registered via add_key — presumably returns (line missing). */
277 if (attr.key >= s->nkeys)
279 nv = s->attrs[entry];
283 while (nv->key && nv->key != attr.key)
/* len = index one past the scanned elements; grow list to len+1 (old list
   may be NULL, hence the malloc branch). */
287 len = nv - s->attrs[entry];
291 s->attrs[entry] = realloc (s->attrs[entry], len * sizeof (LongNV));
293 s->attrs[entry] = malloc (len * sizeof (LongNV));
294 nv = s->attrs[entry] + len - 2;
/* Attach an integer attribute NAME=VAL to ENTRY (key type TYPE_ATTR_INT).
   NOTE(review): the line storing VAL into nv is missing from this listing. */
300 add_attr_int (Attrstore *s, unsigned int entry, Id name, unsigned int val)
303 nv.key = add_key (s, name, TYPE_ATTR_INT, 0);
305 add_attr (s, entry, nv);
/* Attach a small integer by encoding it directly in the key TYPE
   (TYPE_ATTR_SPECIAL_START + val) so it costs no value storage; values too
   large for the special range fall back to a normal int attribute. */
309 add_attr_special_int (Attrstore *s, unsigned int entry, Id name, unsigned int val)
311 if (val > (TYPE_ATTR_SPECIAL_END - TYPE_ATTR_SPECIAL_START))
312 add_attr_int (s, entry, name, val);
316 nv.key = add_key (s, name, TYPE_ATTR_SPECIAL_START + val, 0);
317 add_attr (s, entry, nv);
/* Attach a blob reference (OFS, LEN into the blob store) as a
   TYPE_ATTR_CHUNK attribute.
   NOTE(review): the lines storing ofs/len into nv are missing from this
   listing. */
322 add_attr_chunk (Attrstore *s, unsigned int entry, Id name, unsigned int ofs, unsigned int len)
325 nv.key = add_key (s, name, TYPE_ATTR_CHUNK, 0);
328 add_attr (s, entry, nv);
/* Copy LEN bytes from PTR into the in-memory blob store and attach them to
   ENTRY as a chunk attribute; also extends the page table so paging metadata
   stays consistent with blob_next_free.
   NOTE(review): listing is fragmentary; interior lines are missing. */
332 add_attr_blob (Attrstore *s, unsigned int entry, Id name, const void *ptr, unsigned int len)
/* Grow the blob store only when the append crosses a BLOB_BLOCK boundary. */
334 if (((s->blob_next_free + BLOB_BLOCK) & ~BLOB_BLOCK)
335 != ((s->blob_next_free + len + BLOB_BLOCK) & ~BLOB_BLOCK))
337 unsigned int blobsz = (s->blob_next_free + len + BLOB_BLOCK) &~BLOB_BLOCK;
338 s->blob_store = xrealloc (s->blob_store, blobsz);
340 memcpy (s->blob_store + s->blob_next_free, ptr, len);
341 add_attr_chunk (s, entry, name, s->blob_next_free, len);
342 s->blob_next_free += len;
/* Keep the page table covering the whole blob area; new pages are marked as
   already mapped at their natural offset. */
344 unsigned int npages = (s->blob_next_free + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
345 if (npages != s->num_pages)
348 s->pages = xrealloc (s->pages, npages * sizeof (s->pages[0]));
349 for (p = s->pages + s->num_pages; s->num_pages < npages;
352 p->mapped_at = s->num_pages * BLOB_PAGESIZE;
/* Attach a string attribute; VAL is duplicated, so the store owns the copy
   (freed later, e.g. during packing). */
360 add_attr_string (Attrstore *s, unsigned int entry, Id name, const char *val)
363 nv.key = add_key (s, name, TYPE_ATTR_STRING, 0);
364 nv.v.str = strdup (val);
365 add_attr (s, entry, nv);
/* Append VAL to ENTRY's zero-terminated int-list attribute NAME, creating
   the list (with a [val, 0] pair) if it does not exist yet.
   NOTE(review): 0 is the list terminator, so VAL is presumably expected to
   be non-zero — confirm with callers. */
369 add_attr_intlist_int (Attrstore *s, unsigned int entry, Id name, int val)
371 LongNV *nv = find_attr (s, entry, name);
/* Existing list: find current length, grow by one, keep terminator. */
377 while (nv->v.intlist[len])
379 nv->v.intlist = realloc (nv->v.intlist, (len + 2) * sizeof (nv->v.intlist[0]));
380 nv->v.intlist[len] = val;
381 nv->v.intlist[len+1] = 0;
/* No list yet: create a fresh two-element one. */
386 mynv.key = add_key (s, name, TYPE_ATTR_INTLIST, 0);
387 mynv.v.intlist = malloc (2 * sizeof (mynv.v.intlist[0]));
388 mynv.v.intlist[0] = val;
389 mynv.v.intlist[1] = 0;
390 add_attr (s, entry, mynv);
/* Append local string id ID to ENTRY's zero-terminated LocalId-list
   attribute NAME, creating the list if absent.  Mirrors
   add_attr_intlist_int but for TYPE_ATTR_LOCALIDS. */
395 add_attr_localids_id (Attrstore *s, unsigned int entry, Id name, LocalId id)
397 LongNV *nv = find_attr (s, entry, name);
401 while (nv->v.localids[len])
403 nv->v.localids = realloc (nv->v.localids, (len + 2) * sizeof (nv->v.localids[0]));
404 nv->v.localids[len] = id;
405 nv->v.localids[len+1] = 0;
/* First id for this attribute: create a [id, 0] list. */
410 mynv.key = add_key (s, name, TYPE_ATTR_LOCALIDS, 0);
411 mynv.v.localids = malloc (2 * sizeof (mynv.v.localids[0]));
412 mynv.v.localids[0] = id;
413 mynv.v.localids[1] = 0;
414 add_attr (s, entry, mynv);
/* Attach a valueless (flag-like) attribute of type TYPE_VOID to ENTRY. */
419 add_attr_void (Attrstore *s, unsigned int entry, Id name)
422 nv.key = add_key (s, name, TYPE_VOID, 0);
423 add_attr (s, entry, nv);
/* Copy every attribute of SRC into DEST that DEST does not already have.
   List-valued attributes (intlist/localids) are re-added element-wise and
   strings re-duplicated so DEST owns independent storage; everything else is
   copied as the raw LongNV.
   NOTE(review): listing is fragmentary; loop/brace lines are missing. */
427 merge_attrs (Attrstore *s, unsigned dest, unsigned src)
430 ensure_entry (s, dest);
434 for (; nv->key; nv++)
/* Skip attributes DEST already has — no overwrite on merge. */
435 if (!find_attr (s, dest, s->keys[nv->key].name))
436 switch (s->keys[nv->key].type)
438 case TYPE_ATTR_INTLIST:
441 while (nv->v.intlist[len])
442 add_attr_intlist_int (s, dest, s->keys[nv->key].name, nv->v.intlist[len++]);
445 case TYPE_ATTR_LOCALIDS:
448 while (nv->v.localids[len])
449 add_attr_localids_id (s, dest, s->keys[nv->key].name, nv->v.localids[len++]);
452 case TYPE_ATTR_STRING:
453 add_attr_string (s, dest, s->keys[nv->key].name, nv->v.str);
/* Default: shallow copy of the LongNV (int, chunk, void, special). */
456 add_attr (s, dest, *nv);
462 #define pool_debug(a,b,...) fprintf (stderr, __VA_ARGS__)
464 static Id read_id (FILE *fp, Id max);
466 /* This routine is used only when attributes are embedded into the
467 normal repo SOLV file. */
/* Read one attribute value of TYPE for (ENTRY, NAME) from FP and store it
   via the appropriate add_attr_* helper.  IDMAP/MAXID translate pool-based
   ids for localid lists.
   NOTE(review): listing is fragmentary; switch header and several case
   labels/braces are missing. */
469 add_attr_from_file (Attrstore *s, unsigned entry, Id name, int type, Id *idmap, unsigned maxid, FILE *fp)
471 Pool *pool = s->pool;
472 //fprintf (stderr, "%s: attribute in a repo SOLV?\n", id2str (pool, name));
476 add_attr_void (s, entry, name);
478 case TYPE_ATTR_CHUNK:
480 unsigned ofs = read_id (fp, 0);
481 unsigned len = read_id (fp, 0);
482 add_attr_chunk (s, entry, name, ofs, len);
487 unsigned i = read_id(fp, 0);
488 add_attr_int (s, entry, name, i);
491 case TYPE_ATTR_STRING:
/* Strings are read byte-wise until the 0 terminator; a stack buffer is used
   first and grown on the heap when it overflows. */
493 unsigned char localbuf[1024];
495 unsigned char *buf = localbuf;
496 unsigned len = sizeof (localbuf);
498 while((c = getc (fp)) != 0)
502 pool_debug (mypool, SAT_FATAL, "unexpected EOF\n");
505 /* Plus 1 as we also want to add the 0. */
512 memcpy (buf, localbuf, len - 256);
515 buf = xrealloc (buf, len);
520 add_attr_string (s, entry, name, (char*) buf);
525 case TYPE_ATTR_INTLIST:
528 while ((i = read_id(fp, 0)) != 0)
529 add_attr_intlist_int (s, entry, name, i);
532 case TYPE_ATTR_LOCALIDS:
535 /* The read ID will be pool-based. */
/* Translate pool string -> local id before storing. */
536 while ((i = read_id(fp, maxid)) != 0)
540 add_attr_localids_id (s, entry, name, str2localid (s, id2str (pool, i), 1));
545 if (type >= TYPE_ATTR_SPECIAL_START && type <= TYPE_ATTR_SPECIAL_END)
547 add_attr_special_int (s, entry, name, type - TYPE_ATTR_SPECIAL_START);
550 pool_debug(pool, SAT_FATAL, "unknown type %d\n", type);
555 /* Make sure all pages from PSTART to PEND (inclusive) are loaded,
556 and are consecutive. Return a pointer to the mapping of PSTART. */
/* NOTE(review): listing is fragmentary; many brace/assignment lines are
   missing.  Overall flow visible below: fast-path check, grow mapping
   capacity, pick the cheapest slot range (cost heuristic + round-robin
   tie-break), evict, then copy or read+decompress each wanted page. */
558 load_page_range (Attrstore *s, unsigned int pstart, unsigned int pend)
560 unsigned char buf[BLOB_PAGESIZE];
563 /* Quick check in case all pages are there already and consecutive. */
564 for (i = pstart; i <= pend; i++)
565 if (s->pages[i].mapped_at == -1
567 && s->pages[i].mapped_at
568 != s->pages[i-1].mapped_at + BLOB_PAGESIZE))
571 return s->blob_store + s->pages[pstart].mapped_at;
573 /* Ensure that we can map the numbers of pages we need at all. */
574 if (pend - pstart + 1 > s->ncanmap)
576 unsigned int oldcan = s->ncanmap;
577 s->ncanmap = pend - pstart + 1;
580 s->mapped = xrealloc (s->mapped, s->ncanmap * sizeof (s->mapped[0]));
581 memset (s->mapped + oldcan, 0, (s->ncanmap - oldcan) * sizeof (s->mapped[0]));
582 s->blob_store = xrealloc (s->blob_store, s->ncanmap * BLOB_PAGESIZE);
584 fprintf (stderr, "PAGE: can map %d pages\n", s->ncanmap);
588 /* Now search for "cheap" space in our store. Space is cheap if it's either
589 free (very cheap) or contains pages we search for anyway. */
591 /* Setup cost array. */
592 unsigned int cost[s->ncanmap];
593 for (i = 0; i < s->ncanmap; i++)
595 unsigned int pnum = s->mapped[i];
601 Attrblobpage *p = s->pages + pnum;
602 assert (p->mapped_at != -1);
603 if (pnum >= pstart && pnum <= pend)
610 /* And search for cheapest space. */
611 unsigned int best_cost = -1;
612 unsigned int best = 0;
613 unsigned int same_cost = 0;
614 for (i = 0; i + pend - pstart < s->ncanmap; i++)
616 unsigned int c = cost[i];
618 for (j = 0; j < pend - pstart + 1; j++)
621 best_cost = c, best = i;
622 else if (c == best_cost)
624 /* A null cost won't become better. */
628 /* If all places have the same cost we would thrash on slot 0. Avoid
629 this by doing a round-robin strategy in this case. */
630 if (same_cost == s->ncanmap - pend + pstart - 1)
631 best = s->rr_counter++ % (s->ncanmap - pend + pstart);
633 /* So we want to map our pages from [best] to [best+pend-pstart].
634 Use a very simple strategy, which doesn't make the best use of
635 our resources, but works. Throw away all pages in that range
636 (even ours) then copy around ours (in case they were outside the
637 range) or read them in. */
638 for (i = best; i < best + pend - pstart + 1; i++)
640 unsigned int pnum = s->mapped[i];
642 /* If this page is exactly at the right place already,
643 no need to evict it. */
644 && pnum != pstart + i - best)
646 /* Evict this page. */
648 fprintf (stderr, "PAGE: evict page %d from %d\n", pnum, i);
652 s->pages[pnum].mapped_at = -1;
656 /* Everything is free now. Read in the pages we want. */
657 for (i = pstart; i <= pend; i++)
659 Attrblobpage *p = s->pages + i;
660 unsigned int pnum = i - pstart + best;
661 void *dest = s->blob_store + pnum * BLOB_PAGESIZE;
662 if (p->mapped_at != -1)
664 if (p->mapped_at != pnum * BLOB_PAGESIZE)
667 fprintf (stderr, "PAGECOPY: %d to %d\n", i, pnum);
669 /* Still mapped somewhere else, so just copy it from there. */
670 memcpy (dest, s->blob_store + p->mapped_at, BLOB_PAGESIZE);
671 s->mapped[p->mapped_at / BLOB_PAGESIZE] = 0;
/* file_size's low bit flags compression (see write_pages). */
676 unsigned int in_len = p->file_size;
677 unsigned int compressed = in_len & 1;
680 fprintf (stderr, "PAGEIN: %d to %d", i, pnum);
682 /* Not mapped, so read in this page. */
683 if (fseek (s->file, p->file_offset, SEEK_SET) < 0)
685 perror ("mapping fseek");
688 if (fread (compressed ? buf : dest, in_len, 1, s->file) != 1)
690 perror ("mapping fread");
695 unsigned int out_len;
696 out_len = unchecked_decompress_buf (buf, in_len,
697 dest, BLOB_PAGESIZE);
/* Only the final page may legitimately be short. */
698 if (out_len != BLOB_PAGESIZE
699 && i < s->num_pages - 1)
701 fprintf (stderr, "can't decompress\n");
705 fprintf (stderr, " (expand %d to %d)", in_len, out_len);
709 fprintf (stderr, "\n");
712 p->mapped_at = pnum * BLOB_PAGESIZE;
/* mapped[] holds page-number + 1 so 0 can mean "slot free". */
713 s->mapped[pnum] = i + 1;
716 return s->blob_store + best * BLOB_PAGESIZE;
/* Return a pointer to LEN bytes of blob data at offset OFS.  With a backing
   file the relevant page range is faulted in via load_page_range; otherwise
   the data must already be inside the in-memory blob store.
   NOTE(review): listing is fragmentary; the branch structure lines are
   missing. */
720 attr_retrieve_blob (Attrstore *s, unsigned int ofs, unsigned int len)
725 unsigned int pstart = ofs / BLOB_PAGESIZE;
726 unsigned int pend = (ofs + len - 1) / BLOB_PAGESIZE;
727 const void *m = load_page_range (s, pstart, pend);
728 return m + (ofs & (BLOB_PAGESIZE - 1));
/* Non-paged path: offset must lie inside the already-written blob area. */
732 if (ofs >= s->blob_next_free)
734 return s->blob_store + ofs;
/* Growable-buffer append helpers used by the packing code below:
   add_elem appends one element, reallocating in (block+1)-sized steps;
   add_u16 emits a 16-bit value least-significant byte first;
   add_num emits a variable-length number, 7 bits per byte, high bit set on
   all but the last byte.  The __wrong_buf__ typedef is a compile-time check
   that the buffer's element size is 1.
   NOTE(review): listing is fragmentary (the do/while(0) closers are
   missing); no comments are inserted between the backslash-continued macro
   lines to avoid altering macro bodies. */
737 #define FLAT_ATTR_BLOCK 127
738 #define KEY_BLOCK 127
739 #define SCHEMA_BLOCK 127
741 #define add_elem(buf,ofs,val,block) do { \
742 if (((ofs) & (block)) == 0) \
743 buf = xrealloc (buf, ((ofs) + (block) + 1) * sizeof((buf)[0])); \
744 (buf)[(ofs)++] = val; \
746 #define add_u16(buf,ofs,val,block) do {\
747 typedef int __wrong_buf__[(1-sizeof((buf)[0])) * (sizeof((buf)[0])-1)];\
748 add_elem(buf,ofs,(val) & 0xFF,block); \
749 add_elem(buf,ofs,((val) >> 8) & 0xFF,block); \
751 #define add_num(buf,ofs,val,block) do {\
752 typedef int __wrong_buf__[(1-sizeof((buf)[0])) * (sizeof((buf)[0])-1)];\
753 if ((val) >= (1 << 14)) \
755 if ((val) >= (1 << 28)) \
756 add_elem (buf,ofs,((val) >> 28) | 128, block); \
757 if ((val) >= (1 << 21)) \
758 add_elem (buf,ofs,((val) >> 21) | 128, block); \
759 add_elem (buf,ofs,((val) >> 14) | 128, block); \
761 if ((val) >= (1 << 7)) \
762 add_elem (buf,ofs,((val) >> 7) | 128, block); \
763 add_elem (buf,ofs,(val) & 127, block); \
/* qsort comparator: order LongNV elements by ascending key id. */
767 longnv_cmp (const void *pa, const void *pb)
769 const LongNV *a = (const LongNV *)pa;
770 const LongNV *b = (const LongNV *)pb;
771 return a->key - b->key;
/* Return the key id for (NAME, TYPE), accumulating SIZE onto an existing
   key or registering a new one (keys array grows in KEY_BLOCK steps).
   NOTE(review): listing is fragmentary; the return/increment lines are
   missing. */
775 add_key (Attrstore *s, Id name, unsigned type, unsigned size)
/* Linear search over existing keys; match requires both name and type. */
778 for (i = 0; i < s->nkeys; i++)
779 if (s->keys[i].name == name && s->keys[i].type == type)
783 s->keys[i].size += size;
786 if ((s->nkeys & KEY_BLOCK) == 0)
787 s->keys = xrealloc (s->keys, (s->nkeys + KEY_BLOCK + 1) * sizeof (s->keys[0]));
788 s->keys[i].name = name;
789 s->keys[i].type = type;
790 s->keys[i].size = size;
/* Convert the store from the unpacked LongNV-list representation into the
   compact flat form: per-entry schemas (deduplicated key sequences) plus a
   byte stream (flat_attrs) of variable-length-encoded values.  Frees the
   unpacked structures and the local-string hashtable afterwards.
   NOTE(review): listing is fragmentary; numerous brace/guard lines are
   missing. */
795 attr_store_pack (Attrstore *s)
798 unsigned int old_mem = 0;
801 s->ent2attr = xcalloc (s->entries, sizeof (s->ent2attr[0]));
803 s->attr_next_free = 0;
/* Reserve index 0 in flat_attrs and schema slot 0 so that 0 can act as the
   "no attributes" sentinel. */
809 add_num (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
810 add_elem (s->schemata, s->szschemata, 0, SCHEMA_BLOCK);
811 add_elem (s->schemaofs, s->nschemata, 0, SCHEMA_BLOCK);
813 for (i = 0; i < s->entries; i++)
815 unsigned int num_attrs = 0, ofs;
816 LongNV *nv = s->attrs[i];
821 old_mem += (num_attrs + 1) * sizeof (LongNV);
/* Sort by key so equal attribute sets produce identical schemas. */
825 qsort (s->attrs[i], num_attrs, sizeof (LongNV), longnv_cmp);
826 unsigned int this_schema;
/* Search for an existing schema matching this entry's key sequence. */
827 for (this_schema = 0; this_schema < s->nschemata; this_schema++)
829 for (ofs = 0; ofs < num_attrs; ofs++)
831 Id key = nv[ofs].key;
832 assert (s->schemaofs[this_schema] + ofs < s->szschemata);
833 if (key != s->schemata[s->schemaofs[this_schema]+ofs])
836 if (ofs == num_attrs && !s->schemata[s->schemaofs[this_schema]+ofs])
839 if (this_schema == s->nschemata)
841 /* This schema not found --> insert it. */
842 add_elem (s->schemaofs, s->nschemata, s->szschemata, SCHEMA_BLOCK);
843 for (ofs = 0; ofs < num_attrs; ofs++)
845 Id key = nv[ofs].key;
846 add_elem (s->schemata, s->szschemata, key, SCHEMA_BLOCK);
848 add_elem (s->schemata, s->szschemata, 0, SCHEMA_BLOCK);
/* Record where this entry's values start, then emit schema id + values. */
850 s->ent2attr[i] = s->attr_next_free;
851 add_num (s->flat_attrs, s->attr_next_free, this_schema, FLAT_ATTR_BLOCK);
852 for (ofs = 0; ofs < num_attrs; ofs++)
853 switch (s->keys[nv[ofs].key].type)
859 unsigned int i = nv[ofs].v.i[0];
860 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
863 case TYPE_ATTR_CHUNK:
865 unsigned int i = nv[ofs].v.i[0];
866 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
868 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
871 case TYPE_ATTR_STRING:
/* Strings are copied byte-wise, NUL-terminated, then the heap copy freed. */
873 const char *str = nv[ofs].v.str;
875 add_elem (s->flat_attrs, s->attr_next_free, *str, FLAT_ATTR_BLOCK);
876 add_elem (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
877 old_mem += strlen ((const char*)nv[ofs].v.str) + 1;
878 xfree ((void*)nv[ofs].v.str);
881 case TYPE_ATTR_INTLIST:
883 const int *il = nv[ofs].v.intlist;
885 for (; (i = *il) != 0; il++, old_mem += 4)
886 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
887 add_num (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
889 xfree (nv[ofs].v.intlist);
892 case TYPE_ATTR_LOCALIDS:
894 const Id *il = nv[ofs].v.localids;
896 for (; (i = *il) != 0; il++, old_mem += 4)
897 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
898 add_num (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
900 xfree (nv[ofs].v.localids);
908 old_mem += s->entries * sizeof (s->attrs[0]);
912 /* Remove the hashtable too, it will be build on demand in str2localid
913 the next time we call it, which should not happen while in packed mode. */
914 old_mem += (s->ss.stringhashmask + 1) * sizeof (s->ss.stringhashtbl[0]);
915 free (s->ss.stringhashtbl);
916 s->ss.stringhashtbl = 0;
917 s->ss.stringhashmask = 0;
/* Debug statistics: memory before/after packing. */
919 fprintf (stderr, "%d\n", old_mem);
920 fprintf (stderr, "%zd\n", s->entries * sizeof(s->ent2attr[0]));
921 fprintf (stderr, "%d\n", s->attr_next_free);
922 fprintf (stderr, "%zd\n", s->nschemata * sizeof(s->schemaofs[0]));
923 fprintf (stderr, "%zd\n", s->szschemata * sizeof(s->schemata[0]));
924 fprintf (stderr, "pages %d\n", s->num_pages);
/* NOTE(review): only a fragment of pagein_all is present in this listing;
   the loop that actually faults in the pages is missing. */
928 /* Pages in all blob pages, and deactivates paging. */
930 pagein_all (Attrstore *s)
932 /* If we have no backing file everything is there already. */
935 /*fprintf (stderr, "Aieee!\n");
/* Inverse of attr_store_pack: walk the flat representation with FOR_ATTRS
   and rebuild the per-entry LongNV lists via the add_attr_* helpers, then
   free the flat structures (ent2attr, flat_attrs, schemaofs, schemata).
   NOTE(review): listing is fragmentary; braces, guards and the "packed?"
   early-out are missing. */
940 attr_store_unpack (Attrstore *s)
948 /* Make the store writable right away, so we can use our adder functions. */
950 s->attrs = xcalloc (s->entries, sizeof (s->attrs[0]));
952 for (i = 0; i < s->entries; i++)
955 FOR_ATTRS (s, i, &ai)
960 add_attr_void (s, i, ai.name);
963 add_attr_int (s, i, ai.name, ai.as_int);
965 case TYPE_ATTR_CHUNK:
966 add_attr_chunk (s, i, ai.name, ai.as_chunk[0], ai.as_chunk[1]);
968 case TYPE_ATTR_STRING:
969 add_attr_string (s, i, ai.name, ai.as_string);
971 case TYPE_ATTR_INTLIST:
/* Lists are stored as variable-length numbers until a 0 terminator. */
976 get_num (ai.as_numlist, val);
979 add_attr_intlist_int (s, i, ai.name, val);
983 case TYPE_ATTR_LOCALIDS:
988 get_num (ai.as_numlist, val);
991 add_attr_localids_id (s, i, ai.name, val);
996 if (ai.type >= TYPE_ATTR_SPECIAL_START
997 && ai.type <= TYPE_ATTR_SPECIAL_END)
998 add_attr_special_int (s, i, ai.name, ai.type - TYPE_ATTR_SPECIAL_START);
/* Drop the now-redundant packed structures. */
1004 xfree (s->ent2attr);
1006 xfree (s->flat_attrs);
1008 s->attr_next_free = 0;
1009 xfree (s->schemaofs);
1012 xfree (s->schemata);
/* Write a single byte to FP; on failure print "write error" (error exit
   lines not visible in this listing). */
1018 write_u8(FILE *fp, unsigned int x)
1020 if (putc(x, fp) == EOF)
1022 perror("write error");
/* Write X as 4 bytes, most-significant byte first (big-endian), to FP. */
1028 write_u32(FILE *fp, unsigned int x)
1030 if (putc(x >> 24, fp) == EOF ||
1031 putc(x >> 16, fp) == EOF ||
1032 putc(x >> 8, fp) == EOF ||
1035 perror("write error");
/* Write an Id in the SOLV variable-length encoding: 7 bits per byte,
   high-order groups first, bit 7 set on every byte except the last.
   NOTE(review): listing is fragmentary; the range guards (if (x >= …))
   are missing. */
1041 write_id(FILE *fp, Id x)
1046 putc((x >> 28) | 128, fp);
1048 putc((x >> 21) | 128, fp);
1049 putc((x >> 14) | 128, fp);
1052 putc((x >> 7) | 128, fp);
1053 if (putc(x & 127, fp) == EOF)
1055 perror("write error");
/* Write a zero-terminated Id array.  The visible transformation makes room
   for a continuation flag: the low 6 bits stay in place and the rest shifts
   up one bit; bit 6 (| 64) marks "more elements follow".
   NOTE(review): only a fragment of this function is present. */
1061 write_idarray(FILE *fp, Id *ids)
1075 id = (id & 63) | ((id & ~63) << 1);
1081 write_id(fp, id | 64);
/* Write the blob store to FP as pages: total size, page size, then for each
   page a u32 length word (out_len * 2 + compressed-flag in bit 0) followed
   by the page bytes, compressed when that saves space.
   NOTE(review): listing is fragmentary; the closing of the big design
   comment below is missing, as are several statements. */
1086 write_pages (FILE *fp, Attrstore *s)
1089 unsigned char buf[BLOB_PAGESIZE];
1091 /* The compressed pages in the file have different sizes, so we need
1092 to store these sizes somewhere, either in front of all page data,
1093 interleaved with the page data (in front of each page), or after
1094 the page data. At this point we don't yet know the final compressed
1095 sizes. These are the pros and cons:
1096 * in front of all page data
1097 + when reading back we only have to read this header, and know
1098 where every page data is placed
1099 - we have to compress all pages first before starting to write them.
1100 Our output stream might be unseekable, so we can't simply
1101 reserve space for the header, write all pages and then update the
1102 header. This needs memory for all compressed pages at once.
1103 * interleaved with page data
1104 + we can compress and write per page, low memory overhead
1105 - when reading back we have to read at least those numbers,
1106 thereby either having to read all page data, or at least seek
1108 * after all page data
1109 + we can do streamed writing, remembering the sizes per page,
1110 and emitting the header (which is a footer then) at the end
1111 - reading back is hardest: before the page data we don't know
1112 how long it is overall, so we have to put that information
1113 also at the end, but it needs a determinate position, so can
1114 only be at a known offset from the end. But that means that
1115 we must be able to seek when reading back. We have this
1116 wish anyway in case we want to use on-demand paging then, but
1119 Of all these it seems the best good/bad ratio is with the interleaved
1120 storage. No memory overhead at writing and no unreasonable limitations
1122 write_u32 (fp, s->blob_next_free);
1123 write_u32 (fp, BLOB_PAGESIZE);
1124 assert (((s->blob_next_free + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE) == s->num_pages);
1125 for (i = 0; i < s->num_pages; i++)
1127 unsigned int in_len;
1128 unsigned int out_len;
1130 if (i == s->num_pages - 1)
1131 in_len = s->blob_next_free & (BLOB_PAGESIZE - 1);
1133 in_len = BLOB_PAGESIZE;
1136 in = attr_retrieve_blob (s, i * BLOB_PAGESIZE, in_len);
1137 out_len = compress_buf (in, in_len, buf, in_len - 1);
1140 memcpy (buf, in, in_len);
1147 fprintf (stderr, "page %d: %d -> %d\n", i, in_len, out_len);
1149 write_u32 (fp, out_len * 2 + (out_len != in_len));
1151 && fwrite (buf, out_len, 1, fp) != 1)
1153 perror("write error");
/* Serialize the whole store to FP in SOLV-file layout: header, local string
   pool, key table, schemata, per-entry sizes, flat attribute stream, then
   the blob pages.  Packs the store first; key names are temporarily
   converted pool-id -> local-id for writing and back afterwards.
   NOTE(review): listing is fragmentary; braces and a few statements are
   missing. */
1160 write_attr_store (FILE *fp, Attrstore *s)
1163 unsigned local_ssize;
1165 attr_store_pack (s);
1167 /* Transform our attribute names (pool string IDs) into local IDs. */
1168 for (i = 1; i < s->nkeys; i++)
1169 s->keys[i].name = str2localid (s, id2str (s->pool, s->keys[i].name), 1);
1171 /* write file header */
1172 write_u32(fp, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
1173 write_u32(fp, SOLV_VERSION_2);
1176 write_u32(fp, s->ss.nstrings); // nstrings
1177 write_u32(fp, 0); // nrels
1178 write_u32(fp, s->entries); // nsolvables
1179 write_u32(fp, s->nkeys);
1180 write_u32(fp, s->nschemata);
1181 write_u32(fp, 0); /* no info block */
1182 unsigned solv_flags = 0;
1183 solv_flags |= SOLV_FLAG_PACKEDSIZES;
1184 //solv_flags |= SOLV_FLAG_PREFIX_POOL;
1185 write_u32(fp, solv_flags);
/* Total byte size of all local strings (NUL-terminated), skipping id 0. */
1187 for (i = 1, local_ssize = 0; i < (unsigned)s->ss.nstrings; i++)
1188 local_ssize += strlen (localid2str (s, i)) + 1;
1190 write_u32 (fp, local_ssize);
1191 for (i = 1; i < (unsigned)s->ss.nstrings; i++)
1193 const char *str = localid2str (s, i);
1194 if (fwrite(str, strlen(str) + 1, 1, fp) != 1)
1196 perror("write error");
/* Key table: name (local id), type, size per key. */
1201 for (i = 1; i < s->nkeys; i++)
1203 write_id (fp, s->keys[i].name);
1204 write_id (fp, s->keys[i].type);
1205 write_id (fp, s->keys[i].size);
1207 /* Also transform back the names (now local IDs) into pool IDs,
1208 so we can use the pool also after writing. */
1209 s->keys[i].name = str2id (s->pool, localid2str (s, s->keys[i].name), 0);
1212 write_id (fp, s->szschemata);
1213 Id *ids = s->schemata + 0;
1214 for (i = 0; i < s->nschemata; i++)
1215 ids = write_idarray (fp, ids);
1216 assert (ids == s->schemata + s->szschemata);
1218 /* Convert our offsets into sizes. */
1219 unsigned end = s->attr_next_free;
1220 for (i = s->entries; i > 0;)
1225 s->ent2attr[i] = end - s->ent2attr[i];
1226 end = end - s->ent2attr[i];
1229 /* The first zero should not have been consumed, but everything else. */
1231 /* Write the sizes and convert back to offsets. */
1233 for (i = 0; i < s->entries; i++)
1235 write_id (fp, s->ent2attr[i]);
1237 s->ent2attr[i] += start, start = s->ent2attr[i];
/* flat_attrs[0] is the sentinel; write everything after it. */
1241 && fwrite (s->flat_attrs + 1, s->attr_next_free - 1, 1, fp) != 1)
1243 perror ("write error");
1247 write_pages (fp, s);
/* NOTE(review): fragment — appears to be the body of a 32-bit reader (four
   byte iterations, EOF diagnostic); the function header is not part of this
   extraction. */
1256 for (i = 0; i < 4; i++)
1261 fprintf(stderr, "unexpected EOF\n");
/* Read a variable-length Id (the encoding written by write_id): up to 5
   bytes, 7 data bits each, bit 7 = continuation.  With MAX non-zero, an id
   >= MAX is reported as too large.
   NOTE(review): listing is fragmentary; the byte-fetch and return lines are
   missing. */
1270 read_id(FILE *fp, Id max)
1275 for (i = 0; i < 5; i++)
1280 fprintf(stderr, "unexpected EOF\n");
1286 if (max && x >= max)
1288 fprintf(stderr, "read_id: id too large (%u/%u)\n", x, max);
/* Fold in 7 more bits; the ^128 clears the continuation flag. */
1293 x = (x << 7) ^ c ^ 128;
1295 fprintf(stderr, "read_id: id too long\n");
/* Read a zero-terminated Id array (the write_idarray encoding: 6 data bits
   plus a bit-6 continuation flag per element) into STORE, bounds-checked
   against END; MAP/MAX optionally translate and validate ids, RELATIVE
   selects delta decoding.
   NOTE(review): listing is fragmentary; most of the control flow is
   missing. */
1300 read_idarray(FILE *fp, Id max, Id *map, Id *store, Id *end, int relative)
1310 pool_debug(mypool, SAT_FATAL, "unexpected EOF\n");
1315 x = (x << 6) | (c & 63);
/* The (0, 0x40) pair encodes the prereq marker in relative mode. */
1318 if (x == 0 && c == 0x40)
1323 pool_debug(mypool, SAT_FATAL, "read_idarray: array overflow\n");
1326 *store++ = SOLVABLE_PREREQMARKER;
1336 pool_debug(mypool, SAT_FATAL, "read_idarray: id too large (%u/%u)\n", x, max);
1343 pool_debug(mypool, SAT_FATAL, "read_idarray: array overflow\n");
1349 if (x == 0) /* already have trailing zero? */
1353 pool_debug(mypool, SAT_FATAL, "read_idarray: array overflow\n");
1362 x = (x << 7) ^ c ^ 128;
1366 /* Try to either setup on-demand paging (using FP as backing
1367 file), or in case that doesn't work (FP not seekable) slurps in
1368 all pages and deactivates paging. */
/* NOTE(review): listing is fragmentary; braces, error exits and some
   assignments are missing. */
1370 read_or_setup_pages (FILE *fp, Attrstore *s)
1372 unsigned int blobsz;
1373 unsigned int pagesz;
1374 unsigned int npages;
1376 unsigned int can_seek;
1378 unsigned char buf[BLOB_PAGESIZE];
1379 blobsz = read_u32 (fp);
1380 pagesz = read_u32 (fp);
1381 if (pagesz != BLOB_PAGESIZE)
1383 /* We could handle this by slurping in everything. */
1384 fprintf (stderr, "non matching page size\n");
/* ftell failing implies an unseekable stream (pipe) -> slurp mode. */
1388 if ((cur_file_ofs = ftell (fp)) < 0)
1391 fprintf (stderr, "can %sseek\n", can_seek ? "" : "NOT ");
1392 npages = (blobsz + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
1394 s->num_pages = npages;
1395 s->pages = xmalloc (npages * sizeof (s->pages[0]));
1397 /* If we can't seek on our input we have to slurp in everything. */
1400 s->blob_next_free = blobsz;
1401 s->blob_store = xrealloc (s->blob_store, (s->blob_next_free + BLOB_BLOCK) &~BLOB_BLOCK);
1403 for (i = 0; i < npages; i++)
1405 unsigned int in_len = read_u32 (fp);
1406 unsigned int compressed = in_len & 1;
1407 Attrblobpage *p = s->pages + i;
1410 fprintf (stderr, "page %d: len %d (%scompressed)\n",
1411 i, in_len, compressed ? "" : "not ");
/* Seekable path: record offset/size for on-demand paging and skip the
   payload. */
1417 p->file_offset = cur_file_ofs;
1418 p->file_size = in_len * 2 + compressed;
1419 if (fseek (fp, in_len, SEEK_CUR) < 0)
1422 fprintf (stderr, "can't seek after we thought we can\n");
1423 /* We can't fall back to non-seeking behaviour as we already
1424 read over some data pages without storing them away. */
1427 cur_file_ofs += in_len;
/* Unseekable path: read (and maybe decompress) the page right now. */
1431 unsigned int out_len;
1432 void *dest = s->blob_store + i * BLOB_PAGESIZE;
1433 p->mapped_at = i * BLOB_PAGESIZE;
1436 /* We can't seek, so suck everything in. */
1437 if (fread (compressed ? buf : dest, in_len, 1, fp) != 1)
1444 out_len = unchecked_decompress_buf (buf, in_len,
1445 dest, BLOB_PAGESIZE);
1446 if (out_len != BLOB_PAGESIZE
1449 fprintf (stderr, "can't decompress\n");
1458 /* If we are here we were able to seek to all page
1459 positions, so activate paging by copying FP into our structure.
1460 We dup() the file, so that our callers can fclose() it and we
1461 still have it open. But this means that we share file positions
1462 with the input filedesc. So in case our caller reads it after us,
1463 and calls back into us we might change the file position unexpectedly
1465 int fd = dup (fileno (fp));
1468 /* Jeez! What a bloody system, we can't dup() anymore. */
1472 /* XXX we don't close this yet anywhere. */
1473 s->file = fdopen (fd, "r");
1476 /* My God! What happened now? */
/* Read a SOLV-format attribute store from FP into a new Attrstore bound to
   POOL: validate magic/version/flags, load the local string pool, the key
   table (globalizing names to pool ids), schemata, per-entry sizes and the
   flat attribute stream, then hand off to read_or_setup_pages for the blob
   data.
   NOTE(review): listing is fragmentary; braces, error exits and the final
   return are missing. */
1484 attr_store_read (FILE *fp, Pool *pool)
1488 unsigned local_ssize;
1489 unsigned nstrings, nschemata;
1490 Attrstore *s = new_store (pool);
1492 if (read_u32(fp) != ('S' << 24 | 'O' << 16 | 'L' << 8 | 'V'))
1494 pool_debug(pool, SAT_FATAL, "not a SOLV file\n");
1497 unsigned solvversion = read_u32(fp);
1498 switch (solvversion)
1500 case SOLV_VERSION_2:
1503 pool_debug(pool, SAT_FATAL, "unsupported SOLV version\n");
1507 nstrings = read_u32(fp);
1508 read_u32(fp); //nrels
1509 nentries = read_u32(fp);
1510 s->nkeys = read_u32(fp);
1511 nschemata = read_u32(fp);
1512 read_u32(fp); //ninfo
1513 unsigned solvflags = read_u32(fp);
1514 if (!(solvflags & SOLV_FLAG_PACKEDSIZES))
1516 pool_debug(pool, SAT_FATAL, "invalid attribute store\n");
1520 /* Slightly hacky. Our local string pool already contains "<NULL>" and
1521 "". We write out the "" too, so we have to read over it. We write it
1522 out to be compatible with the SOLV file and to not have to introduce
1523 merging and mapping the string IDs. */
1524 local_ssize = read_u32 (fp) - 1;
1525 char *strsp = (char *)xrealloc(s->ss.stringspace, s->ss.sstrings + local_ssize + 1);
1526 Offset *str = (Offset *)xrealloc(s->ss.strings, (nstrings) * sizeof(Offset));
1528 s->ss.stringspace = strsp;
1529 s->ss.strings = str;
1530 strsp += s->ss.sstrings;
/* Skip the leading "" (one NUL byte), then read the rest of the pool. */
1532 unsigned char ignore_char = 1;
1533 if (fread(&ignore_char, 1, 1, fp) != 1
1534 || (local_ssize && fread(strsp, local_ssize, 1, fp) != 1)
1535 || ignore_char != 0)
1537 perror ("read error while reading strings");
1540 strsp[local_ssize] = 0;
1542 /* Don't build hashtable here, it will be built on demand by str2localid
1543 should we call that. */
/* Rebuild the string offset table by scanning for NUL terminators. */
1545 strsp = s->ss.stringspace;
1546 s->ss.nstrings = nstrings;
1547 for (i = 0; i < nstrings; i++)
1549 str[i] = strsp - s->ss.stringspace;
1550 strsp += strlen (strsp) + 1;
1552 s->ss.sstrings = strsp - s->ss.stringspace;
1554 s->keys = xrealloc (s->keys, ((s->nkeys + KEY_BLOCK) & ~KEY_BLOCK) * sizeof (s->keys[0]));
1555 /* s->keys[0] is initialized in new_store. */
1556 for (i = 1; i < s->nkeys; i++)
1558 s->keys[i].name = read_id (fp, nstrings);
1559 s->keys[i].type = read_id (fp, TYPE_ATTR_TYPE_MAX + 1);
1560 s->keys[i].size = read_id (fp, 0);
1562 /* Globalize the attribute names (they are local IDs right now). */
1563 s->keys[i].name = str2id (s->pool, localid2str (s, s->keys[i].name), 1);
1566 s->szschemata = read_id (fp, 0);
1568 s->schemata = xmalloc (((s->szschemata + SCHEMA_BLOCK) & ~SCHEMA_BLOCK) * sizeof (s->schemata[0]));
1570 Id *ids = s->schemata;
1571 //add_elem (s->schemaofs, s->nschemata, 0, SCHEMA_BLOCK);
1573 while (ids < s->schemata + s->szschemata)
1575 add_elem (s->schemaofs, s->nschemata, ids - s->schemata, SCHEMA_BLOCK);
1576 ids = read_idarray (fp, s->nkeys, 0, ids, s->schemata + s->szschemata, 0);
1578 assert (ids == s->schemata + s->szschemata);
1579 assert (nschemata == s->nschemata);
1581 s->entries = nentries;
1583 s->ent2attr = xmalloc (s->entries * sizeof (s->ent2attr[0]));
/* Sizes on disk -> running offsets in memory. */
1585 for (i = 0; i < s->entries; i++)
1587 int d = read_id (fp, 0);
1589 s->ent2attr[i] = start, start += d;
1594 s->attr_next_free = start;
1595 s->flat_attrs = xmalloc (((s->attr_next_free + FLAT_ATTR_BLOCK) & ~FLAT_ATTR_BLOCK) * sizeof (s->flat_attrs[0]));
1596 s->flat_attrs[0] = 0;
1597 if (s->entries && fread (s->flat_attrs + 1, s->attr_next_free - 1, 1, fp) != 1)
1599 perror ("read error");
1603 read_or_setup_pages (fp, s);
/* Search all entries for string-valued attributes matching PATTERN
   (substring / exact / glob / regex per the low 3 bits of FLAGS, optionally
   case-insensitive, optionally including blobs) and invoke CB for each
   match.  NAME non-zero restricts the search to one attribute.
   NOTE(review): listing is fragmentary; switch headers and braces are
   missing. */
1611 attr_store_search_s (Attrstore *s, const char *pattern, int flags, Id name, cb_attr_search_s cb)
1616 /* If we search for a glob, but we don't have a wildcard pattern, make this
1617 an exact string search. */
1618 if ((flags & 7) == SEARCH_GLOB
1619 && !strpbrk (pattern, "?*["))
1620 flags = SEARCH_STRING | (flags & ~7);
1621 if ((flags & 7) == SEARCH_REGEX)
1623 /* We feed multiple lines eventually (e.g. authors or descriptions),
1624 so set REG_NEWLINE. */
1625 if (regcomp (&regex, pattern,
1626 REG_EXTENDED | REG_NOSUB | REG_NEWLINE
1627 | ((flags & SEARCH_NOCASE) ? REG_ICASE : 0)) != 0)
1630 for (i = 0; i < s->entries; i++)
1631 FOR_ATTRS (s, i, &ai)
1634 if (name && name != ai.name)
/* Extract a searchable C string depending on the attribute type. */
1640 case TYPE_ATTR_INTLIST:
1642 case TYPE_ATTR_CHUNK:
1643 if (!(flags & SEARCH_BLOBS))
1645 str = attr_retrieve_blob (s, ai.as_chunk[0], ai.as_chunk[1]);
1647 case TYPE_ATTR_STRING:
1650 case TYPE_ATTR_LOCALIDS:
1653 get_num (ai.as_numlist, val);
1655 str = localid2str (s, val);
1663 unsigned int match = 0;
1666 case SEARCH_SUBSTRING:
1667 if (flags & SEARCH_NOCASE)
1668 match = !! strcasestr (str, pattern);
1670 match = !! strstr (str, pattern);
1673 if (flags & SEARCH_NOCASE)
1674 match = ! strcasecmp (str, pattern);
1676 match = ! strcmp (str, pattern);
1679 match = ! fnmatch (pattern, str,
1680 (flags & SEARCH_NOCASE) ? FNM_CASEFOLD : 0);
1683 match = ! regexec (&regex, str, 0, NULL, 0);
1689 cb (s, i, ai.name, str);
/* For localid lists continue with the next id in the list. */
1690 if (ai.type != TYPE_ATTR_LOCALIDS)
1693 get_num (ai.as_numlist, val);
1696 str = localid2str (s, val);
1699 if ((flags & 7) == SEARCH_REGEX)
/* NOTE(review): appears to be the body of a self-test driver (likely a
   main() under an #ifdef) whose opening line is not part of this
   extraction: builds a store, populates entry id1 with ints, a chunk, a
   string, an int list and two localid lists, then writes it to stdout.
   The first add_attr_int passes `s` to str2id where the others pass
   s->pool — presumably a typo in the test; confirm against the original. */
707 Pool *pool = pool_create ();
708 Attrstore *s = new_store (pool);
709 unsigned int id1 = new_entry (s);
710 unsigned int id2 = new_entry (s);
711 unsigned int id3 = new_entry (s);
712 unsigned int id4 = new_entry (s);
713 add_attr_int (s, id1, str2id (s, "name1", 1), 42);
714 add_attr_chunk (s, id1, str2id (s->pool, "name2", 1), 9876, 1024);
715 add_attr_string (s, id1, str2id (s->pool, "name3", 1), "hallo");
716 add_attr_int (s, id1, str2id (s->pool, "name1", 1), 43);
717 add_attr_intlist_int (s, id1, str2id (s->pool, "intlist1", 1), 3);
718 add_attr_intlist_int (s, id1, str2id (s->pool, "intlist1", 1), 14);
719 add_attr_intlist_int (s, id1, str2id (s->pool, "intlist1", 1), 1);
720 add_attr_intlist_int (s, id1, str2id (s->pool, "intlist1", 1), 59);
721 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids1", 1), str2localid (s, "one", 1));
722 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids1", 1), str2localid (s, "two", 1));
723 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids1", 1), str2localid (s, "three", 1));
724 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids2", 1), str2localid (s, "three", 1));
725 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids2", 1), str2localid (s, "two", 1));
726 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids2", 1), str2localid (s, "one", 1));
727 write_attr_store (stdout, s);