2 * Copyright (c) 2007, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
8 /* We need FNM_CASEFOLD and strcasestr. */
13 #include <sys/types.h>
24 #include "attr_store.h"
29 #include "attr_store_p.h"
33 /* #define DEBUG_PAGING */
35 #define BLOB_BLOCK 65535
37 #define STRINGSPACE_BLOCK 1023
38 #define STRING_BLOCK 127
39 #define LOCALID_NULL 0
40 #define LOCALID_EMPTY 1
42 static Id add_key (Attrstore *s, Id name, unsigned type, unsigned size);
/* Create a fresh, zero-initialized Attrstore bound to POOL and seed its
   local string pool with the predefined strings.
   NOTE(review): this extract is line-sampled; statements are missing. */
45 new_store (Pool *pool)
47 static const char *predef_strings[] = {
52 Attrstore *s = calloc (1, sizeof (Attrstore));
54 stringpool_init (&s->ss, predef_strings);
/* Map STR to a store-local string id (creating it if CREATE is nonzero);
   thin wrapper around stringpool_str2id on the store's own string pool. */
61 str2localid (Attrstore *s, const char *str, int create)
63 return stringpool_str2id (&s->ss, str, create);
/* Return the C string stored under local id ID.  No bounds check on ID. */
67 localid2str(Attrstore *s, LocalId id)
69 return s->ss.stringspace + s->ss.strings[id];
/* Lazily initialize the directory tree: allocate the dirs array, set up the
   two reserved entries (slot 0 unused, slot 1 = "/"), allocate the directory
   stack with "/" pushed, and init the dir-name string pool. */
73 setup_dirs (Attrstore *s)
75 static const char *ss_init_strs[] =
82 s->dirtree.dirs = calloc (1024, sizeof (s->dirtree.dirs[0]));
84 s->dirtree.dirs[0].child = 0;
85 s->dirtree.dirs[0].sibling = 0;
86 s->dirtree.dirs[0].name = 0;
87 s->dirtree.dirs[1].child = 0;
88 s->dirtree.dirs[1].sibling = 0;
89 s->dirtree.dirs[1].name = STRID_EMPTY;
/* Stack grows in blocks of 16; entry 0 is the root directory (id 1). */
91 s->dirtree.dirstack_size = 16;
92 s->dirtree.dirstack = malloc (s->dirtree.dirstack_size * sizeof (s->dirtree.dirstack[0]));
93 s->dirtree.ndirstack = 0;
94 s->dirtree.dirstack[s->dirtree.ndirstack++] = 1; //dir-id of /
96 stringpool_init (&s->dirtree.ss, ss_init_strs);
/* Recursive worker for dir_lookup: resolve the first path component of NAME
   under directory DIR.  The component name is interned, then searched among
   DIR's children; when absent (and presumably INSERT is set) a new Dir node
   is allocated, linked as first child, pushed on the dir stack, and the
   remainder of the path is resolved recursively. */
100 dir_lookup_1 (Attrstore *s, unsigned dir, const char *name, unsigned insert)
/* strchrnul: end points at the next '/' or the terminating NUL. */
107 const char *end = strchrnul (name, '/');
108 nameid = stringpool_strn2id (&s->dirtree.ss, name, end - name, 1);
110 Dir *dirs = s->dirtree.dirs;
111 for (c = dirs[dir].child; c; c = dirs[c].sibling, num++)
112 if (nameid == dirs[c].name)
/* Not found: allocate a new dir node (array grown in 1024-slot steps). */
118 c = s->dirtree.ndirs++;
120 dirs = realloc (dirs, (c + 1024) * sizeof (dirs[0]));
122 dirs[c].sibling = dirs[dir].child;
123 dirs[c].name = nameid;
124 dirs[c].parent = dir;
126 s->dirtree.dirs = dirs;
/* Grow the dir stack in blocks of 16 before pushing the new dir. */
128 if (!(s->dirtree.ndirstack & 15))
130 s->dirtree.dirstack_size += 16;
131 s->dirtree.dirstack = realloc (s->dirtree.dirstack, s->dirtree.dirstack_size * sizeof (s->dirtree.dirstack[0]));
133 s->dirtree.dirstack[s->dirtree.ndirstack++] = c;
136 unsigned ret = dir_lookup_1 (s, c, end + 1, insert);
/* Resolve path NAME to a directory id.  The dir stack caches the previously
   looked-up path; the loop below counts how many leading components of NAME
   match the stack, truncates the stack to that prefix, and only resolves the
   remaining suffix via dir_lookup_1.  Repeated '/' are accepted. */
141 dir_lookup (Attrstore *s, const char *name, unsigned insert)
143 if (!s->dirtree.ndirs)
146 /* Detect number of common path components. Accept multiple // . */
147 const char *new_start;
149 for (components = 1, new_start = name; components < s->dirtree.ndirstack; )
153 while (*new_start == '/')
155 dirname = stringpool_id2str (&s->dirtree.ss, s->dirtree.dirs[s->dirtree.dirstack[components]].name);
/* Compare one path component of NAME against the stacked dir name. */
156 for (ofs = 0;; ofs++)
158 char n = new_start[ofs];
159 char d = dirname[ofs];
160 if (d == 0 && (n == 0 || n == '/'))
173 while (*new_start == '/')
177 /* We have always / on the stack. */
178 //assert (ndirstack);
179 //assert (ndirstack >= components);
180 s->dirtree.ndirstack = components;
181 unsigned ret = s->dirtree.dirstack[s->dirtree.ndirstack - 1];
183 ret = dir_lookup_1 (s, ret, new_start, insert);
184 //assert (ret == dirstack[ndirstack - 1]);
/* Return the parent directory id of DIR. */
189 dir_parent (Attrstore *s, unsigned dir)
191 return s->dirtree.dirs[dir].parent;
/* Render directory DIR as a path string into *STR (buffer length in *LEN).
   Walks up to the root collecting name ids, computes the total length, then
   concatenates the components ("/" separated, built via mempcpy). */
195 dir2str (Attrstore *s, unsigned dir, char **str, unsigned *len)
/* VLA sized by the dir stack; holds the name id of each path component. */
198 Id ids[s->dirtree.dirstack_size + 1];
200 for (i = 0; dir > 1; dir = dir_parent (s, dir), i++)
201 ids[i] = s->dirtree.dirs[dir].name;
204 for (i = 0; i < ii; i++)
205 l += 1 + strlen (stringpool_id2str (&s->dirtree.ss, ids[i]));
216 const char *name = stringpool_id2str (&s->dirtree.ss, ids[i]);
217 dest = mempcpy (dest, name, strlen (name));
/* Grow s->attrs so that ENTRY is a valid index.  Storage is grown in
   128-entry blocks; newly exposed slots are zeroed. */
224 ensure_entry (Attrstore *s, unsigned int entry)
226 unsigned int old_num = s->entries;
227 if (entry < s->entries)
229 s->entries = entry + 1;
/* Only realloc when the 128-rounded capacity actually changes. */
230 if (((old_num + 127) & ~127) != ((s->entries + 127) & ~127))
233 s->attrs = realloc (s->attrs, (((s->entries+127) & ~127) * sizeof (s->attrs[0])));
235 s->attrs = malloc (((s->entries+127) & ~127) * sizeof (s->attrs[0]));
237 memset (s->attrs + old_num, 0, (s->entries - old_num) * sizeof (s->attrs[0]));
/* Append a new empty entry (NULL attribute list) and return its index.
   Storage grows in blocks of 128 entries. */
241 new_entry (Attrstore *s)
243 if ((s->entries & 127) == 0)
246 s->attrs = realloc (s->attrs, ((s->entries+128) * sizeof (s->attrs[0])));
248 s->attrs = malloc ((s->entries+128) * sizeof (s->attrs[0]));
250 s->attrs[s->entries++] = 0;
251 return s->entries - 1;
/* Find the attribute named NAME in ENTRY's key-terminated LongNV list.
   Presumably returns a pointer to the matching (or terminating) element,
   or NULL for an out-of-range entry -- TODO confirm, lines are missing. */
255 find_attr (Attrstore *s, unsigned int entry, Id name)
258 if (entry >= s->entries)
260 nv = s->attrs[entry];
263 while (nv->key && s->keys[nv->key].name != name)
/* Store ATTR (a fully prepared LongNV) in ENTRY's attribute list.
   Scans for an existing element with the same key; otherwise the list is
   (re)allocated with room for the new element plus the terminator, and the
   new element is written into the second-to-last slot. */
272 add_attr (Attrstore *s, unsigned int entry, LongNV attr)
276 ensure_entry (s, entry);
277 if (attr.key >= s->nkeys)
279 nv = s->attrs[entry];
283 while (nv->key && nv->key != attr.key)
287 len = nv - s->attrs[entry];
291 s->attrs[entry] = realloc (s->attrs[entry], len * sizeof (LongNV));
293 s->attrs[entry] = malloc (len * sizeof (LongNV));
294 nv = s->attrs[entry] + len - 2;
/* Add integer attribute NAME = VAL to ENTRY (key type TYPE_ATTR_INT). */
300 add_attr_int (Attrstore *s, unsigned int entry, Id name, unsigned int val)
303 nv.key = add_key (s, name, TYPE_ATTR_INT, 0);
305 add_attr (s, entry, nv);
/* Add a small integer encoded directly in the key type (the "special"
   range).  Values too large for that range fall back to a normal int
   attribute. */
309 add_attr_special_int (Attrstore *s, unsigned int entry, Id name, unsigned int val)
311 if (val > (TYPE_ATTR_SPECIAL_END - TYPE_ATTR_SPECIAL_START))
312 add_attr_int (s, entry, name, val);
316 nv.key = add_key (s, name, TYPE_ATTR_SPECIAL_START + val, 0);
317 add_attr (s, entry, nv);
/* Add a blob-store reference (offset OFS, length LEN) as attribute NAME. */
322 add_attr_chunk (Attrstore *s, unsigned int entry, Id name, unsigned int ofs, unsigned int len)
325 nv.key = add_key (s, name, TYPE_ATTR_CHUNK, 0);
328 add_attr (s, entry, nv);
/* Copy LEN bytes from PTR into the blob store and attach them to ENTRY as a
   chunk attribute.  The store is grown in BLOB_BLOCK-rounded steps, and the
   page table is extended to cover the new data. */
332 add_attr_blob (Attrstore *s, unsigned int entry, Id name, const void *ptr, unsigned int len)
/* Grow only when the append crosses a BLOB_BLOCK boundary. */
334 if (((s->blob_next_free + BLOB_BLOCK) & ~BLOB_BLOCK)
335 != ((s->blob_next_free + len + BLOB_BLOCK) & ~BLOB_BLOCK))
337 unsigned int blobsz = (s->blob_next_free + len + BLOB_BLOCK) &~BLOB_BLOCK;
338 s->blob_store = xrealloc (s->blob_store, blobsz);
340 memcpy (s->blob_store + s->blob_next_free, ptr, len);
341 add_attr_chunk (s, entry, name, s->blob_next_free, len);
342 s->blob_next_free += len;
/* Extend the page table; fresh pages are mapped in place in the store. */
344 unsigned int npages = (s->blob_next_free + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
345 if (npages != s->num_pages)
348 s->pages = xrealloc (s->pages, npages * sizeof (s->pages[0]));
349 for (p = s->pages + s->num_pages; s->num_pages < npages;
352 p->mapped_at = s->num_pages * BLOB_PAGESIZE;
/* Add string attribute NAME = VAL to ENTRY.  The string is strdup'ed, so
   the store owns (and later frees) its own copy. */
360 add_attr_string (Attrstore *s, unsigned int entry, Id name, const char *val)
363 nv.key = add_key (s, name, TYPE_ATTR_STRING, 0);
364 nv.v.str = strdup (val);
365 add_attr (s, entry, nv);
/* Append VAL to the int-list attribute NAME of ENTRY.  The list stores its
   element count in slot [0]; an existing list is grown by one, otherwise a
   new one-element list is created. */
369 add_attr_intlist_int (Attrstore *s, unsigned int entry, Id name, int val)
371 LongNV *nv = find_attr (s, entry, name);
/* intlist[0] holds the count; post-increment yields the old length. */
374 unsigned len = nv->v.intlist[0]++;
375 nv->v.intlist = realloc (nv->v.intlist, (len + 2) * sizeof (nv->v.intlist[0]));
376 nv->v.intlist[len+1] = val;
381 mynv.key = add_key (s, name, TYPE_ATTR_INTLIST, 0);
382 mynv.v.intlist = malloc (2 * sizeof (mynv.v.intlist[0]));
383 mynv.v.intlist[0] = 1;
384 mynv.v.intlist[1] = val;
385 add_attr (s, entry, mynv);
/* Append local string id ID to the zero-terminated localids attribute NAME
   of ENTRY, creating a new one-element list when none exists. */
390 add_attr_localids_id (Attrstore *s, unsigned int entry, Id name, LocalId id)
392 LongNV *nv = find_attr (s, entry, name);
/* Find the current length by scanning for the 0 terminator. */
396 while (nv->v.localids[len])
398 nv->v.localids = realloc (nv->v.localids, (len + 2) * sizeof (nv->v.localids[0]));
399 nv->v.localids[len] = id;
400 nv->v.localids[len+1] = 0;
405 mynv.key = add_key (s, name, TYPE_ATTR_LOCALIDS, 0);
406 mynv.v.localids = malloc (2 * sizeof (mynv.v.localids[0]));
407 mynv.v.localids[0] = id;
408 mynv.v.localids[1] = 0;
409 add_attr (s, entry, mynv);
/* Add a valueless (flag) attribute NAME to ENTRY. */
414 add_attr_void (Attrstore *s, unsigned int entry, Id name)
417 nv.key = add_key (s, name, TYPE_VOID, 0);
418 add_attr (s, entry, nv);
/* Copy every attribute of entry SRC into DEST that DEST does not already
   have.  List-valued types are copied element by element through the adder
   functions (so DEST gets its own allocations); everything else is copied
   as a raw LongNV. */
422 merge_attrs (Attrstore *s, unsigned dest, unsigned src)
425 ensure_entry (s, dest);
429 for (; nv->key; nv++)
430 if (!find_attr (s, dest, s->keys[nv->key].name))
431 switch (s->keys[nv->key].type)
433 case TYPE_ATTR_INTLIST:
435 unsigned i, len = nv->v.intlist[0];
436 for (i = 0; i < len; i++)
437 add_attr_intlist_int (s, dest, s->keys[nv->key].name, nv->v.intlist[i + 1]);
440 case TYPE_ATTR_LOCALIDS:
443 while (nv->v.localids[len])
444 add_attr_localids_id (s, dest, s->keys[nv->key].name, nv->v.localids[len++]);
447 case TYPE_ATTR_STRING:
448 add_attr_string (s, dest, s->keys[nv->key].name, nv->v.str);
/* Default case: scalar/void attrs can be copied wholesale. */
451 add_attr (s, dest, *nv);
457 #define pool_debug(a,b,...) fprintf (stderr, __VA_ARGS__)
459 static Id read_id (FILE *fp, Id max);
461 /* This routine is used only when attributes are embedded into the
462 normal repo SOLV file. */
/* Read a single attribute value of TYPE from FP (SOLV wire encoding) and
   add it to ENTRY as NAME.  IDMAP/MAXID translate pool-based ids for
   localids lists.  Used only when attributes are embedded in a repo SOLV
   file. */
464 add_attr_from_file (Attrstore *s, unsigned entry, Id name, int type, Id *idmap, unsigned maxid, FILE *fp)
466 Pool *pool = s->pool;
467 //fprintf (stderr, "%s: attribute in a repo SOLV?\n", id2str (pool, name));
471 add_attr_void (s, entry, name);
473 case TYPE_ATTR_CHUNK:
475 unsigned ofs = read_id (fp, 0);
476 unsigned len = read_id (fp, 0);
477 add_attr_chunk (s, entry, name, ofs, len);
482 unsigned i = read_id(fp, 0);
483 add_attr_int (s, entry, name, i);
486 case TYPE_ATTR_STRING:
/* Strings are read byte-wise into a stack buffer, switching to a heap
   buffer when they outgrow it (growth logic partially elided here). */
488 unsigned char localbuf[1024];
490 unsigned char *buf = localbuf;
491 unsigned len = sizeof (localbuf);
493 while((c = getc (fp)) != 0)
497 pool_debug (mypool, SAT_FATAL, "unexpected EOF\n");
500 /* Plus 1 as we also want to add the 0. */
507 memcpy (buf, localbuf, len - 256);
510 buf = xrealloc (buf, len);
515 add_attr_string (s, entry, name, (char*) buf);
520 case TYPE_ATTR_INTLIST:
/* Bit 6 of each encoded number marks "more elements follow"; the value
   itself is reassembled from the remaining bits. */
523 while ((i = read_id(fp, 0)) & 64)
524 add_attr_intlist_int (s, entry, name, (i & 63) | ((i >> 1) & ~63));
525 add_attr_intlist_int (s, entry, name, (i & 63) | ((i >> 1) & ~63));
528 case TYPE_ATTR_LOCALIDS:
531 /* The read ID will be pool-based. */
532 while ((i = read_id(fp, maxid)) != 0)
536 add_attr_localids_id (s, entry, name, str2localid (s, id2str (pool, i), 1));
541 if (type >= TYPE_ATTR_SPECIAL_START && type <= TYPE_ATTR_SPECIAL_END)
543 add_attr_special_int (s, entry, name, type - TYPE_ATTR_SPECIAL_START);
546 pool_debug(pool, SAT_FATAL, "unknown type %d\n", type);
551 /* Make sure all pages from PSTART to PEND (inclusive) are loaded,
552 and are consecutive. Return a pointer to the mapping of PSTART. */
554 load_page_range (Attrstore *s, unsigned int pstart, unsigned int pend)
556 unsigned char buf[BLOB_PAGESIZE];
559 /* Quick check in case all pages are there already and consecutive. */
560 for (i = pstart; i <= pend; i++)
561 if (s->pages[i].mapped_at == -1
563 && s->pages[i].mapped_at
564 != s->pages[i-1].mapped_at + BLOB_PAGESIZE)
567 return s->blob_store + s->pages[pstart].mapped_at;
569 /* Ensure that we can map the numbers of pages we need at all. */
570 if (pend - pstart + 1 > s->ncanmap)
572 unsigned int oldcan = s->ncanmap;
573 s->ncanmap = pend - pstart + 1;
576 s->mapped = xrealloc (s->mapped, s->ncanmap * sizeof (s->mapped[0]));
577 memset (s->mapped + oldcan, 0, (s->ncanmap - oldcan) * sizeof (s->mapped[0]));
578 s->blob_store = xrealloc (s->blob_store, s->ncanmap * BLOB_PAGESIZE);
580 fprintf (stderr, "PAGE: can map %d pages\n", s->ncanmap);
584 /* Now search for "cheap" space in our store. Space is cheap if it's either
585 free (very cheap) or contains pages we search for anyway. */
587 /* Setup cost array. */
588 unsigned int cost[s->ncanmap];
589 for (i = 0; i < s->ncanmap; i++)
591 unsigned int pnum = s->mapped[i];
597 Attrblobpage *p = s->pages + pnum;
598 assert (p->mapped_at != -1);
599 if (pnum >= pstart && pnum <= pend)
606 /* And search for cheapest space. */
607 unsigned int best_cost = -1;
608 unsigned int best = 0;
609 unsigned int same_cost = 0;
/* Sliding-window sum of per-slot costs over all candidate positions. */
610 for (i = 0; i + pend - pstart < s->ncanmap; i++)
612 unsigned int c = cost[i];
614 for (j = 0; j < pend - pstart + 1; j++)
617 best_cost = c, best = i;
618 else if (c == best_cost)
620 /* A null cost won't become better. */
624 /* If all places have the same cost we would thrash on slot 0. Avoid
625 this by doing a round-robin strategy in this case. */
626 if (same_cost == s->ncanmap - pend + pstart - 1)
627 best = s->rr_counter++ % (s->ncanmap - pend + pstart)
629 /* So we want to map our pages from [best] to [best+pend-pstart].
630 Use a very simple strategy, which doesn't make the best use of
631 our resources, but works. Throw away all pages in that range
632 (even ours) then copy around ours (in case they were outside the
633 range) or read them in. */
634 for (i = best; i < best + pend - pstart + 1; i++)
636 unsigned int pnum = s->mapped[i];
638 /* If this page is exactly at the right place already,
639 no need to evict it. */
640 && pnum != pstart + i - best)
642 /* Evict this page. */
644 fprintf (stderr, "PAGE: evict page %d from %d\n", pnum, i);
648 s->pages[pnum].mapped_at = -1;
652 /* Everything is free now. Read in the pages we want. */
653 for (i = pstart; i <= pend; i++)
655 Attrblobpage *p = s->pages + i;
656 unsigned int pnum = i - pstart + best;
657 void *dest = s->blob_store + pnum * BLOB_PAGESIZE;
658 if (p->mapped_at != -1)
660 if (p->mapped_at != pnum * BLOB_PAGESIZE)
663 fprintf (stderr, "PAGECOPY: %d to %d\n", i, pnum);
665 /* Still mapped somewhere else, so just copy it from there. */
666 memcpy (dest, s->blob_store + p->mapped_at, BLOB_PAGESIZE);
667 s->mapped[p->mapped_at / BLOB_PAGESIZE] = 0;
/* file_size has the compressed flag in bit 0; real length is >> 1
   (the shift is in an elided line -- TODO confirm). */
672 unsigned int in_len = p->file_size;
673 unsigned int compressed = in_len & 1;
676 fprintf (stderr, "PAGEIN: %d to %d", i, pnum);
678 /* Not mapped, so read in this page. */
679 if (fseek (s->file, p->file_offset, SEEK_SET) < 0)
681 perror ("mapping fseek");
684 if (fread (compressed ? buf : dest, in_len, 1, s->file) != 1)
686 perror ("mapping fread");
691 unsigned int out_len;
692 out_len = unchecked_decompress_buf (buf, in_len,
693 dest, BLOB_PAGESIZE);
/* Only the last page may legitimately decompress short. */
694 if (out_len != BLOB_PAGESIZE
695 && i < s->num_pages - 1)
697 fprintf (stderr, "can't decompress\n");
701 fprintf (stderr, " (expand %d to %d)", in_len, out_len);
705 fprintf (stderr, "\n");
708 p->mapped_at = pnum * BLOB_PAGESIZE;
709 s->mapped[pnum] = i + 1;
712 return s->blob_store + best * BLOB_PAGESIZE;
/* Return a pointer to LEN bytes of blob data at offset OFS, paging the
   covering page range in when a backing file is used; otherwise the data is
   read directly from the in-memory blob store (after a bounds check). */
716 attr_retrieve_blob (Attrstore *s, unsigned int ofs, unsigned int len)
721 unsigned int pstart = ofs / BLOB_PAGESIZE;
722 unsigned int pend = (ofs + len - 1) / BLOB_PAGESIZE;
723 const void *m = load_page_range (s, pstart, pend);
724 return m + (ofs & (BLOB_PAGESIZE - 1));
728 if (ofs >= s->blob_next_free)
730 return s->blob_store + ofs;
/* Packing helpers: add_elem appends one element to a block-rounded buffer;
   add_u16 emits little-endian 16 bit; add_num emits the 7-bit/continuation
   variable-length integer encoding.  The __wrong_buf__ typedef is a
   compile-time assertion that the buffer's element size is 1 byte.
   (Comments are placed only before the #defines -- the macro bodies use
   line continuations and are partially elided in this extract.) */
733 #define FLAT_ATTR_BLOCK 127
734 #define KEY_BLOCK 127
735 #define SCHEMA_BLOCK 127
737 #define add_elem(buf,ofs,val,block) do { \
738 if (((ofs) & (block)) == 0) \
739 buf = xrealloc (buf, ((ofs) + (block) + 1) * sizeof((buf)[0])); \
740 (buf)[(ofs)++] = val; \
742 #define add_u16(buf,ofs,val,block) do {\
743 typedef int __wrong_buf__[(1-sizeof((buf)[0])) * (sizeof((buf)[0])-1)];\
744 add_elem(buf,ofs,(val) & 0xFF,block); \
745 add_elem(buf,ofs,((val) >> 8) & 0xFF,block); \
747 #define add_num(buf,ofs,val,block) do {\
748 typedef int __wrong_buf__[(1-sizeof((buf)[0])) * (sizeof((buf)[0])-1)];\
749 if ((val) >= (1 << 14)) \
751 if ((val) >= (1 << 28)) \
752 add_elem (buf,ofs,((val) >> 28) | 128, block); \
753 if ((val) >= (1 << 21)) \
754 add_elem (buf,ofs,((val) >> 21) | 128, block); \
755 add_elem (buf,ofs,((val) >> 14) | 128, block); \
757 if ((val) >= (1 << 7)) \
758 add_elem (buf,ofs,((val) >> 7) | 128, block); \
759 add_elem (buf,ofs,(val) & 127, block); \
/* qsort comparator ordering LongNV attributes by ascending key id. */
763 longnv_cmp (const void *pa, const void *pb)
765 const LongNV *a = (const LongNV *)pa;
766 const LongNV *b = (const LongNV *)pb;
767 return a->key - b->key;
/* Return the key id for (NAME, TYPE), accumulating SIZE into an existing
   key or appending a new key (KEY_BLOCK-rounded growth) when none matches. */
771 add_key (Attrstore *s, Id name, unsigned type, unsigned size)
774 for (i = 0; i < s->nkeys; i++)
775 if (s->keys[i].name == name && s->keys[i].type == type)
779 s->keys[i].size += size;
782 if ((s->nkeys & KEY_BLOCK) == 0)
783 s->keys = xrealloc (s->keys, (s->nkeys + KEY_BLOCK + 1) * sizeof (s->keys[0]));
784 s->keys[i].name = name;
785 s->keys[i].type = type;
786 s->keys[i].size = size;
/* Convert the store from the editable LongNV representation into the packed
   flat form: per-entry schemas (sorted key lists, deduplicated), a flat
   byte stream of encoded values, and an ent2attr offset table.  The LongNV
   side allocations are freed along the way; old_mem tracks the memory the
   unpacked form used (reported at the end). */
791 attr_store_pack (Attrstore *s)
794 unsigned int old_mem = 0;
797 s->ent2attr = xcalloc (s->entries, sizeof (s->ent2attr[0]));
799 s->attr_next_free = 0;
/* Slot 0 of each packed array is a sentinel/terminator. */
805 add_num (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
806 add_elem (s->schemata, s->szschemata, 0, SCHEMA_BLOCK);
807 add_elem (s->schemaofs, s->nschemata, 0, SCHEMA_BLOCK);
809 for (i = 0; i < s->entries; i++)
811 unsigned int num_attrs = 0, ofs;
812 LongNV *nv = s->attrs[i];
817 old_mem += (num_attrs + 1) * sizeof (LongNV);
/* Sort by key so identical attribute sets share one schema. */
821 qsort (s->attrs[i], num_attrs, sizeof (LongNV), longnv_cmp);
822 unsigned int this_schema;
823 for (this_schema = 0; this_schema < s->nschemata; this_schema++)
825 for (ofs = 0; ofs < num_attrs; ofs++)
827 Id key = nv[ofs].key;
828 assert (s->schemaofs[this_schema] + ofs < s->szschemata);
829 if (key != s->schemata[s->schemaofs[this_schema]+ofs])
832 if (ofs == num_attrs && !s->schemata[s->schemaofs[this_schema]+ofs])
835 if (this_schema == s->nschemata)
837 /* This schema not found --> insert it. */
838 add_elem (s->schemaofs, s->nschemata, s->szschemata, SCHEMA_BLOCK);
839 for (ofs = 0; ofs < num_attrs; ofs++)
841 Id key = nv[ofs].key;
842 add_elem (s->schemata, s->szschemata, key, SCHEMA_BLOCK);
844 add_elem (s->schemata, s->szschemata, 0, SCHEMA_BLOCK);
/* Entry's packed data: schema id first, then the values in key order. */
846 s->ent2attr[i] = s->attr_next_free;
847 add_num (s->flat_attrs, s->attr_next_free, this_schema, FLAT_ATTR_BLOCK);
848 for (ofs = 0; ofs < num_attrs; ofs++)
849 switch (s->keys[nv[ofs].key].type)
855 unsigned int i = nv[ofs].v.i[0];
856 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
859 case TYPE_ATTR_CHUNK:
861 unsigned int i = nv[ofs].v.i[0];
862 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
864 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
867 case TYPE_ATTR_STRING:
869 const char *str = nv[ofs].v.str;
871 add_elem (s->flat_attrs, s->attr_next_free, *str, FLAT_ATTR_BLOCK);
872 add_elem (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
873 old_mem += strlen ((const char*)nv[ofs].v.str) + 1;
874 xfree ((void*)nv[ofs].v.str);
877 case TYPE_ATTR_INTLIST:
879 const int *il = nv[ofs].v.intlist;
881 //add_num (s->flat_attrs, s->attr_next_free, len, FLAT_ATTR_BLOCK);
882 old_mem += 4 * (len + 1);
/* Re-encode with the continuation bit (bit 6) per element. */
887 i = (i & 63) | ((i & ~63) << 1);
890 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
892 xfree (nv[ofs].v.intlist);
895 case TYPE_ATTR_LOCALIDS:
897 const Id *il = nv[ofs].v.localids;
899 for (; (i = *il) != 0; il++, old_mem += 4)
900 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
901 add_num (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
903 xfree (nv[ofs].v.localids);
911 old_mem += s->entries * sizeof (s->attrs[0]);
915 /* Remove the hashtable too, it will be build on demand in str2localid
916 the next time we call it, which should not happen while in packed mode. */
917 old_mem += (s->ss.stringhashmask + 1) * sizeof (s->ss.stringhashtbl[0]);
918 free (s->ss.stringhashtbl);
919 s->ss.stringhashtbl = 0;
920 s->ss.stringhashmask = 0;
922 fprintf (stderr, "%d\n", old_mem);
923 fprintf (stderr, "%zd\n", s->entries * sizeof(s->ent2attr[0]));
924 fprintf (stderr, "%d\n", s->attr_next_free);
925 fprintf (stderr, "%zd\n", s->nschemata * sizeof(s->schemaofs[0]));
926 fprintf (stderr, "%zd\n", s->szschemata * sizeof(s->schemata[0]));
927 fprintf (stderr, "pages %d\n", s->num_pages);
931 /* Pages in all blob pages, and deactivates paging. */
933 pagein_all (Attrstore *s)
935 /* If we have no backing file everything is there already. */
938 /*fprintf (stderr, "Aieee!\n");
/* Convert the store back from the packed flat form to the editable LongNV
   form by iterating every entry's attributes and re-adding them through the
   normal adder functions, then freeing the packed arrays. */
943 attr_store_unpack (Attrstore *s)
951 /* Make the store writable right away, so we can use our adder functions. */
953 s->attrs = xcalloc (s->entries, sizeof (s->attrs[0]));
955 for (i = 0; i < s->entries; i++)
958 FOR_ATTRS (s, i, &ai)
963 add_attr_void (s, i, ai.name);
966 add_attr_int (s, i, ai.name, ai.as_int);
968 case TYPE_ATTR_CHUNK:
969 add_attr_chunk (s, i, ai.name, ai.as_chunk[0], ai.as_chunk[1]);
971 case TYPE_ATTR_STRING:
972 add_attr_string (s, i, ai.name, ai.as_string);
974 case TYPE_ATTR_INTLIST:
/* Decode the continuation-bit encoding back to plain ints. */
979 get_num (ai.as_numlist, val);
980 add_attr_intlist_int (s, i, ai.name, (val & 63) | ((val >> 1) & ~63));
986 case TYPE_ATTR_LOCALIDS:
991 get_num (ai.as_numlist, val);
994 add_attr_localids_id (s, i, ai.name, val);
999 if (ai.type >= TYPE_ATTR_SPECIAL_START
1000 && ai.type <= TYPE_ATTR_SPECIAL_END)
1001 add_attr_special_int (s, i, ai.name, ai.type - TYPE_ATTR_SPECIAL_START);
/* Release the packed representation; it will be rebuilt on next pack. */
1007 xfree (s->ent2attr);
1009 xfree (s->flat_attrs);
1011 s->attr_next_free = 0;
1012 xfree (s->schemaofs);
1015 xfree (s->schemata);
/* Write one byte to FP; report and (presumably) abort on write error. */
1021 write_u8(FILE *fp, unsigned int x)
1023 if (putc(x, fp) == EOF)
1025 perror("write error");
/* Write X as 4 bytes big-endian to FP. */
1031 write_u32(FILE *fp, unsigned int x)
1033 if (putc(x >> 24, fp) == EOF ||
1034 putc(x >> 16, fp) == EOF ||
1035 putc(x >> 8, fp) == EOF ||
1038 perror("write error");
/* Write id X in the SOLV variable-length encoding: 7 bits per byte, high
   bit set on all but the last byte. */
1044 write_id(FILE *fp, Id x)
1049 putc((x >> 28) | 128, fp);
1051 putc((x >> 21) | 128, fp);
1052 putc((x >> 14) | 128, fp);
1055 putc((x >> 7) | 128, fp);
1056 if (putc(x & 127, fp) == EOF)
1058 perror("write error");
/* Write a zero-terminated id array with the in-array continuation encoding
   (bit 6 marks "more follows"; values are re-split around it).
   NOTE(review): most of this function is elided in this extract. */
1064 write_idarray(FILE *fp, Id *ids)
1078 id = (id & 63) | ((id & ~63) << 1);
1084 write_id(fp, id | 64);
/* Write all blob pages to FP: total blob size, page size, then for each
   page its (compressed-flag-tagged) length followed by the page data.
   Pages that don't shrink under compression are stored raw. */
1089 write_pages (FILE *fp, Attrstore *s)
1092 unsigned char buf[BLOB_PAGESIZE];
1094 /* The compressed pages in the file have different sizes, so we need
1095 to store these sizes somewhere, either in front of all page data,
1096 interleaved with the page data (in front of each page), or after
1097 the page data. At this point we don't yet know the final compressed
1098 sizes. These are the pros and cons:
1099 * in front of all page data
1100 + when reading back we only have to read this header, and know
1101 where every page data is placed
1102 - we have to compress all pages first before starting to write them.
1103 Our output stream might be unseekable, so we can't simply
1104 reserve space for the header, write all pages and then update the
1105 header. This needs memory for all compressed pages at once.
1106 * interleaved with page data
1107 + we can compress and write per page, low memory overhead
1108 - when reading back we have to read at least those numbers,
1109 thereby either having to read all page data, or at least seek
1111 * after all page data
1112 + we can do streamed writing, remembering the sizes per page,
1113 and emitting the header (which is a footer then) at the end
1114 - reading back is hardest: before the page data we don't know
1115 how long it is overall, so we have to put that information
1116 also at the end, but it needs a determinate position, so can
1117 only be at a known offset from the end. But that means that
1118 we must be able to seek when reading back. We have this
1119 wish anyway in case we want to use on-demand paging then, but
1122 Of all these it seems the best good/bad ratio is with the interleaved
1123 storage. No memory overhead at writing and no unreasonable limitations
1125 write_u32 (fp, s->blob_next_free);
1126 write_u32 (fp, BLOB_PAGESIZE);
1127 assert (((s->blob_next_free + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE) == s->num_pages);
1128 for (i = 0; i < s->num_pages; i++)
1130 unsigned int in_len;
1131 unsigned int out_len;
/* The last page may be partial. */
1133 if (i == s->num_pages - 1)
1134 in_len = s->blob_next_free & (BLOB_PAGESIZE - 1);
1136 in_len = BLOB_PAGESIZE;
1139 in = attr_retrieve_blob (s, i * BLOB_PAGESIZE, in_len);
/* Compress only if it actually shrinks (limit = in_len - 1). */
1140 out_len = compress_buf (in, in_len, buf, in_len - 1);
1143 memcpy (buf, in, in_len);
1150 fprintf (stderr, "page %d: %d -> %d\n", i, in_len, out_len);
/* Length is tagged: bit 0 set means the page data is compressed. */
1152 write_u32 (fp, out_len * 2 + (out_len != in_len));
1154 && fwrite (buf, out_len, 1, fp) != 1)
1156 perror("write error");
/* Serialize the whole store to FP as a SOLV-format file: header, local
   string pool, keys, schemata, per-entry attribute sizes, the flat value
   stream, and finally the blob pages.  Packs the store first; key names are
   temporarily converted to local string ids for writing and converted back
   afterwards so the pool stays usable. */
1163 write_attr_store (FILE *fp, Attrstore *s)
1166 unsigned local_ssize;
1168 attr_store_pack (s);
1170 /* Transform our attribute names (pool string IDs) into local IDs. */
1171 for (i = 1; i < s->nkeys; i++)
1172 s->keys[i].name = str2localid (s, id2str (s->pool, s->keys[i].name), 1);
1174 /* write file header */
1175 write_u32(fp, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
1176 write_u32(fp, SOLV_VERSION_2);
1179 write_u32(fp, s->ss.nstrings); // nstrings
1180 write_u32(fp, 0); // nrels
1181 write_u32(fp, s->entries); // nsolvables
1182 write_u32(fp, s->nkeys);
1183 write_u32(fp, s->nschemata);
1184 write_u32(fp, 0); /* no info block */
1185 unsigned solv_flags = 0;
1186 solv_flags |= SOLV_FLAG_PACKEDSIZES;
1187 //solv_flags |= SOLV_FLAG_PREFIX_POOL;
1188 write_u32(fp, solv_flags);
/* Total size of all local strings (string 0, "<NULL>", is skipped). */
1190 for (i = 1, local_ssize = 0; i < (unsigned)s->ss.nstrings; i++)
1191 local_ssize += strlen (localid2str (s, i)) + 1;
1193 write_u32 (fp, local_ssize);
1194 for (i = 1; i < (unsigned)s->ss.nstrings; i++)
1196 const char *str = localid2str (s, i);
1197 if (fwrite(str, strlen(str) + 1, 1, fp) != 1)
1199 perror("write error");
1204 for (i = 1; i < s->nkeys; i++)
1206 write_id (fp, s->keys[i].name);
1207 write_id (fp, s->keys[i].type);
1208 write_id (fp, s->keys[i].size);
1210 /* Also transform back the names (now local IDs) into pool IDs,
1211 so we can use the pool also after writing. */
1212 s->keys[i].name = str2id (s->pool, localid2str (s, s->keys[i].name), 0);
1215 write_id (fp, s->szschemata);
1216 Id *ids = s->schemata + 0;
1217 for (i = 0; i < s->nschemata; i++)
1218 ids = write_idarray (fp, ids);
1219 assert (ids == s->schemata + s->szschemata);
1221 /* Convert our offsets into sizes. */
1222 unsigned end = s->attr_next_free;
1223 for (i = s->entries; i > 0;)
1228 s->ent2attr[i] = end - s->ent2attr[i];
1229 end = end - s->ent2attr[i];
1232 /* The first zero should not have been consumed, but everything else. */
1234 /* Write the sizes and convert back to offsets. */
1236 for (i = 0; i < s->entries; i++)
1238 write_id (fp, s->ent2attr[i]);
1240 s->ent2attr[i] += start, start = s->ent2attr[i];
/* flat_attrs[0] is the sentinel; write everything after it in one go. */
1244 && fwrite (s->flat_attrs + 1, s->attr_next_free - 1, 1, fp) != 1)
1246 perror ("write error");
1250 write_pages (fp, s);
/* Fragment of a 4-byte big-endian integer reader (presumably read_u32 --
   the signature line is elided in this extract). */
1259 for (i = 0; i < 4; i++)
1264 fprintf(stderr, "unexpected EOF\n");
/* Read one variable-length id from FP (at most 5 bytes of 7 bits each).
   When MAX is nonzero, values >= MAX are rejected as corrupt. */
1273 read_id(FILE *fp, Id max)
1278 for (i = 0; i < 5; i++)
1283 fprintf(stderr, "unexpected EOF\n");
1289 if (max && x >= max)
1291 fprintf(stderr, "read_id: id too large (%u/%u)\n", x, max);
/* Fold in 7 payload bits; ^128 clears the continuation bit. */
1296 x = (x << 7) ^ c ^ 128;
1298 fprintf(stderr, "read_id: id too long\n");
/* Read a zero-terminated id array from FP into STORE (bounded by END),
   optionally mapping ids through MAP and handling the in-array
   continuation encoding (bit 6) and the prereq marker.
   NOTE(review): much of the control flow is elided in this extract. */
1303 read_idarray(FILE *fp, Id max, Id *map, Id *store, Id *end, int relative)
1313 pool_debug(mypool, SAT_FATAL, "unexpected EOF\n");
1318 x = (x << 6) | (c & 63);
1321 if (x == 0 && c == 0x40)
1326 pool_debug(mypool, SAT_FATAL, "read_idarray: array overflow\n");
1329 *store++ = SOLVABLE_PREREQMARKER;
1339 pool_debug(mypool, SAT_FATAL, "read_idarray: id too large (%u/%u)\n", x, max);
1346 pool_debug(mypool, SAT_FATAL, "read_idarray: array overflow\n");
1352 if (x == 0) /* already have trailing zero? */
1356 pool_debug(mypool, SAT_FATAL, "read_idarray: array overflow\n");
1365 x = (x << 7) ^ c ^ 128;
1369 /* Try to either setup on-demand paging (using FP as backing
1370 file), or in case that doesn't work (FP not seekable) slurps in
1371 all pages and deactivates paging. */
1373 read_or_setup_pages (FILE *fp, Attrstore *s)
1375 unsigned int blobsz;
1376 unsigned int pagesz;
1377 unsigned int npages;
1379 unsigned int can_seek;
1381 unsigned char buf[BLOB_PAGESIZE];
1382 blobsz = read_u32 (fp);
1383 pagesz = read_u32 (fp);
1384 if (pagesz != BLOB_PAGESIZE)
1386 /* We could handle this by slurping in everything. */
1387 fprintf (stderr, "non matching page size\n");
/* ftell failure implies an unseekable stream (pipe etc.). */
1391 if ((cur_file_ofs = ftell (fp)) < 0)
1394 fprintf (stderr, "can %sseek\n", can_seek ? "" : "NOT ");
1395 npages = (blobsz + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
1397 s->num_pages = npages;
1398 s->pages = xmalloc (npages * sizeof (s->pages[0]));
1400 /* If we can't seek on our input we have to slurp in everything. */
1403 s->blob_next_free = blobsz;
1404 s->blob_store = xrealloc (s->blob_store, (s->blob_next_free + BLOB_BLOCK) &~BLOB_BLOCK);
1406 for (i = 0; i < npages; i++)
/* Per page: tagged length (bit 0 = compressed) precedes the data. */
1408 unsigned int in_len = read_u32 (fp);
1409 unsigned int compressed = in_len & 1;
1410 Attrblobpage *p = s->pages + i;
1413 fprintf (stderr, "page %d: len %d (%scompressed)\n",
1414 i, in_len, compressed ? "" : "not ");
/* Seekable path: record offset/size only, defer the actual read. */
1420 p->file_offset = cur_file_ofs;
1421 p->file_size = in_len * 2 + compressed;
1422 if (fseek (fp, in_len, SEEK_CUR) < 0)
1425 fprintf (stderr, "can't seek after we thought we can\n");
1426 /* We can't fall back to non-seeking behaviour as we already
1427 read over some data pages without storing them away. */
1430 cur_file_ofs += in_len;
1434 unsigned int out_len;
1435 void *dest = s->blob_store + i * BLOB_PAGESIZE;
1436 p->mapped_at = i * BLOB_PAGESIZE;
1439 /* We can't seek, so suck everything in. */
1440 if (fread (compressed ? buf : dest, in_len, 1, fp) != 1)
1447 out_len = unchecked_decompress_buf (buf, in_len,
1448 dest, BLOB_PAGESIZE);
1449 if (out_len != BLOB_PAGESIZE
1452 fprintf (stderr, "can't decompress\n");
1461 /* If we are here we were able to seek to all page
1462 positions, so activate paging by copying FP into our structure.
1463 We dup() the file, so that our callers can fclose() it and we
1464 still have it open. But this means that we share file positions
1465 with the input filedesc. So in case our caller reads it after us,
1466 and calls back into us we might change the file position unexpectedly
1468 int fd = dup (fileno (fp));
1471 /* Jeez! What a bloody system, we can't dup() anymore. */
1475 /* XXX we don't close this yet anywhere. */
1476 s->file = fdopen (fd, "r");
1479 /* My God! What happened now? */
/* Read a SOLV-format attribute store from FP into a new Attrstore over
   POOL: header/version check, local string pool, keys (names globalized to
   pool ids), schemata, per-entry sizes, the flat value stream, and the blob
   pages (via read_or_setup_pages). */
1487 attr_store_read (FILE *fp, Pool *pool)
1491 unsigned local_ssize;
1492 unsigned nstrings, nschemata;
1493 Attrstore *s = new_store (pool);
1495 if (read_u32(fp) != ('S' << 24 | 'O' << 16 | 'L' << 8 | 'V'))
1497 pool_debug(pool, SAT_FATAL, "not a SOLV file\n");
1500 unsigned solvversion = read_u32(fp);
1501 switch (solvversion)
1503 case SOLV_VERSION_2:
1506 pool_debug(pool, SAT_FATAL, "unsupported SOLV version\n");
1510 nstrings = read_u32(fp);
1511 read_u32(fp); //nrels
1512 nentries = read_u32(fp);
1513 s->nkeys = read_u32(fp);
1514 nschemata = read_u32(fp);
1515 read_u32(fp); //ninfo
1516 unsigned solvflags = read_u32(fp);
1517 if (!(solvflags & SOLV_FLAG_PACKEDSIZES))
1519 pool_debug(pool, SAT_FATAL, "invalid attribute store\n");
1523 /* Slightly hacky. Our local string pool already contains "<NULL>" and
1524 "". We write out the "" too, so we have to read over it. We write it
1525 out to be compatible with the SOLV file and to not have to introduce
1526 merging and mapping the string IDs. */
1527 local_ssize = read_u32 (fp) - 1;
1528 char *strsp = (char *)xrealloc(s->ss.stringspace, s->ss.sstrings + local_ssize + 1);
1529 Offset *str = (Offset *)xrealloc(s->ss.strings, (nstrings) * sizeof(Offset));
1531 s->ss.stringspace = strsp;
1532 s->ss.strings = str;
1533 strsp += s->ss.sstrings;
/* Skip the leading "" (one NUL byte) before the real string data. */
1535 unsigned char ignore_char = 1;
1536 if (fread(&ignore_char, 1, 1, fp) != 1
1537 || (local_ssize && fread(strsp, local_ssize, 1, fp) != 1)
1538 || ignore_char != 0)
1540 perror ("read error while reading strings");
1543 strsp[local_ssize] = 0;
1545 /* Don't build hashtable here, it will be built on demand by str2localid
1546 should we call that. */
/* Rebuild per-string offsets by walking the NUL-separated blob. */
1548 strsp = s->ss.stringspace;
1549 s->ss.nstrings = nstrings;
1550 for (i = 0; i < nstrings; i++)
1552 str[i] = strsp - s->ss.stringspace;
1553 strsp += strlen (strsp) + 1;
1555 s->ss.sstrings = strsp - s->ss.stringspace;
1557 s->keys = xrealloc (s->keys, ((s->nkeys + KEY_BLOCK) & ~KEY_BLOCK) * sizeof (s->keys[0]));
1558 /* s->keys[0] is initialized in new_store. */
1559 for (i = 1; i < s->nkeys; i++)
1561 s->keys[i].name = read_id (fp, nstrings);
1562 s->keys[i].type = read_id (fp, TYPE_ATTR_TYPE_MAX + 1);
1563 s->keys[i].size = read_id (fp, 0);
1565 /* Globalize the attribute names (they are local IDs right now). */
1566 s->keys[i].name = str2id (s->pool, localid2str (s, s->keys[i].name), 1);
1569 s->szschemata = read_id (fp, 0);
1571 s->schemata = xmalloc (((s->szschemata + SCHEMA_BLOCK) & ~SCHEMA_BLOCK) * sizeof (s->schemata[0]));
1573 Id *ids = s->schemata;
1574 //add_elem (s->schemaofs, s->nschemata, 0, SCHEMA_BLOCK);
1576 while (ids < s->schemata + s->szschemata)
1578 add_elem (s->schemaofs, s->nschemata, ids - s->schemata, SCHEMA_BLOCK);
1579 ids = read_idarray (fp, s->nkeys, 0, ids, s->schemata + s->szschemata, 0);
1581 assert (ids == s->schemata + s->szschemata);
1582 assert (nschemata == s->nschemata);
1584 s->entries = nentries;
1586 s->ent2attr = xmalloc (s->entries * sizeof (s->ent2attr[0]));
/* Sizes on disk; accumulate into running offsets. */
1588 for (i = 0; i < s->entries; i++)
1590 int d = read_id (fp, 0);
1592 s->ent2attr[i] = start, start += d;
1597 s->attr_next_free = start;
1598 s->flat_attrs = xmalloc (((s->attr_next_free + FLAT_ATTR_BLOCK) & ~FLAT_ATTR_BLOCK) * sizeof (s->flat_attrs[0]));
1599 s->flat_attrs[0] = 0;
1600 if (s->entries && fread (s->flat_attrs + 1, s->attr_next_free - 1, 1, fp) != 1)
1602 perror ("read error");
1606 read_or_setup_pages (fp, s);
/* Search all entries for string-valued attributes matching PATTERN and
   invoke CB for each match.  FLAGS' low 3 bits select the match mode
   (substring / string / glob / regex); SEARCH_NOCASE folds case and
   SEARCH_BLOBS enables searching chunk data.  NAME != 0 restricts the
   search to that attribute. */
1614 attr_store_search_s (Attrstore *s, const char *pattern, int flags, Id name, cb_attr_search_s cb)
1619 /* If we search for a glob, but we don't have a wildcard pattern, make this
1620 an exact string search. */
1621 if ((flags & 7) == SEARCH_GLOB
1622 && !strpbrk (pattern, "?*["))
1623 flags = SEARCH_STRING | (flags & ~7);
1624 if ((flags & 7) == SEARCH_REGEX)
1626 /* We feed multiple lines eventually (e.g. authors or descriptions),
1627 so set REG_NEWLINE. */
1628 if (regcomp (&regex, pattern,
1629 REG_EXTENDED | REG_NOSUB | REG_NEWLINE
1630 | ((flags & SEARCH_NOCASE) ? REG_ICASE : 0)) != 0)
1633 for (i = 0; i < s->entries; i++)
1634 FOR_ATTRS (s, i, &ai)
1637 if (name && name != ai.name)
/* Extract a searchable C string depending on the attribute type. */
1643 case TYPE_ATTR_INTLIST:
1645 case TYPE_ATTR_CHUNK:
1646 if (!(flags & SEARCH_BLOBS))
1648 str = attr_retrieve_blob (s, ai.as_chunk[0], ai.as_chunk[1]);
1650 case TYPE_ATTR_STRING:
1653 case TYPE_ATTR_LOCALIDS:
1656 get_num (ai.as_numlist, val);
1658 str = localid2str (s, val);
1666 unsigned int match = 0;
1669 case SEARCH_SUBSTRING:
1670 if (flags & SEARCH_NOCASE)
1671 match = !! strcasestr (str, pattern);
1673 match = !! strstr (str, pattern);
1676 if (flags & SEARCH_NOCASE)
1677 match = ! strcasecmp (str, pattern);
1679 match = ! strcmp (str, pattern);
1682 match = ! fnmatch (pattern, str,
1683 (flags & SEARCH_NOCASE) ? FNM_CASEFOLD : 0);
1686 match = ! regexec (&regex, str, 0, NULL, 0);
1692 cb (s, i, ai.name, str);
/* localids lists have multiple strings per attribute; advance. */
1693 if (ai.type != TYPE_ATTR_LOCALIDS)
1696 get_num (ai.as_numlist, val);
1699 str = localid2str (s, val);
1702 if ((flags & 7) == SEARCH_REGEX)
/* Self-test driver fragment: builds a small store with a few entries and
   attribute types, then serializes it to stdout.
   NOTE(review): the first add_attr_int call passes `s` where the sibling
   calls pass `s->pool` to str2id -- looks like a bug in the original test
   code; verify against the str2id signature. */
1710 Pool *pool = pool_create ();
1711 Attrstore *s = new_store (pool);
1712 unsigned int id1 = new_entry (s);
1713 unsigned int id2 = new_entry (s);
1714 unsigned int id3 = new_entry (s);
1715 unsigned int id4 = new_entry (s);
1716 add_attr_int (s, id1, str2id (s, "name1", 1), 42);
1717 add_attr_chunk (s, id1, str2id (s->pool, "name2", 1), 9876, 1024);
1718 add_attr_string (s, id1, str2id (s->pool, "name3", 1), "hallo");
1719 add_attr_int (s, id1, str2id (s->pool, "name1", 1), 43);
1720 add_attr_intlist_int (s, id1, str2id (s->pool, "intlist1", 1), 3);
1721 add_attr_intlist_int (s, id1, str2id (s->pool, "intlist1", 1), 14);
1722 add_attr_intlist_int (s, id1, str2id (s->pool, "intlist1", 1), 1);
1723 add_attr_intlist_int (s, id1, str2id (s->pool, "intlist1", 1), 59);
1724 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids1", 1), str2localid (s, "one", 1));
1725 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids1", 1), str2localid (s, "two", 1));
1726 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids1", 1), str2localid (s, "three", 1));
1727 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids2", 1), str2localid (s, "three", 1));
1728 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids2", 1), str2localid (s, "two", 1));
1729 add_attr_localids_id (s, id1, str2id (s->pool, "l_ids2", 1), str2localid (s, "one", 1));
1730 write_attr_store (stdout, s);