2 * Copyright (c) 2007, Novell Inc.
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
17 #include "attr_store.h"
22 #include "attr_store_p.h"
/* Allocation-granularity masks (power-of-two minus one, used with the
   "(x + BLOCK) & ~BLOCK" rounding idiom seen below) and the two reserved
   local string ids (index 0 = "<NULL>", index 1 = "").
   NOTE(review): this listing is line-numbered and has gaps; lines kept
   verbatim. */
27 #define TYPE_WIDTH (16-NAME_WIDTH)
29 #define BLOB_BLOCK 65535
31 #define STRINGSPACE_BLOCK 1023
32 #define STRING_BLOCK 127
33 #define LOCALID_NULL 0
34 #define LOCALID_EMPTY 1
/* Allocate and initialize a fresh Attrstore bound to POOL.
   Seeds the local string space with the two predefined strings
   "<NULL>" (LOCALID_NULL) and "" (LOCALID_EMPTY).
   NOTE(review): calloc results are used unchecked here — OOM would crash;
   confirm whether an xmalloc-style wrapper was intended.
   NOTE(review): `count` on line 51 is declared in a line not visible in
   this gapped listing — verify it equals the number of predefined strings. */
37 new_store (Pool *pool)
39 Attrstore *s = calloc (1, sizeof (Attrstore));
41 s->nameids = calloc (128, sizeof (s->nameids[0]));
46 int totalsize = strlen ("<NULL>") + 1 + 1;
49 // alloc appropriate space
50 s->stringspace = (char *)xmalloc((totalsize + STRINGSPACE_BLOCK) & ~STRINGSPACE_BLOCK);
51 s->strings = (Offset *)xmalloc(((count + STRING_BLOCK) & ~STRING_BLOCK) * sizeof(Offset));
53 // now copy predefined strings into allocated space
55 strcpy (s->stringspace + s->sstrings, "<NULL>");
56 s->strings[0] = s->sstrings;
57 s->sstrings += strlen (s->stringspace + s->strings[0]) + 1;
58 strcpy (s->stringspace + s->sstrings, "");
59 s->strings[1] = s->sstrings;
60 s->sstrings += strlen (s->stringspace + s->strings[1]) + 1;
/* Look up STR in the store-local string pool; if CREATE is non-zero and
   the string is unknown, intern it and return the new LocalId.
   Uses open addressing with HASHCHAIN_NEXT probing; the table is grown
   (and fully rehashed) once it is more than half full.
   NOTE(review): line 125 sizes the realloc with sizeof(Hashval) although
   s->strings holds Offset elements (see line 51) — verify the two types
   have the same size or fix the sizeof.
   NOTE(review): the `p = xrealloc(p, …)` pattern used throughout leaks the
   old block if xrealloc can fail-return; presumably xrealloc aborts on OOM
   — confirm. */
68 str2localid (Attrstore *s, const char *str, int create)
83 hashmask = s->stringhashmask;
84 hashtbl = s->stringhashtbl;
86 // expand hashtable if needed
87 if (s->nstrings * 2 > hashmask)
92 s->stringhashmask = hashmask = mkmask(s->nstrings + STRING_BLOCK);
93 s->stringhashtbl = hashtbl = (Hashtable)xcalloc(hashmask + 1, sizeof(Id));
95 // rehash all strings into new hashtable
96 for (i = 1; i < s->nstrings; i++)
98 h = strhash(s->stringspace + s->strings[i]) & hashmask;
100 while (hashtbl[h] != 0)
101 h = HASHCHAIN_NEXT(h, hh, hashmask);
106 // compute hash and check for match
108 h = strhash(str) & hashmask;
109 hh = HASHCHAIN_START;
110 while ((id = hashtbl[h]) != 0)
112 // break if string already hashed
113 if(!strcmp(s->stringspace + s->strings[id], str))
115 h = HASHCHAIN_NEXT(h, hh, hashmask);
117 if (id || !create) // exit here if string found
120 // generate next id and save in table
124 if ((id & STRING_BLOCK) == 0)
125 s->strings = xrealloc(s->strings, ((s->nstrings + STRING_BLOCK) & ~STRING_BLOCK) * sizeof(Hashval));
126 // 'pointer' into stringspace is Offset of next free pos: sstrings
127 s->strings[id] = s->sstrings;
129 space_needed = strlen(str) + 1;
131 // resize string buffer if needed
132 if (((s->sstrings + space_needed - 1) | STRINGSPACE_BLOCK) != ((s->sstrings - 1) | STRINGSPACE_BLOCK))
133 s->stringspace = xrealloc(s->stringspace, (s->sstrings + space_needed + STRINGSPACE_BLOCK) & ~STRINGSPACE_BLOCK);
134 // copy new string into buffer
135 memcpy(s->stringspace + s->sstrings, str, space_needed);
136 // next free pos is behind new string
137 s->sstrings += space_needed;
/* Map a LocalId back to its C string inside the store's string space.
   No bounds check on ID — callers must pass a valid id (< s->nstrings). */
143 localid2str(Attrstore *s, LocalId id)
145 return s->stringspace + s->strings[id];
/* Translate a pool-global Id into this store's compact NameId, appending
   a new mapping when the Id was not seen before.
   NOTE(review): the lookup is a linear scan over all known name ids —
   O(n) per call; fine for small attribute vocabularies only.
   NOTE(review): realloc return is assigned unchecked (leak/UB on failure)
   and, unlike neighbouring code, plain realloc is used instead of
   xrealloc — consider unifying. */
149 id2nameid (Attrstore *s, Id id)
152 for (i = 0; i < s->num_nameids; i++)
153 if (s->nameids[i] == id)
155 if (s->num_nameids >= (1 << NAME_WIDTH))
157 fprintf (stderr, "Too many attribute names\n");
160 if ((s->num_nameids & 127) == 0)
161 s->nameids = realloc (s->nameids, ((s->num_nameids+128) * sizeof (s->nameids[0])));
162 s->nameids[s->num_nameids++] = id;
163 return s->num_nameids - 1;
/* Convenience wrapper: intern STR in the global pool (create = 1), then
   map the resulting Id to a store-local NameId. */
167 str2nameid (Attrstore *s, const char *str)
169 return id2nameid (s, str2id (s->pool, str, 1));
/* Grow s->attrs so that index ENTRY is valid, zero-filling the new slots.
   Growth is done in 128-entry blocks; reallocation only happens when the
   rounded-up capacity actually changes.
   NOTE(review): plain realloc/malloc results are used unchecked here. */
173 ensure_entry (Attrstore *s, unsigned int entry)
175 unsigned int old_num = s->entries;
176 if (entry < s->entries)
178 s->entries = entry + 1;
179 if (((old_num + 127) & ~127) != ((s->entries + 127) & ~127))
182 s->attrs = realloc (s->attrs, (((s->entries+127) & ~127) * sizeof (s->attrs[0])));
184 s->attrs = malloc (((s->entries+127) & ~127) * sizeof (s->attrs[0]));
186 memset (s->attrs + old_num, 0, (s->entries - old_num) * sizeof (s->attrs[0]));
/* Append one empty entry (NULL attribute list) and return its index.
   Capacity grows in 128-entry blocks, mirroring ensure_entry. */
190 new_entry (Attrstore *s)
192 if ((s->entries & 127) == 0)
195 s->attrs = realloc (s->attrs, ((s->entries+128) * sizeof (s->attrs[0])));
197 s->attrs = malloc ((s->entries+128) * sizeof (s->attrs[0]));
199 s->attrs[s->entries++] = 0;
200 return s->entries - 1;
/* Find the attribute NAME on ENTRY by scanning its zero-terminated LongNV
   vector; out-of-range entry/name presumably returns NULL (the early-out
   bodies fall in gaps of this listing — confirm). */
204 find_attr (Attrstore *s, unsigned int entry, NameId name)
207 if (entry >= s->entries)
209 if (name >= s->num_nameids)
211 nv = s->attrs[entry];
214 while (nv->name && nv->name != name)
/* Append ATTR to ENTRY's LongNV vector, keeping the terminating
   zero-name sentinel (hence the final write at position len - 2).
   NOTE(review): between lines 239 and 243 this gapped listing must grow
   `len` (by 2: new element + sentinel) before the realloc — verify in the
   full source.
   NOTE(review): duplicate names are allowed — the scan on line 235 only
   positions nv; it does not reject an existing NAME. */
223 add_attr (Attrstore *s, unsigned int entry, LongNV attr)
227 if (entry >= s->entries)
229 if (attr.name >= s->num_nameids)
231 nv = s->attrs[entry];
235 while (nv->name && nv->name != attr.name)
239 len = nv - s->attrs[entry];
243 s->attrs[entry] = realloc (s->attrs[entry], len * sizeof (LongNV));
245 s->attrs[entry] = malloc (len * sizeof (LongNV));
246 nv = s->attrs[entry] + len - 2;
/* Typed convenience adders: build a LongNV of the respective type and
   hand it to add_attr. The field assignments (nv.name, nv.v.*) sit in
   gaps of this listing. */
252 add_attr_int (Attrstore *s, unsigned int entry, NameId name, unsigned int val)
258 add_attr (s, entry, nv);
262 add_attr_chunk (Attrstore *s, unsigned int entry, NameId name, unsigned int ofs, unsigned int len)
266 nv.type = ATTR_CHUNK;
269 add_attr (s, entry, nv);
/* Copy LEN bytes from PTR into the blob store and record them on ENTRY as
   an ATTR_CHUNK (offset/length pair). The store is grown in BLOB_BLOCK
   granules; afterwards the page table (s->pages) is extended so every
   BLOB_PAGESIZE-sized page of the in-memory store has a descriptor with
   mapped_at pointing at its resident position. */
273 add_attr_blob (Attrstore *s, unsigned int entry, NameId name, const void *ptr, unsigned int len)
275 if (((s->blob_next_free + BLOB_BLOCK) & ~BLOB_BLOCK)
276 != ((s->blob_next_free + len + BLOB_BLOCK) & ~BLOB_BLOCK))
278 unsigned int blobsz = (s->blob_next_free + len + BLOB_BLOCK) &~BLOB_BLOCK;
279 s->blob_store = xrealloc (s->blob_store, blobsz);
281 memcpy (s->blob_store + s->blob_next_free, ptr, len);
282 add_attr_chunk (s, entry, name, s->blob_next_free, len);
283 s->blob_next_free += len;
285 unsigned int npages = (s->blob_next_free + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
286 if (npages != s->num_pages)
289 s->pages = xrealloc (s->pages, npages * sizeof (s->pages[0]));
290 for (p = s->pages + s->num_pages; s->num_pages < npages;
293 p->mapped_at = s->num_pages * BLOB_PAGESIZE;
/* Attach a string attribute (the store takes ownership of a strdup'd
   copy — it is freed later in attr_store_pack) and an Id attribute.
   NOTE(review): strdup return is used unchecked. */
301 add_attr_string (Attrstore *s, unsigned int entry, NameId name, const char *val)
305 nv.type = ATTR_STRING;
306 nv.v.str = strdup (val);
307 add_attr (s, entry, nv);
311 add_attr_id (Attrstore *s, unsigned int entry, NameId name, Id val)
317 add_attr (s, entry, nv);
/* Append VAL to the zero-terminated int list attribute NAME of ENTRY,
   creating the list (with a terminating 0) if it does not exist yet.
   NOTE(review): a zero-terminated list cannot store the value 0 itself —
   presumably callers never pass 0; confirm.
   NOTE(review): plain realloc/malloc results are used unchecked. */
321 add_attr_intlist_int (Attrstore *s, unsigned int entry, NameId name, int val)
323 LongNV *nv = find_attr (s, entry, name);
329 while (nv->v.intlist[len])
331 nv->v.intlist = realloc (nv->v.intlist, (len + 2) * sizeof (nv->v.intlist[0]));
332 nv->v.intlist[len] = val;
333 nv->v.intlist[len+1] = 0;
339 mynv.type = ATTR_INTLIST;
340 mynv.v.intlist = malloc (2 * sizeof (mynv.v.intlist[0]));
341 mynv.v.intlist[0] = val;
342 mynv.v.intlist[1] = 0;
343 add_attr (s, entry, mynv);
/* Same append-to-zero-terminated-list scheme as add_attr_intlist_int,
   but for LocalId lists (ATTR_LOCALIDS). LOCALID_NULL (0) is the
   terminator, so it cannot be stored as a member. */
348 add_attr_localids_id (Attrstore *s, unsigned int entry, NameId name, LocalId id)
350 LongNV *nv = find_attr (s, entry, name);
354 while (nv->v.localids[len])
356 nv->v.localids = realloc (nv->v.localids, (len + 2) * sizeof (nv->v.localids[0]));
357 nv->v.localids[len] = id;
358 nv->v.localids[len+1] = 0;
364 mynv.type = ATTR_LOCALIDS;
365 mynv.v.localids = malloc (2 * sizeof (mynv.v.localids[0]));
366 mynv.v.localids[0] = id;
367 mynv.v.localids[1] = 0;
368 add_attr (s, entry, mynv);
372 /* Make sure all pages from PSTART to PEND (inclusive) are loaded,
373    and are consecutive. Return a pointer to the mapping of PSTART. */
/* Strategy: (1) fast path if already mapped consecutively; (2) grow the
   mapping window if the requested range exceeds it; (3) pick the cheapest
   slot range via the cost array (free slot = cheapest, a slot holding a
   wanted page = cheap); (4) evict, then copy-or-read each wanted page in.
   NOTE(review): mapped_at == -1 marks "not mapped"; mapped_at is compared
   against -1 although pages hold unsigned offsets — relies on wraparound
   sentinel semantics.
   NOTE(review): `unsigned int cost[s->ncanmap]` is a VLA sized at runtime;
   a huge ncanmap would risk stack overflow.
   NOTE(review): best_cost = -1 is the UINT_MAX "no candidate yet" sentinel. */
375 load_page_range (Attrstore *s, unsigned int pstart, unsigned int pend)
377 unsigned char buf[BLOB_PAGESIZE];
380 /* Quick check in case all pages are there already and consecutive. */
381 for (i = pstart; i <= pend; i++)
382 if (s->pages[i].mapped_at == -1
384 && s->pages[i].mapped_at
385 != s->pages[i-1].mapped_at + BLOB_PAGESIZE))
388 return s->blob_store + s->pages[pstart].mapped_at;
390 /* Ensure that we can map the numbers of pages we need at all. */
391 if (pend - pstart + 1 > s->ncanmap)
393 unsigned int oldcan = s->ncanmap;
394 s->ncanmap = pend - pstart + 1;
397 s->mapped = xrealloc (s->mapped, s->ncanmap * sizeof (s->mapped[0]));
398 memset (s->mapped + oldcan, 0, (s->ncanmap - oldcan) * sizeof (s->mapped[0]));
399 s->blob_store = xrealloc (s->blob_store, s->ncanmap * BLOB_PAGESIZE);
400 fprintf (stderr, "PAGE: can map %d pages\n", s->ncanmap);
403 /* Now search for "cheap" space in our store. Space is cheap if it's either
404    free (very cheap) or contains pages we search for anyway. */
406 /* Setup cost array. */
407 unsigned int cost[s->ncanmap];
408 for (i = 0; i < s->ncanmap; i++)
410 unsigned int pnum = s->mapped[i];
416 Attrblobpage *p = s->pages + pnum;
417 assert (p->mapped_at != -1);
418 if (pnum >= pstart && pnum <= pend)
425 /* And search for cheapest space. */
426 unsigned int best_cost = -1;
427 unsigned int best = 0;
428 for (i = 0; i + pend - pstart < s->ncanmap; i++)
430 unsigned int c = cost[i];
432 for (j = 0; j < pend - pstart + 1; j++)
435 best_cost = c, best = i;
436 /* A null cost won't become better. */
441 /* So we want to map our pages from [best] to [best+pend-pstart].
442    Use a very simple strategy, which doesn't make the best use of
443    our resources, but works. Throw away all pages in that range
444    (even ours) then copy around ours (in case they were outside the
445    range) or read them in. */
446 for (i = best; i < best + pend - pstart + 1; i++)
448 unsigned int pnum = s->mapped[i];
450 /* If this page is exactly at the right place already,
451    no need to evict it. */
452 && pnum != pstart + i - best)
454 /* Evict this page. */
455 fprintf (stderr, "PAGE: evict page %d from %d\n", pnum, i);
458 s->pages[pnum].mapped_at = -1;
462 /* Everything is free now. Read in the pages we want. */
463 for (i = pstart; i <= pend; i++)
465 Attrblobpage *p = s->pages + i;
466 unsigned int pnum = i - pstart + best;
467 void *dest = s->blob_store + pnum * BLOB_PAGESIZE;
468 if (p->mapped_at != -1)
470 if (p->mapped_at != pnum * BLOB_PAGESIZE)
472 fprintf (stderr, "PAGECOPY: %d to %d\n", i, pnum);
473 /* Still mapped somewhere else, so just copy it from there. */
474 memcpy (dest, s->blob_store + p->mapped_at, BLOB_PAGESIZE);
475 s->mapped[p->mapped_at / BLOB_PAGESIZE] = 0;
480 unsigned int in_len = p->file_size;
481 unsigned int compressed = in_len & 1;
483 fprintf (stderr, "PAGEIN: %d to %d", i, pnum);
484 /* Not mapped, so read in this page. */
485 if (fseek (s->file, p->file_offset, SEEK_SET) < 0)
487 perror ("mapping fseek");
490 if (fread (compressed ? buf : dest, in_len, 1, s->file) != 1)
492 perror ("mapping fread");
497 unsigned int out_len;
498 out_len = unchecked_decompress_buf (buf, in_len,
499 dest, BLOB_PAGESIZE);
500 if (out_len != BLOB_PAGESIZE
501 && i < s->num_pages - 1)
503 fprintf (stderr, "can't decompress\n");
506 fprintf (stderr, " (expand %d to %d)", in_len, out_len);
508 fprintf (stderr, "\n");
510 p->mapped_at = pnum * BLOB_PAGESIZE;
511 s->mapped[pnum] = i + 1;
514 return s->blob_store + best * BLOB_PAGESIZE;
/* Return a pointer to LEN blob bytes at offset OFS. With a backing file
   the needed page range is faulted in via load_page_range; otherwise the
   bytes are served straight from the in-memory blob store (with a bounds
   check against blob_next_free). The returned pointer is only valid until
   the next page operation. */
518 attr_retrieve_blob (Attrstore *s, unsigned int ofs, unsigned int len)
523 unsigned int pstart = ofs / BLOB_PAGESIZE;
524 unsigned int pend = (ofs + len - 1) / BLOB_PAGESIZE;
525 const void *m = load_page_range (s, pstart, pend);
526 return m + (ofs & (BLOB_PAGESIZE - 1));
530 if (ofs >= s->blob_next_free)
532 return s->blob_store + ofs;
/* Dynamic-array append helpers. add_elem grows BUF in (block+1)-sized
   steps and appends one element. add_u16 stores little-endian 16-bit.
   add_num emits a big-endian-first 7-bit varint: each leading byte has
   the high bit (128) set, the final byte carries the low 7 bits clear.
   The `typedef int __wrong_buf__[(1-sizeof((buf)[0]))*(sizeof((buf)[0])-1)]`
   line is a compile-time assertion that BUF has 1-byte elements (the array
   size goes negative otherwise).
   NOTE(review): double-underscore identifiers are reserved for the
   implementation; a non-reserved name would be safer. */
535 #define FLAT_ATTR_BLOCK 127
536 #define ABBR_BLOCK 127
537 #define FLAT_ABBR_BLOCK 127
538 #define add_elem(buf,ofs,val,block) do { \
539 if (((ofs) & (block)) == 0) \
540 buf = xrealloc (buf, ((ofs) + (block) + 1) * sizeof((buf)[0])); \
541 (buf)[(ofs)++] = val; \
543 #define add_u16(buf,ofs,val,block) do {\
544 typedef int __wrong_buf__[(1-sizeof((buf)[0])) * (sizeof((buf)[0])-1)];\
545 add_elem(buf,ofs,(val) & 0xFF,block); \
546 add_elem(buf,ofs,((val) >> 8) & 0xFF,block); \
548 #define add_num(buf,ofs,val,block) do {\
549 typedef int __wrong_buf__[(1-sizeof((buf)[0])) * (sizeof((buf)[0])-1)];\
550 if ((val) >= (1 << 14)) \
552 if ((val) >= (1 << 28)) \
553 add_elem (buf,ofs,((val) >> 28) | 128, block); \
554 if ((val) >= (1 << 21)) \
555 add_elem (buf,ofs,((val) >> 21) | 128, block); \
556 add_elem (buf,ofs,((val) >> 14) | 128, block); \
558 if ((val) >= (1 << 7)) \
559 add_elem (buf,ofs,((val) >> 7) | 128, block); \
560 add_elem (buf,ofs,(val) & 127, block); \
/* qsort comparator for LongNV: order by name first, then by type, so
   entries with identical attribute schemas sort identically (needed by
   the schema/abbreviation matching in attr_store_pack). */
564 longnv_cmp (const void *pa, const void *pb)
566 const LongNV *a = (const LongNV *)pa;
567 const LongNV *b = (const LongNV *)pb;
568 int r = a->name - b->name;
571 r = a->type - b->type;
/* Convert the per-entry LongNV vectors into the packed representation:
   - s->flat_abbr: concatenated, zero-terminated schemas of
     (name << 4 | type) codes; s->abbr indexes their start offsets;
   - s->flat_attrs: per entry, a varint schema index followed by the
     varint/inline-encoded values; s->ent2attr maps entry -> offset.
   Entries are sorted with longnv_cmp first so equal schemas dedupe.
   Heap-held values (strings, intlists, localid lists) are freed as they
   are flattened; old_mem just tallies the memory being released for the
   statistics printed at the end.
   NOTE(review): the inner `unsigned int i` declarations (lines 639/645)
   shadow the outer entry-loop variable `i` — legal but error-prone.
   NOTE(review): line 612 packs name into the upper bits of an
   unsigned short with a 4-bit type field; NAME_WIDTH must not exceed 12
   for this to be lossless — confirm against attr_store_p.h. */
576 attr_store_pack (Attrstore *s)
579 unsigned int old_mem = 0;
582 s->ent2attr = xcalloc (s->entries, sizeof (s->ent2attr[0]));
584 s->attr_next_free = 0;
586 s->abbr_next_free = 0;
588 s->flat_abbr_next_free = 0;
590 add_num (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
591 add_elem (s->abbr, s->abbr_next_free, 0, ABBR_BLOCK);
592 add_elem (s->flat_abbr, s->flat_abbr_next_free, 0, FLAT_ABBR_BLOCK);
594 for (i = 0; i < s->entries; i++)
596 unsigned int num_attrs = 0, ofs;
597 LongNV *nv = s->attrs[i];
602 old_mem += (num_attrs + 1) * sizeof (LongNV);
605 qsort (s->attrs[i], num_attrs, sizeof (LongNV), longnv_cmp);
606 unsigned int this_abbr;
608 for (this_abbr = 0; this_abbr < s->abbr_next_free; this_abbr++)
610 for (ofs = 0; ofs < num_attrs; ofs++)
612 unsigned short name_type = (nv[ofs].name << 4) | nv[ofs].type;
613 assert (s->abbr[this_abbr] + ofs < s->flat_abbr_next_free);
614 if (name_type != s->flat_abbr[s->abbr[this_abbr]+ofs])
617 if (ofs == num_attrs && !s->flat_abbr[s->abbr[this_abbr]+ofs])
620 if (this_abbr == s->abbr_next_free)
622 /* This schema not found --> insert it. */
623 add_elem (s->abbr, s->abbr_next_free, s->flat_abbr_next_free, ABBR_BLOCK);
624 for (ofs = 0; ofs < num_attrs; ofs++)
626 unsigned short name_type = (nv[ofs].name << 4) | nv[ofs].type;
627 add_elem (s->flat_abbr, s->flat_abbr_next_free, name_type, FLAT_ABBR_BLOCK);
629 add_elem (s->flat_abbr, s->flat_abbr_next_free, 0, FLAT_ABBR_BLOCK);
631 s->ent2attr[i] = s->attr_next_free;
632 add_num (s->flat_attrs, s->attr_next_free, this_abbr, FLAT_ATTR_BLOCK);
633 for (ofs = 0; ofs < num_attrs; ofs++)
634 switch (nv[ofs].type)
639 unsigned int i = nv[ofs].v.i[0];
640 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
645 unsigned int i = nv[ofs].v.i[0];
646 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
648 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
653 const char *str = nv[ofs].v.str;
655 add_elem (s->flat_attrs, s->attr_next_free, *str, FLAT_ATTR_BLOCK);
656 add_elem (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
657 old_mem += strlen ((const char*)nv[ofs].v.str) + 1;
658 xfree ((void*)nv[ofs].v.str);
663 const int *il = nv[ofs].v.intlist;
665 for (; (i = *il) != 0; il++, old_mem += 4)
666 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
667 add_num (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
669 xfree (nv[ofs].v.intlist);
674 const Id *il = nv[ofs].v.localids;
676 for (; (i = *il) != 0; il++, old_mem += 4)
677 add_num (s->flat_attrs, s->attr_next_free, i, FLAT_ATTR_BLOCK);
678 add_num (s->flat_attrs, s->attr_next_free, 0, FLAT_ATTR_BLOCK);
680 xfree (nv[ofs].v.localids);
688 old_mem += s->entries * sizeof (s->attrs[0]);
692 /* Remove the hashtable too, it will be build on demand in str2localid
693    the next time we call it, which should not happen while in packed mode. */
694 old_mem += (s->stringhashmask + 1) * sizeof (s->stringhashtbl[0]);
695 free (s->stringhashtbl);
696 s->stringhashtbl = 0;
697 s->stringhashmask = 0;
699 fprintf (stderr, "%d\n", old_mem);
700 fprintf (stderr, "%zd\n", s->entries * sizeof(s->ent2attr[0]));
701 fprintf (stderr, "%d\n", s->attr_next_free);
702 fprintf (stderr, "%zd\n", s->abbr_next_free * sizeof(s->abbr[0]));
703 fprintf (stderr, "%zd\n", s->flat_abbr_next_free * sizeof(s->flat_abbr[0]));
704 fprintf (stderr, "pages %d\n", s->num_pages);
708 /* Pages in all blob pages, and deactivates paging. */
/* NOTE(review): body is almost entirely in a gap of this listing; the
   visible comments indicate it is a no-op without a backing file. */
710 pagein_all (Attrstore *s)
712 /* If we have no backing file everything is there already. */
715 /*fprintf (stderr, "Aieee!\n");
/* Inverse of attr_store_pack: walk every entry's packed attributes with
   the FOR_ATTRS iterator and rebuild the writable LongNV representation
   via the add_attr_* helpers, then free the packed arrays (flat_attrs,
   flat_abbr, and presumably abbr/ent2attr in lines hidden by gaps). */
720 attr_store_unpack (Attrstore *s)
728 /* Make the store writable right away, so we can use our adder functions. */
730 s->attrs = xcalloc (s->entries, sizeof (s->attrs[0]));
732 for (i = 0; i < s->entries; i++)
735 FOR_ATTRS (s, i, &ai)
740 add_attr_int (s, i, ai.name, ai.as_int);
743 add_attr_id (s, i, ai.name, ai.as_id);
746 add_attr_chunk (s, i, ai.name, ai.as_chunk[0], ai.as_chunk[1]);
749 add_attr_string (s, i, ai.name, ai.as_string);
756 get_num (ai.as_numlist, val);
759 add_attr_intlist_int (s, i, ai.name, val);
768 get_num (ai.as_numlist, val);
771 add_attr_localids_id (s, i, ai.name, val);
783 xfree (s->flat_attrs);
785 s->attr_next_free = 0;
788 s->abbr_next_free = 0;
789 xfree (s->flat_abbr);
791 s->flat_abbr_next_free = 0;
/* Write X as 4 bytes, big-endian (most significant byte first). On any
   putc failure reports "write error" via perror. */
795 write_u32(FILE *fp, unsigned int x)
797 if (putc(x >> 24, fp) == EOF ||
798 putc(x >> 16, fp) == EOF ||
799 putc(x >> 8, fp) == EOF ||
802 perror("write error");
/* Write X as a 7-bit varint, most significant group first; every byte
   except the last has the high bit (128) set. Matches the add_num macro
   encoding and the read_id decoder below.
   NOTE(review): only the final putc is error-checked; earlier putc calls
   in the visible lines are unchecked. */
808 write_id(FILE *fp, Id x)
813 putc((x >> 28) | 128, fp);
815 putc((x >> 21) | 128, fp);
816 putc((x >> 14) | 128, fp);
819 putc((x >> 7) | 128, fp);
820 if (putc(x & 127, fp) == EOF)
822 perror("write error");
/* Serialize the blob store page by page: total size, page size, then for
   each page a u32 length word followed by the page data. The length word
   encodes `real_length * 2 + compressed_flag` (lowest bit set iff the
   page is stored compressed); compression is only kept when it actually
   shrinks the page (compress_buf is given in_len - 1 as the budget). */
828 write_pages (FILE *fp, Attrstore *s)
831 unsigned char buf[BLOB_PAGESIZE];
833 /* The compressed pages in the file have different sizes, so we need
834    to store these sizes somewhere, either in front of all page data,
835    interleaved with the page data (in front of each page), or after
836    the page data. At this point we don't yet know the final compressed
837    sizes. These are the pros and cons:
838    * in front of all page data
839      + when reading back we only have to read this header, and know
840        where every page data is placed
841      - we have to compress all pages first before starting to write them.
842        Our output stream might be unseekable, so we can't simply
843        reserve space for the header, write all pages and then update the
844        header. This needs memory for all compressed pages at once.
845    * interleaved with page data
846      + we can compress and write per page, low memory overhead
847      - when reading back we have to read at least those numbers,
848        thereby either having to read all page data, or at least seek
850    * after all page data
851      + we can do streamed writing, remembering the sizes per page,
852        and emitting the header (which is a footer then) at the end
853      - reading back is hardest: before the page data we don't know
854        how long it is overall, so we have to put that information
855        also at the end, but it needs a determinate position, so can
856        only be at a known offset from the end. But that means that
857        we must be able to seek when reading back. We have this
858        wish anyway in case we want to use on-demand paging then, but
861    Of all these it seems the best good/bad ratio is with the interleaved
862    storage. No memory overhead at writing and no unreasonable limitations
864 write_u32 (fp, s->blob_next_free);
865 write_u32 (fp, BLOB_PAGESIZE);
866 assert (((s->blob_next_free + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE) == s->num_pages);
867 for (i = 0; i < s->num_pages; i++)
870 unsigned int out_len;
872 if (i == s->num_pages - 1)
873 in_len = s->blob_next_free & (BLOB_PAGESIZE - 1);
875 in_len = BLOB_PAGESIZE;
878 in = attr_retrieve_blob (s, i * BLOB_PAGESIZE, in_len);
879 out_len = compress_buf (in, in_len, buf, in_len - 1);
882 memcpy (buf, in, in_len);
888 fprintf (stderr, "page %d: %d -> %d\n", i, in_len, out_len);
889 write_u32 (fp, out_len * 2 + (out_len != in_len));
891 && fwrite (buf, out_len, 1, fp) != 1)
893 perror("write error");
/* Serialize the whole (packed) store: counts, then NUL-terminated name
   strings (indices 0/1 are implicit and skipped), the local string space,
   delta-encoded ent2attr offsets (0 marks "no attributes"; otherwise the
   difference to the previous non-zero offset is written), the flat
   attribute bytes and the flat abbreviation table. Pages presumably
   follow via write_pages in a line hidden by a gap.
   NOTE(review): s->flat_abbr is written in host byte order (raw fwrite of
   unsigned short elements) while the u32 header fields are big-endian —
   the format is not endian-portable unless the reader matches; confirm. */
900 write_attr_store (FILE *fp, Attrstore *s)
903 unsigned local_ssize;
907 write_u32 (fp, s->entries);
908 write_u32 (fp, s->num_nameids);
909 write_u32 (fp, s->nstrings);
910 for (i = 2; i < s->num_nameids; i++)
912 const char *str = id2str (s->pool, s->nameids[i]);
913 if (fwrite(str, strlen(str) + 1, 1, fp) != 1)
915 perror("write error");
920 for (i = 2, local_ssize = 0; i < (unsigned)s->nstrings; i++)
921 local_ssize += strlen (localid2str (s, i)) + 1;
923 write_u32 (fp, local_ssize);
924 for (i = 2; i < (unsigned)s->nstrings; i++)
926 const char *str = localid2str (s, i);
927 if (fwrite(str, strlen(str) + 1, 1, fp) != 1)
929 perror("write error");
935 for (i = 0; i < s->entries; i++)
936 if (i == 0 || s->ent2attr[i] == 0)
937 write_id (fp, s->ent2attr[i]);
940 write_id (fp, s->ent2attr[i] - last);
941 assert (last < s->ent2attr[i]);
942 last = s->ent2attr[i];
945 write_u32 (fp, s->attr_next_free);
946 if (fwrite (s->flat_attrs, s->attr_next_free, 1, fp) != 1)
948 perror ("write error");
952 write_u32 (fp, s->flat_abbr_next_free);
953 if (fwrite (s->flat_abbr, s->flat_abbr_next_free * sizeof (s->flat_abbr[0]), 1, fp) != 1)
955 perror ("write error");
/* Input primitives (fragments of read_u32/read_u8/read_id — most of their
   bodies fall in gaps of this listing). read_id decodes the 7-bit varint
   written by write_id, accumulating via x = (x << 7) ^ c ^ 128 for
   continuation bytes (high bit set) and rejecting values >= MAX when MAX
   is non-zero, or ids longer than 5 bytes. */
968 for (i = 0; i < 4; i++)
973 fprintf(stderr, "unexpected EOF\n");
988 fprintf(stderr, "unexpected EOF\n");
995 read_id(FILE *fp, Id max)
1000 for (i = 0; i < 5; i++)
1005 fprintf(stderr, "unexpected EOF\n");
1011 if (max && x >= max)
1013 fprintf(stderr, "read_id: id too large (%u/%u)\n", x, max);
1018 x = (x << 7) ^ c ^ 128;
1020 fprintf(stderr, "read_id: id too long\n");
1024 /* Try to either setup on-demand paging (using FP as backing
1025    file), or in case that doesn't work (FP not seekable) slurps in
1026    all pages and deactivates paging. */
/* Seekable path: record each page's file offset/size (re-encoding the
   length word as in_len * 2 + compressed, matching write_pages) and skip
   over the data; keep a dup()'d FILE for later demand paging.
   Non-seekable path: decompress/copy every page into the blob store now.
   NOTE(review): `in_len` is read raw from the u32 length word and then
   both used as a byte count and re-multiplied by 2 on line 1074 — the
   `in_len = read_u32(fp) / 2`-style halving must happen in one of the
   gapped lines (e.g. around 1065); verify against the full source. */
1028 read_or_setup_pages (FILE *fp, Attrstore *s)
1030 unsigned int blobsz;
1031 unsigned int pagesz;
1032 unsigned int npages;
1034 unsigned int can_seek;
1036 unsigned char buf[BLOB_PAGESIZE];
1037 blobsz = read_u32 (fp);
1038 pagesz = read_u32 (fp);
1039 if (pagesz != BLOB_PAGESIZE)
1041 /* We could handle this by slurping in everything. */
1042 fprintf (stderr, "non matching page size\n");
1046 if ((cur_file_ofs = ftell (fp)) < 0)
1049 fprintf (stderr, "can %sseek\n", can_seek ? "" : "NOT ");
1050 npages = (blobsz + BLOB_PAGESIZE - 1) / BLOB_PAGESIZE;
1052 s->num_pages = npages;
1053 s->pages = xmalloc (npages * sizeof (s->pages[0]));
1055 /* If we can't seek on our input we have to slurp in everything. */
1058 s->blob_next_free = blobsz;
1059 s->blob_store = xrealloc (s->blob_store, (s->blob_next_free + BLOB_BLOCK) &~BLOB_BLOCK);
1061 for (i = 0; i < npages; i++)
1063 unsigned int in_len = read_u32 (fp);
1064 unsigned int compressed = in_len & 1;
1065 Attrblobpage *p = s->pages + i;
1067 fprintf (stderr, "page %d: len %d (%scompressed)\n",
1068 i, in_len, compressed ? "" : "not ");
1073 p->file_offset = cur_file_ofs;
1074 p->file_size = in_len * 2 + compressed;
1075 if (fseek (fp, in_len, SEEK_CUR) < 0)
1078 fprintf (stderr, "can't seek after we thought we can\n");
1079 /* We can't fall back to non-seeking behaviour as we already
1080    read over some data pages without storing them away. */
1083 cur_file_ofs += in_len;
1087 unsigned int out_len;
1088 void *dest = s->blob_store + i * BLOB_PAGESIZE;
1089 p->mapped_at = i * BLOB_PAGESIZE;
1092 /* We can't seek, so suck everything in. */
1093 if (fread (compressed ? buf : dest, in_len, 1, fp) != 1)
1100 out_len = unchecked_decompress_buf (buf, in_len,
1101 dest, BLOB_PAGESIZE);
1102 if (out_len != BLOB_PAGESIZE
1105 fprintf (stderr, "can't decompress\n");
1114 /* If we are here we were able to seek to all page
1115    positions, so activate paging by copying FP into our structure.
1116    We dup() the file, so that our callers can fclose() it and we
1117    still have it open. But this means that we share file positions
1118    with the input filedesc. So in case our caller reads it after us,
1119    and calls back into us we might change the file position unexpectedly
1121 int fd = dup (fileno (fp));
1124 /* Jeez! What a bloody system, we can't dup() anymore. */
1128 /* XXX we don't close this yet anywhere. */
1129 s->file = fdopen (fd, "r");
1132 /* My God! What happened now? */
/* Deserialize a store written by write_attr_store: header counts, name
   strings (interned into the pool), the local string space (read in one
   block, then split by scanning for NULs to rebuild s->strings offsets),
   delta-decoded ent2attr, flat attribute bytes, the flat abbreviation
   table (rebuilding s->abbr by locating the zero terminators), and
   finally the blob pages via read_or_setup_pages.
   NOTE(review): the string hashtable is deliberately NOT rebuilt here —
   str2localid reconstructs it on demand (see comment at line 1191).
   NOTE(review): flat_abbr is fread in host byte order, mirroring the
   writer's raw fwrite — not endian-portable. */
1140 attr_store_read (FILE *fp, Pool *pool)
1144 unsigned local_ssize;
1148 Attrstore *s = new_store (pool);
1150 nentries = read_u32 (fp);
1151 s->num_nameids = read_u32 (fp);
1152 nstrings = read_u32 (fp);
1155 buf = malloc (buflen);
1157 s->nameids = realloc (s->nameids, (((s->num_nameids+127) & ~127) * sizeof (s->nameids[0])));
1158 for (i = 2; i < s->num_nameids; i++)
1163 int c = read_u8 (fp);
1167 buf = realloc (buf, buflen);
1173 s->nameids[i] = str2id (s->pool, buf, 1);
1176 local_ssize = read_u32 (fp);
1177 char *strsp = (char *)xrealloc(s->stringspace, s->sstrings + local_ssize + 1);
1178 Offset *str = (Offset *)xrealloc(s->strings, (nstrings) * sizeof(Offset));
1180 s->stringspace = strsp;
1182 strsp += s->sstrings;
1184 if (fread(strsp, local_ssize, 1, fp) != 1)
1186 perror ("read error while reading strings");
1189 strsp[local_ssize] = 0;
1191 /* Don't build hashtable here, it will be built on demand by str2localid
1192    should we call that. */
1194 strsp = s->stringspace;
1195 s->nstrings = nstrings;
1196 for (i = 0; i < nstrings; i++)
1198 str[i] = strsp - s->stringspace;
1199 strsp += strlen (strsp) + 1;
1201 s->sstrings = strsp - s->stringspace;
1203 s->entries = nentries;
1205 s->ent2attr = xmalloc (s->entries * sizeof (s->ent2attr[0]));
1207 for (i = 0; i < s->entries; i++)
1209 int d = read_id (fp, 0);
1210 if (i == 0 || d == 0)
1215 s->ent2attr[i] = last;
1219 s->attr_next_free = read_u32 (fp);
1220 s->flat_attrs = xmalloc (((s->attr_next_free + FLAT_ATTR_BLOCK) & ~FLAT_ATTR_BLOCK) * sizeof (s->flat_attrs[0]));
1221 if (fread (s->flat_attrs, s->attr_next_free, 1, fp) != 1)
1223 perror ("read error");
1227 s->flat_abbr_next_free = read_u32 (fp);
1228 s->flat_abbr = xmalloc (((s->flat_abbr_next_free + FLAT_ABBR_BLOCK) & ~FLAT_ABBR_BLOCK) * sizeof (s->flat_abbr[0]));
1229 if (fread (s->flat_abbr, s->flat_abbr_next_free * sizeof (s->flat_abbr[0]), 1, fp) != 1)
1231 perror ("read error");
1235 assert (s->flat_abbr[0] == 0);
1236 s->abbr_next_free = 0;
1238 add_elem (s->abbr, s->abbr_next_free, 0, ABBR_BLOCK);
1241 for (abbi = 0; abbi < s->flat_abbr_next_free - 1; abbi++)
1242 if (s->flat_abbr[abbi] == 0)
1243 add_elem (s->abbr, s->abbr_next_free, abbi + 1, ABBR_BLOCK);
1244 assert (s->flat_abbr[abbi] == 0);
1246 read_or_setup_pages (fp, s);
1259 Pool *pool = pool_create ();
1260 Attrstore *s = new_store (pool);
1261 unsigned int id1 = new_entry (s);
1262 unsigned int id2 = new_entry (s);
1263 unsigned int id3 = new_entry (s);
1264 unsigned int id4 = new_entry (s);
1265 add_attr_int (s, id1, str2nameid (s, "name1"), 42);
1266 add_attr_chunk (s, id1, str2nameid (s, "name2"), 9876, 1024);
1267 add_attr_string (s, id1, str2nameid (s, "name3"), "hallo");
1268 add_attr_int (s, id1, str2nameid (s, "name1"), 43);
1269 add_attr_id (s, id1, str2nameid (s, "id1"), 100);
1270 add_attr_intlist_int (s, id1, str2nameid (s, "intlist1"), 3);
1271 add_attr_intlist_int (s, id1, str2nameid (s, "intlist1"), 14);
1272 add_attr_intlist_int (s, id1, str2nameid (s, "intlist1"), 1);
1273 add_attr_intlist_int (s, id1, str2nameid (s, "intlist1"), 59);
1274 add_attr_localids_id (s, id1, str2nameid (s, "l_ids1"), str2localid (s, "one", 1));
1275 add_attr_localids_id (s, id1, str2nameid (s, "l_ids1"), str2localid (s, "two", 1));
1276 add_attr_localids_id (s, id1, str2nameid (s, "l_ids1"), str2localid (s, "three", 1));
1277 add_attr_localids_id (s, id1, str2nameid (s, "l_ids2"), str2localid (s, "three", 1));
1278 add_attr_localids_id (s, id1, str2nameid (s, "l_ids2"), str2localid (s, "two", 1));
1279 add_attr_localids_id (s, id1, str2nameid (s, "l_ids2"), str2localid (s, "one", 1));
1280 write_attr_store (stdout, s);