1 /* Writing binary .mo files.
2 Copyright (C) 1995-1998, 2000-2007 Free Software Foundation, Inc.
3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
33 # include <sys/param.h>
36 /* These two include files describe the binary .mo format. */
38 #include "hash-string.h"
48 #include "binary-io.h"
49 #include "fwriteerror.h"
52 #define _(str) gettext (str)
54 #define freea(p) /* nothing */
/* Round X up to the next multiple of Y.
   Usually defined in <sys/param.h>; a fallback is provided only when it
   is not.  */
#ifndef roundup
# if defined __GNUC__ && __GNUC__ >= 2
/* Statement-expression form: X and Y are each evaluated exactly once.
   __typeof__ is used instead of typeof so that the macro also compiles in
   strict ISO modes (-std=c99, -std=c11), where the plain keyword is not
   recognized.  */
#  define roundup(x, y) \
  ({ __typeof__ (x) _x = (x); __typeof__ (y) _y = (y); \
     ((_x + _y - 1) / _y) * _y; })
# else
/* Portable form: Y is evaluated more than once, so it must be free of
   side effects.  */
#  define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
# endif
#endif
67 /* Alignment of strings in resulting .mo file. */
70 /* True if writing a .mo file in opposite endianness than the host. */
73 /* True if no hash table in .mo is wanted. */
77 /* Destructively changes the byte order of a 32-bit value in memory. */
78 #define BSWAP32(x) (x) = bswap_32 (x)
/* Indices into the strings contained in 'struct pre_message' and
   'struct pre_sysdep_message'.  Both structures hold a two-element 'str'
   array that is indexed with these values.  */
enum
{
  M_ID = 0,     /* msgid - the original string */
  M_STR = 1     /* msgstr - the translated string */
};
89 /* An intermediate data structure representing a 'struct string_desc'. */
96 /* An intermediate data structure representing a message. */
99 struct pre_string str[2];
100 const char *id_plural;
101 size_t id_plural_len;
105 compare_id (const void *pval1, const void *pval2)
107 return strcmp (((struct pre_message *) pval1)->str[M_ID].pointer,
108 ((struct pre_message *) pval2)->str[M_ID].pointer);
112 /* An intermediate data structure representing a 'struct sysdep_segment'. */
113 struct pre_sysdep_segment
119 /* An intermediate data structure representing a 'struct segment_pair'. */
120 struct pre_segment_pair
127 /* An intermediate data structure representing a 'struct sysdep_string'. */
128 struct pre_sysdep_string
130 unsigned int segmentcount;
131 struct pre_segment_pair segments[1];
/* An intermediate data structure representing a message with system dependent
   strings.  */
struct pre_sysdep_message
{
  /* The original string (index M_ID) and the translated string
     (index M_STR), each split into segments.  */
  struct pre_sysdep_string *str[2];
  /* The msgid_plural of the message, or NULL if it has none.  */
  const char *id_plural;
  /* Size of id_plural including its terminating NUL byte, or 0 when
     id_plural is NULL.  */
  size_t id_plural_len;
};
143 /* Write the message list to the given open file. */
145 write_table (FILE *output_file, message_list_ty *mlp)
149 struct pre_message *msg_arr;
150 size_t n_sysdep_strings;
151 struct pre_sysdep_message *sysdep_msg_arr;
152 size_t n_sysdep_segments;
153 struct pre_sysdep_segment *sysdep_segments;
157 bool omit_hash_table;
158 nls_uint32 hash_tab_size;
159 struct mo_file_header header; /* Header of the .mo file to be written. */
162 struct string_desc *orig_tab;
163 struct string_desc *trans_tab;
164 size_t sysdep_tab_offset = 0;
  /* First pass: Move the static string pairs into an array, for sorting,
     and at the same time, compute the segments of the system dependent
     strings.  */
172 msgctid_arr = XNMALLOC (mlp->nitems, char *);
174 msg_arr = XNMALLOC (mlp->nitems, struct pre_message);
175 n_sysdep_strings = 0;
176 sysdep_msg_arr = XNMALLOC (mlp->nitems, struct pre_sysdep_message);
177 n_sysdep_segments = 0;
178 sysdep_segments = NULL;
179 have_outdigits = false;
180 for (j = 0; j < mlp->nitems; j++)
182 message_ty *mp = mlp->item[j];
185 struct interval *intervals[2];
186 size_t nintervals[2];
188 /* Concatenate mp->msgctxt and mp->msgid into msgctid. */
189 msgctlen = (mp->msgctxt != NULL ? strlen (mp->msgctxt) + 1 : 0);
190 msgctid = XNMALLOC (msgctlen + strlen (mp->msgid) + 1, char);
191 if (mp->msgctxt != NULL)
193 memcpy (msgctid, mp->msgctxt, msgctlen - 1);
194 msgctid[msgctlen - 1] = MSGCTXT_SEPARATOR;
196 strcpy (msgctid + msgctlen, mp->msgid);
197 msgctid_arr[j] = msgctid;
199 intervals[M_ID] = NULL;
200 nintervals[M_ID] = 0;
201 intervals[M_STR] = NULL;
202 nintervals[M_STR] = 0;
204 /* Test if mp contains system dependent strings and thus
205 requires the use of the .mo file minor revision 1. */
206 if (possible_format_p (mp->is_format[format_c])
207 || possible_format_p (mp->is_format[format_objc]))
209 /* Check whether msgid or msgstr contain ISO C 99 <inttypes.h>
210 format string directives. No need to check msgid_plural, because
211 it is not accessed by the [n]gettext() function family. */
215 get_sysdep_c_format_directives (mp->msgid, false,
216 &intervals[M_ID], &nintervals[M_ID]);
219 struct interval *id_intervals = intervals[M_ID];
220 size_t id_nintervals = nintervals[M_ID];
222 if (id_nintervals > 0)
226 for (i = 0; i < id_nintervals; i++)
228 id_intervals[i].startpos += msgctlen;
229 id_intervals[i].endpos += msgctlen;
234 p_end = mp->msgstr + mp->msgstr_len;
235 for (p = mp->msgstr; p < p_end; p += strlen (p) + 1)
237 struct interval *part_intervals;
238 size_t part_nintervals;
240 get_sysdep_c_format_directives (p, true,
243 if (part_nintervals > 0)
245 size_t d = p - mp->msgstr;
250 xrealloc (intervals[M_STR],
251 (nintervals[M_STR] + part_nintervals)
252 * sizeof (struct interval));
253 for (i = 0; i < part_nintervals; i++)
255 intervals[M_STR][nintervals[M_STR] + i].startpos =
256 d + part_intervals[i].startpos;
257 intervals[M_STR][nintervals[M_STR] + i].endpos =
258 d + part_intervals[i].endpos;
260 nintervals[M_STR] += part_nintervals;
265 if (nintervals[M_ID] > 0 || nintervals[M_STR] > 0)
267 /* System dependent string pair. */
268 for (m = 0; m < 2; m++)
270 struct pre_sysdep_string *pre =
271 (struct pre_sysdep_string *)
272 xmalloc (xsum (sizeof (struct pre_sysdep_string),
273 xtimes (nintervals[m],
274 sizeof (struct pre_segment_pair))));
282 str = msgctid; /* concatenation of mp->msgctxt + mp->msgid */
283 str_len = strlen (msgctid) + 1;
288 str_len = mp->msgstr_len;
292 pre->segmentcount = nintervals[m];
293 for (i = 0; i < nintervals[m]; i++)
299 pre->segments[i].segptr = str + lastpos;
300 pre->segments[i].segsize = intervals[m][i].startpos - lastpos;
302 length = intervals[m][i].endpos - intervals[m][i].startpos;
303 pointer = str + intervals[m][i].startpos;
305 && pointer[0] == '<' && pointer[length - 1] == '>')
307 /* Skip the '<' and '>' markers. */
312 for (r = 0; r < n_sysdep_segments; r++)
313 if (sysdep_segments[r].length == length
314 && memcmp (sysdep_segments[r].pointer, pointer, length)
317 if (r == n_sysdep_segments)
321 (struct pre_sysdep_segment *)
322 xrealloc (sysdep_segments,
324 * sizeof (struct pre_sysdep_segment));
325 sysdep_segments[r].length = length;
326 sysdep_segments[r].pointer = pointer;
329 pre->segments[i].sysdepref = r;
331 if (length == 1 && *pointer == 'I')
332 have_outdigits = true;
334 lastpos = intervals[m][i].endpos;
336 pre->segments[i].segptr = str + lastpos;
337 pre->segments[i].segsize = str_len - lastpos;
338 pre->segments[i].sysdepref = SEGMENTS_END;
340 sysdep_msg_arr[n_sysdep_strings].str[m] = pre;
343 sysdep_msg_arr[n_sysdep_strings].id_plural = mp->msgid_plural;
344 sysdep_msg_arr[n_sysdep_strings].id_plural_len =
345 (mp->msgid_plural != NULL ? strlen (mp->msgid_plural) + 1 : 0);
350 /* Static string pair. */
351 msg_arr[nstrings].str[M_ID].pointer = msgctid;
352 msg_arr[nstrings].str[M_ID].length = strlen (msgctid) + 1;
353 msg_arr[nstrings].str[M_STR].pointer = mp->msgstr;
354 msg_arr[nstrings].str[M_STR].length = mp->msgstr_len;
355 msg_arr[nstrings].id_plural = mp->msgid_plural;
356 msg_arr[nstrings].id_plural_len =
357 (mp->msgid_plural != NULL ? strlen (mp->msgid_plural) + 1 : 0);
361 for (m = 0; m < 2; m++)
362 if (intervals[m] != NULL)
366 /* Sort the table according to original string. */
368 qsort (msg_arr, nstrings, sizeof (struct pre_message), compare_id);
370 /* We need major revision 1 if there are system dependent strings that use
371 "I" because older versions of gettext() crash when this occurs in a .mo
372 file. Otherwise use major revision 0. */
374 (have_outdigits ? MO_REVISION_NUMBER_WITH_SYSDEP_I : MO_REVISION_NUMBER);
376 /* We need minor revision 1 if there are system dependent strings.
377 Otherwise we choose minor revision 0 because it's supported by older
378 versions of libintl and revision 1 isn't. */
379 minor_revision = (n_sysdep_strings > 0 ? 1 : 0);
381 /* In minor revision >= 1, the hash table is obligatory. */
382 omit_hash_table = (no_hash_table && minor_revision == 0);
  /* This should be explained:
     Each string has an associated hashing value V, computed by a fixed
     function.  To locate the string we use open addressing with double
     hashing.  The first index will be V % M, where M is the size of the
     hashing table.  If no entry is found, iterating with a second,
     independent hashing function takes place.  This second value will
     be 1 + V % (M - 2).
     The approximate number of probes will be

       for unsuccessful search:  (1 - N / M) ^ -1
       for successful search:    - (N / M) ^ -1 * ln (1 - N / M)

     where N is the number of keys.

     If we now choose M to be the next prime bigger than 4 / 3 * N,
     we get the values
                         4   and   1.85  resp.
     Because unsuccessful searches are unlikely this is a good value.
     Formulas: [Knuth, The Art of Computer Programming, Volume 3,
                Sorting and Searching, 1973, Addison Wesley]  */
404 if (!omit_hash_table)
406 hash_tab_size = next_prime ((mlp->nitems * 4) / 3);
408 if (hash_tab_size <= 2)
415 /* Second pass: Fill the structure describing the header. At the same time,
416 compute the sizes and offsets of the non-string parts of the file. */
419 header.magic = _MAGIC;
420 /* Revision number of file format. */
421 header.revision = (major_revision << 16) + minor_revision;
425 ? offsetof (struct mo_file_header, n_sysdep_segments)
426 : sizeof (struct mo_file_header));
427 offset = header_size;
429 /* Number of static string pairs. */
430 header.nstrings = nstrings;
432 /* Offset of table for original string offsets. */
433 header.orig_tab_offset = offset;
434 offset += nstrings * sizeof (struct string_desc);
435 orig_tab = XNMALLOC (nstrings, struct string_desc);
437 /* Offset of table for translated string offsets. */
438 header.trans_tab_offset = offset;
439 offset += nstrings * sizeof (struct string_desc);
440 trans_tab = XNMALLOC (nstrings, struct string_desc);
442 /* Size of hash table. */
443 header.hash_tab_size = hash_tab_size;
444 /* Offset of hash table. */
445 header.hash_tab_offset = offset;
446 offset += hash_tab_size * sizeof (nls_uint32);
448 if (minor_revision >= 1)
450 /* Size of table describing system dependent segments. */
451 header.n_sysdep_segments = n_sysdep_segments;
452 /* Offset of table describing system dependent segments. */
453 header.sysdep_segments_offset = offset;
454 offset += n_sysdep_segments * sizeof (struct sysdep_segment);
456 /* Number of system dependent string pairs. */
457 header.n_sysdep_strings = n_sysdep_strings;
459 /* Offset of table for original sysdep string offsets. */
460 header.orig_sysdep_tab_offset = offset;
461 offset += n_sysdep_strings * sizeof (nls_uint32);
463 /* Offset of table for translated sysdep string offsets. */
464 header.trans_sysdep_tab_offset = offset;
465 offset += n_sysdep_strings * sizeof (nls_uint32);
467 /* System dependent string descriptors. */
468 sysdep_tab_offset = offset;
469 for (m = 0; m < 2; m++)
470 for (j = 0; j < n_sysdep_strings; j++)
471 offset += sizeof (struct sysdep_string)
472 + sysdep_msg_arr[j].str[m]->segmentcount
473 * sizeof (struct segment_pair);
479 /* Third pass: Write the non-string parts of the file. At the same time,
480 compute the offsets of each string, including the proper alignment. */
482 /* Write the header out. */
485 BSWAP32 (header.magic);
486 BSWAP32 (header.revision);
487 BSWAP32 (header.nstrings);
488 BSWAP32 (header.orig_tab_offset);
489 BSWAP32 (header.trans_tab_offset);
490 BSWAP32 (header.hash_tab_size);
491 BSWAP32 (header.hash_tab_offset);
492 if (minor_revision >= 1)
494 BSWAP32 (header.n_sysdep_segments);
495 BSWAP32 (header.sysdep_segments_offset);
496 BSWAP32 (header.n_sysdep_strings);
497 BSWAP32 (header.orig_sysdep_tab_offset);
498 BSWAP32 (header.trans_sysdep_tab_offset);
501 fwrite (&header, header_size, 1, output_file);
503 /* Table for original string offsets. */
504 /* Here output_file is at position header.orig_tab_offset. */
506 for (j = 0; j < nstrings; j++)
508 offset = roundup (offset, alignment);
510 msg_arr[j].str[M_ID].length + msg_arr[j].id_plural_len;
511 orig_tab[j].offset = offset;
512 offset += orig_tab[j].length;
513 /* Subtract 1 because of the terminating NUL. */
514 orig_tab[j].length--;
517 for (j = 0; j < nstrings; j++)
519 BSWAP32 (orig_tab[j].length);
520 BSWAP32 (orig_tab[j].offset);
522 fwrite (orig_tab, nstrings * sizeof (struct string_desc), 1, output_file);
524 /* Table for translated string offsets. */
525 /* Here output_file is at position header.trans_tab_offset. */
527 for (j = 0; j < nstrings; j++)
529 offset = roundup (offset, alignment);
530 trans_tab[j].length = msg_arr[j].str[M_STR].length;
531 trans_tab[j].offset = offset;
532 offset += trans_tab[j].length;
533 /* Subtract 1 because of the terminating NUL. */
534 trans_tab[j].length--;
537 for (j = 0; j < nstrings; j++)
539 BSWAP32 (trans_tab[j].length);
540 BSWAP32 (trans_tab[j].offset);
542 fwrite (trans_tab, nstrings * sizeof (struct string_desc), 1, output_file);
544 /* Skip this part when no hash table is needed. */
545 if (!omit_hash_table)
547 nls_uint32 *hash_tab;
550 /* Here output_file is at position header.hash_tab_offset. */
552 /* Allocate room for the hashing table to be written out. */
553 hash_tab = XNMALLOC (hash_tab_size, nls_uint32);
554 memset (hash_tab, '\0', hash_tab_size * sizeof (nls_uint32));
      /* Insert all values in the hash table, following the algorithm
         described above (open addressing with double hashing).  */
558 for (j = 0; j < nstrings; j++)
560 nls_uint32 hash_val = hash_string (msg_arr[j].str[M_ID].pointer);
561 nls_uint32 idx = hash_val % hash_tab_size;
563 if (hash_tab[idx] != 0)
565 /* We need the second hashing function. */
566 nls_uint32 incr = 1 + (hash_val % (hash_tab_size - 2));
569 if (idx >= hash_tab_size - incr)
570 idx -= hash_tab_size - incr;
573 while (hash_tab[idx] != 0);
576 hash_tab[idx] = j + 1;
579 /* Write the hash table out. */
581 for (j = 0; j < hash_tab_size; j++)
582 BSWAP32 (hash_tab[j]);
583 fwrite (hash_tab, hash_tab_size * sizeof (nls_uint32), 1, output_file);
588 if (minor_revision >= 1)
590 struct sysdep_segment *sysdep_segments_tab;
591 nls_uint32 *sysdep_tab;
595 /* Here output_file is at position header.sysdep_segments_offset. */
597 sysdep_segments_tab =
598 XNMALLOC (n_sysdep_segments, struct sysdep_segment);
599 for (i = 0; i < n_sysdep_segments; i++)
601 offset = roundup (offset, alignment);
602 /* The "+ 1" accounts for the trailing NUL byte. */
603 sysdep_segments_tab[i].length = sysdep_segments[i].length + 1;
604 sysdep_segments_tab[i].offset = offset;
605 offset += sysdep_segments_tab[i].length;
609 for (i = 0; i < n_sysdep_segments; i++)
611 BSWAP32 (sysdep_segments_tab[i].length);
612 BSWAP32 (sysdep_segments_tab[i].offset);
614 fwrite (sysdep_segments_tab,
615 n_sysdep_segments * sizeof (struct sysdep_segment), 1,
618 free (sysdep_segments_tab);
620 sysdep_tab = XNMALLOC (n_sysdep_strings, nls_uint32);
621 stoffset = sysdep_tab_offset;
623 for (m = 0; m < 2; m++)
625 /* Here output_file is at position
626 m == M_ID -> header.orig_sysdep_tab_offset,
627 m == M_STR -> header.trans_sysdep_tab_offset. */
629 for (j = 0; j < n_sysdep_strings; j++)
631 sysdep_tab[j] = stoffset;
632 stoffset += sizeof (struct sysdep_string)
633 + sysdep_msg_arr[j].str[m]->segmentcount
634 * sizeof (struct segment_pair);
636 /* Write the table for original/translated sysdep string offsets. */
638 for (j = 0; j < n_sysdep_strings; j++)
639 BSWAP32 (sysdep_tab[j]);
640 fwrite (sysdep_tab, n_sysdep_strings * sizeof (nls_uint32), 1,
646 /* Here output_file is at position sysdep_tab_offset. */
648 for (m = 0; m < 2; m++)
649 for (j = 0; j < n_sysdep_strings; j++)
651 struct pre_sysdep_message *msg = &sysdep_msg_arr[j];
652 struct pre_sysdep_string *pre = msg->str[m];
653 struct sysdep_string *str =
654 (struct sysdep_string *)
655 xmalloca (sizeof (struct sysdep_string)
656 + pre->segmentcount * sizeof (struct segment_pair));
659 offset = roundup (offset, alignment);
660 str->offset = offset;
661 for (i = 0; i <= pre->segmentcount; i++)
663 str->segments[i].segsize = pre->segments[i].segsize;
664 str->segments[i].sysdepref = pre->segments[i].sysdepref;
665 offset += str->segments[i].segsize;
667 if (m == M_ID && msg->id_plural_len > 0)
669 str->segments[pre->segmentcount].segsize += msg->id_plural_len;
670 offset += msg->id_plural_len;
674 BSWAP32 (str->offset);
675 for (i = 0; i <= pre->segmentcount; i++)
677 BSWAP32 (str->segments[i].segsize);
678 BSWAP32 (str->segments[i].sysdepref);
682 sizeof (struct sysdep_string)
683 + pre->segmentcount * sizeof (struct segment_pair),
690 /* Here output_file is at position end_offset. */
696 /* Fourth pass: Write the strings. */
700 /* A few zero bytes for padding. */
701 null = (char *) alloca (alignment);
702 memset (null, '\0', alignment);
704 /* Now write the original strings. */
705 for (j = 0; j < nstrings; j++)
707 fwrite (null, roundup (offset, alignment) - offset, 1, output_file);
708 offset = roundup (offset, alignment);
710 fwrite (msg_arr[j].str[M_ID].pointer, msg_arr[j].str[M_ID].length, 1,
712 if (msg_arr[j].id_plural_len > 0)
713 fwrite (msg_arr[j].id_plural, msg_arr[j].id_plural_len, 1,
715 offset += msg_arr[j].str[M_ID].length + msg_arr[j].id_plural_len;
718 /* Now write the translated strings. */
719 for (j = 0; j < nstrings; j++)
721 fwrite (null, roundup (offset, alignment) - offset, 1, output_file);
722 offset = roundup (offset, alignment);
724 fwrite (msg_arr[j].str[M_STR].pointer, msg_arr[j].str[M_STR].length, 1,
726 offset += msg_arr[j].str[M_STR].length;
729 if (minor_revision >= 1)
733 for (i = 0; i < n_sysdep_segments; i++)
735 fwrite (null, roundup (offset, alignment) - offset, 1, output_file);
736 offset = roundup (offset, alignment);
738 fwrite (sysdep_segments[i].pointer, sysdep_segments[i].length, 1,
740 fwrite (null, 1, 1, output_file);
741 offset += sysdep_segments[i].length + 1;
744 for (m = 0; m < 2; m++)
745 for (j = 0; j < n_sysdep_strings; j++)
747 struct pre_sysdep_message *msg = &sysdep_msg_arr[j];
748 struct pre_sysdep_string *pre = msg->str[m];
750 fwrite (null, roundup (offset, alignment) - offset, 1,
752 offset = roundup (offset, alignment);
754 for (i = 0; i <= pre->segmentcount; i++)
756 fwrite (pre->segments[i].segptr, pre->segments[i].segsize, 1,
758 offset += pre->segments[i].segsize;
760 if (m == M_ID && msg->id_plural_len > 0)
762 fwrite (msg->id_plural, msg->id_plural_len, 1, output_file);
763 offset += msg->id_plural_len;
771 for (j = 0; j < mlp->nitems; j++)
772 free (msgctid_arr[j]);
773 free (sysdep_msg_arr);
780 msgdomain_write_mo (message_list_ty *mlp,
781 const char *domain_name,
782 const char *file_name)
786 /* If no entry for this domain don't even create the file. */
787 if (mlp->nitems != 0)
789 if (strcmp (domain_name, "-") == 0)
791 output_file = stdout;
792 SET_BINARY (fileno (output_file));
796 output_file = fopen (file_name, "wb");
797 if (output_file == NULL)
799 error (0, errno, _("error while opening \"%s\" for writing"),
805 if (output_file != NULL)
807 write_table (output_file, mlp);
809 /* Make sure nothing went wrong. */
810 if (fwriteerror (output_file))
811 error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"),