1 /* Reading binary .mo files.
2 Copyright (C) 1995-1998, 2000-2007, 2015 Free Software Foundation,
4 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
33 /* This include file describes the main part of binary .mo format. */
38 #include "binary-io.h"
44 #define _(str) gettext (str)
53 /* We read the file completely into memory. This is more efficient than
54 lots of lseek(). This struct represents the .mo file in memory. */
60 enum mo_endianness endian;
64 /* Read the contents of the given input stream. */
66 read_binary_mo_file (struct binary_mo_file *bfp,
67 FILE *fp, const char *filename)
76 const size_t increment = 4096;
77 if (size + increment > alloc)
79 alloc = alloc + alloc / 2;
80 if (alloc < size + increment)
81 alloc = size + increment;
82 buf = (char *) xrealloc (buf, alloc);
84 count = fread (buf + size, 1, increment, fp);
88 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
94 buf = (char *) xrealloc (buf, size);
95 bfp->filename = filename;
100 /* Get a 32-bit number from the file, at the given file position. */
102 get_uint32 (const struct binary_mo_file *bfp, size_t offset)
104 nls_uint32 b0, b1, b2, b3;
105 size_t end = xsum (offset, 4);
107 if (size_overflow_p (end) || end > bfp->size)
108 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
110 b0 = *(unsigned char *) (bfp->data + offset + 0);
111 b1 = *(unsigned char *) (bfp->data + offset + 1);
112 b2 = *(unsigned char *) (bfp->data + offset + 2);
113 b3 = *(unsigned char *) (bfp->data + offset + 3);
114 if (bfp->endian == MO_LITTLE_ENDIAN)
115 return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
117 return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
120 /* Get a static string from the file, at the given file position. */
122 get_string (const struct binary_mo_file *bfp, size_t offset, size_t *lengthp)
124 /* See 'struct string_desc'. */
125 nls_uint32 s_length = get_uint32 (bfp, offset);
126 nls_uint32 s_offset = get_uint32 (bfp, offset + 4);
127 size_t s_end = xsum3 (s_offset, s_length, 1);
129 if (size_overflow_p (s_end) || s_end > bfp->size)
130 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
131 if (bfp->data[s_offset + s_length] != '\0')
132 error (EXIT_FAILURE, 0,
133 _("file \"%s\" contains a not NUL terminated string"),
136 *lengthp = s_length + 1;
137 return bfp->data + s_offset;
140 /* Get a system dependent string from the file, at the given file position. */
142 get_sysdep_string (const struct binary_mo_file *bfp, size_t offset,
143 const struct mo_file_header *header, size_t *lengthp)
145 /* See 'struct sysdep_string'. */
152 /* Compute the length. */
153 s_offset = get_uint32 (bfp, offset);
155 for (i = 4; ; i += 8)
157 nls_uint32 segsize = get_uint32 (bfp, offset + i);
158 nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
159 nls_uint32 sysdep_segment_offset;
160 nls_uint32 ss_length;
161 nls_uint32 ss_offset;
166 s_end = xsum (s_offset, segsize);
167 if (size_overflow_p (s_end) || s_end > bfp->size)
168 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
172 if (sysdepref == SEGMENTS_END)
174 if (sysdepref >= header->n_sysdep_segments)
176 error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
178 /* See 'struct sysdep_segment'. */
179 sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
180 ss_length = get_uint32 (bfp, sysdep_segment_offset);
181 ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
182 ss_end = xsum (ss_offset, ss_length);
183 if (size_overflow_p (ss_end) || ss_end > bfp->size)
184 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
185 if (!(ss_length > 0 && bfp->data[ss_end - 1] == '\0'))
188 sprintf (location, "sysdep_segment[%u]", (unsigned int) sysdepref);
189 error (EXIT_FAILURE, 0,
190 _("file \"%s\" contains a not NUL terminated string, at %s"),
191 bfp->filename, location);
193 n = strlen (bfp->data + ss_offset);
194 length += (n > 1 ? 1 + n + 1 : n);
197 /* Allocate and fill the string. */
198 string = XNMALLOC (length, char);
200 s_offset = get_uint32 (bfp, offset);
201 for (i = 4; ; i += 8)
203 nls_uint32 segsize = get_uint32 (bfp, offset + i);
204 nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
205 nls_uint32 sysdep_segment_offset;
206 nls_uint32 ss_length;
207 nls_uint32 ss_offset;
210 memcpy (p, bfp->data + s_offset, segsize);
214 if (sysdepref == SEGMENTS_END)
216 if (sysdepref >= header->n_sysdep_segments)
218 /* See 'struct sysdep_segment'. */
219 sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
220 ss_length = get_uint32 (bfp, sysdep_segment_offset);
221 ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
222 if (ss_offset + ss_length > bfp->size)
224 if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0'))
226 n = strlen (bfp->data + ss_offset);
229 memcpy (p, bfp->data + ss_offset, n);
235 if (p != string + length)
242 /* Reads an existing .mo file and adds the messages to mlp. */
244 read_mo_file (message_list_ty *mlp, const char *filename)
247 struct binary_mo_file bf;
248 struct mo_file_header header;
250 static lex_pos_ty pos = { __FILE__, __LINE__ };
252 if (strcmp (filename, "-") == 0 || strcmp (filename, "/dev/stdin") == 0)
255 SET_BINARY (fileno (fp));
259 fp = fopen (filename, "rb");
261 error (EXIT_FAILURE, errno,
262 _("error while opening \"%s\" for reading"), filename);
265 /* Read the file contents into memory. */
266 read_binary_mo_file (&bf, fp, filename);
268 /* Get a 32-bit number from the file header. */
269 # define GET_HEADER_FIELD(field) \
270 get_uint32 (&bf, offsetof (struct mo_file_header, field))
272 /* We must grope the file to determine which endian it is.
273 Perversity of the universe tends towards maximum, so it will
274 probably not match the currently executing architecture. */
275 bf.endian = MO_BIG_ENDIAN;
276 header.magic = GET_HEADER_FIELD (magic);
277 if (header.magic != _MAGIC)
279 bf.endian = MO_LITTLE_ENDIAN;
280 header.magic = GET_HEADER_FIELD (magic);
281 if (header.magic != _MAGIC)
284 error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
289 header.revision = GET_HEADER_FIELD (revision);
291 /* We support only the major revisions 0 and 1. */
292 switch (header.revision >> 16)
296 /* Fill the header parts that apply to major revisions 0 and 1. */
297 header.nstrings = GET_HEADER_FIELD (nstrings);
298 header.orig_tab_offset = GET_HEADER_FIELD (orig_tab_offset);
299 header.trans_tab_offset = GET_HEADER_FIELD (trans_tab_offset);
300 header.hash_tab_size = GET_HEADER_FIELD (hash_tab_size);
301 header.hash_tab_offset = GET_HEADER_FIELD (hash_tab_offset);
303 for (i = 0; i < header.nstrings; i++)
313 /* Read the msgctxt and msgid. */
314 msgid = get_string (&bf, header.orig_tab_offset + i * 8,
316 /* Split into msgctxt and msgid. */
317 separator = strchr (msgid, MSGCTXT_SEPARATOR);
318 if (separator != NULL)
320 /* The part before the MSGCTXT_SEPARATOR is the msgctxt. */
323 msgid = separator + 1;
324 msgid_len -= msgid - msgctxt;
329 /* Read the msgstr. */
330 msgstr = get_string (&bf, header.trans_tab_offset + i * 8,
333 mp = message_alloc (msgctxt,
335 (strlen (msgid) + 1 < msgid_len
336 ? msgid + strlen (msgid) + 1
340 message_list_append (mlp, mp);
343 switch (header.revision & 0xffff)
349 /* Fill the header parts that apply to minor revision >= 1. */
350 header.n_sysdep_segments = GET_HEADER_FIELD (n_sysdep_segments);
351 header.sysdep_segments_offset =
352 GET_HEADER_FIELD (sysdep_segments_offset);
353 header.n_sysdep_strings = GET_HEADER_FIELD (n_sysdep_strings);
354 header.orig_sysdep_tab_offset =
355 GET_HEADER_FIELD (orig_sysdep_tab_offset);
356 header.trans_sysdep_tab_offset =
357 GET_HEADER_FIELD (trans_sysdep_tab_offset);
359 for (i = 0; i < header.n_sysdep_strings; i++)
371 /* Read the msgctxt and msgid. */
372 offset = get_uint32 (&bf, header.orig_sysdep_tab_offset + i * 4);
373 msgid = get_sysdep_string (&bf, offset, &header, &msgid_len);
374 /* Split into msgctxt and msgid. */
375 separator = strchr (msgid, MSGCTXT_SEPARATOR);
376 if (separator != NULL)
378 /* The part before the MSGCTXT_SEPARATOR is the msgctxt. */
381 msgid = separator + 1;
382 msgid_len -= msgid - msgctxt;
387 /* Read the msgstr. */
388 offset = get_uint32 (&bf, header.trans_sysdep_tab_offset + i * 4);
389 msgstr = get_sysdep_string (&bf, offset, &header, &msgstr_len);
391 mp = message_alloc (msgctxt,
393 (strlen (msgid) + 1 < msgid_len
394 ? msgid + strlen (msgid) + 1
399 /* Only messages with c-format or objc-format annotation are
400 recognized as having system-dependent strings by msgfmt.
401 Which one of the two, we don't know. We have to guess,
402 assuming that c-format is more probable than objc-format and
403 that the .mo was likely produced by "msgfmt -c". */
404 for (f = format_c; ; f = format_objc)
407 struct formatstring_parser *parser = formatstring_parsers[f];
411 str_end = msgid + msgid_len;
412 for (str = msgid; str < str_end; str += strlen (str) + 1)
414 char *invalid_reason = NULL;
416 parser->parse (str, false, NULL, &invalid_reason);
419 parser->free (descr);
422 free (invalid_reason);
429 str_end = msgstr + msgstr_len;
430 for (str = msgstr; str < str_end; str += strlen (str) + 1)
432 char *invalid_reason = NULL;
434 parser->parse (str, true, NULL, &invalid_reason);
437 parser->free (descr);
440 free (invalid_reason);
449 /* Found the most likely among c-format, objc-format. */
450 mp->is_format[f] = yes;
455 if (f == format_objc)
459 message_list_append (mlp, mp);