1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
5 * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org)
6 * Copyright (C) 2002-2006 Dom Lachowicz (cinamod@hotmail.com)
7 * excel_iconv* family of functions (C) 2001 by Vlad Harchev <hvv@hippo.ru>
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of version 2.1 of the GNU Lesser General Public
11 * License as published by the Free Software Foundation.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
24 #include <gsf-config.h>
25 #include <gsf/gsf-docprop-vector.h>
26 #include <gsf/gsf-msole-utils.h>
27 #include <gsf/gsf-input.h>
28 #include <gsf/gsf-output.h>
29 #include <gsf/gsf-utils.h>
30 #include <gsf/gsf-timestamp.h>
31 #include <gsf/gsf-meta-names.h>
32 #include <gsf/gsf-doc-meta-data.h>
33 #include <gsf/gsf-clip-data.h>
40 #include <glib/gi18n-lib.h>
42 #define NO_DEBUG_OLE_PROPS
43 #ifndef NO_DEBUG_OLE_PROPS
44 #define d(code) do { code } while (0)
50 DEBUG_UNKNOWN_PROPS = 1
54 msole_debug (guint what)
57 static gboolean inited = FALSE;
61 const GDebugKey keys[] = {
62 { (char*)"msole_prop", DEBUG_UNKNOWN_PROPS },
65 const char *val = g_getenv ("GSF_DEBUG");
67 ? g_parse_debug_string (val, keys, G_N_ELEMENTS (keys))
73 return (flags & what) != 0;
77 * The Format Identifier for Summary Information
78 * F29F85E0-4FF9-1068-AB91-08002B27B3D9
80 static guint8 const component_guid [] = {
81 0xe0, 0x85, 0x9f, 0xf2, 0xf9, 0x4f, 0x68, 0x10,
82 0xab, 0x91, 0x08, 0x00, 0x2b, 0x27, 0xb3, 0xd9
86 * The Format Identifier for Document Summary Information
87 * D5CDD502-2E9C-101B-9397-08002B2CF9AE
89 static guint8 const document_guid [] = {
90 0x02, 0xd5, 0xcd, 0xd5, 0x9c, 0x2e, 0x1b, 0x10,
91 0x93, 0x97, 0x08, 0x00, 0x2b, 0x2c, 0xf9, 0xae
95 * The Format Identifier for User-Defined Properties
96 * D5CDD505-2E9C-101B-9397-08002B2CF9AE
98 static guint8 const user_guid [] = {
99 0x05, 0xd5, 0xcd, 0xd5, 0x9c, 0x2e, 0x1b, 0x10,
100 0x93, 0x97, 0x08, 0x00, 0x2b, 0x2c, 0xf9, 0xae
104 COMMON_PROP, /* in either summary or docsummary */
105 COMPONENT_PROP, /* SummaryInformation properties */
106 DOC_PROP, /* DocumentSummaryInformation properties */
108 } GsfMSOleMetaDataType;
148 VT_STREAMED_OBJECT = 68,
149 VT_STORED_OBJECT = 69,
154 } GsfMSOleVariantType;
158 GsfMSOleMetaDataType section;
159 char const *gsf_name;
161 GsfMSOleVariantType prefered_type;
162 } GsfMSOleMetaDataPropMap;
167 } GsfMSOleMetaDataProp;
170 GsfMSOleMetaDataType type;
172 guint32 size, num_props;
176 } GsfMSOleMetaDataSection;
178 static GsfMSOleMetaDataPropMap const builtin_props [] = {
179 { "Dictionary", COMMON_PROP, GSF_META_NAME_DICTIONARY, 0, 0, /* magic */},
180 { "CodePage", COMMON_PROP, GSF_META_NAME_CODEPAGE, 1, VT_I2 },
181 { "LOCALE_SYSTEM_DEFAULT",COMMON_PROP, GSF_META_NAME_LOCALE_SYSTEM_DEFAULT, 0x80000000, VT_UI4},
182 { "CASE_SENSITIVE", COMMON_PROP, GSF_META_NAME_CASE_SENSITIVE, 0x80000003, VT_UI4},
183 { "Category", DOC_PROP, GSF_META_NAME_CATEGORY, 2, VT_LPSTR },
184 { "PresentationFormat", DOC_PROP, GSF_META_NAME_PRESENTATION_FORMAT, 3, VT_LPSTR },
185 { "NumBytes", DOC_PROP, GSF_META_NAME_BYTE_COUNT, 4, VT_I4 },
186 { "NumLines", DOC_PROP, GSF_META_NAME_LINE_COUNT, 5, VT_I4 },
187 { "NumParagraphs", DOC_PROP, GSF_META_NAME_PARAGRAPH_COUNT, 6, VT_I4 },
188 { "NumSlides", DOC_PROP, GSF_META_NAME_SLIDE_COUNT, 7, VT_I4 },
189 { "NumNotes", DOC_PROP, GSF_META_NAME_NOTE_COUNT, 8, VT_I4 },
190 { "NumHiddenSlides", DOC_PROP, GSF_META_NAME_HIDDEN_SLIDE_COUNT, 9, VT_I4 },
191 { "NumMMClips", DOC_PROP, GSF_META_NAME_MM_CLIP_COUNT, 10, VT_I4 },
192 { "Scale", DOC_PROP, GSF_META_NAME_SCALE, 11, VT_BOOL },
193 { "HeadingPairs", DOC_PROP, GSF_META_NAME_HEADING_PAIRS, 12, VT_VECTOR | VT_VARIANT },
194 { "DocumentParts", DOC_PROP, GSF_META_NAME_DOCUMENT_PARTS, 13, VT_VECTOR | VT_LPSTR },
195 { "Manager", DOC_PROP, GSF_META_NAME_MANAGER, 14, VT_LPSTR },
196 { "Company", DOC_PROP, GSF_META_NAME_COMPANY, 15, VT_LPSTR },
197 { "LinksDirty", DOC_PROP, GSF_META_NAME_LINKS_DIRTY, 16, VT_BOOL },
198 { "DocSumInfo_17", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_17, 17, VT_UNKNOWN },
199 { "DocSumInfo_18", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_18, 18, VT_UNKNOWN },
200 { "DocSumInfo_19", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_19, 19, VT_BOOL },
201 { "DocSumInfo_20", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_20, 20, VT_UNKNOWN },
202 { "DocSumInfo_21", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_21, 21, VT_UNKNOWN },
203 { "DocSumInfo_22", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_22, 22, VT_BOOL },
204 { "DocSumInfo_23", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_23, 23, VT_I4 },
205 { "Title", COMPONENT_PROP, GSF_META_NAME_TITLE, 2, VT_LPSTR },
206 { "Subject", COMPONENT_PROP, GSF_META_NAME_SUBJECT, 3, VT_LPSTR },
207 { "Author", COMPONENT_PROP, GSF_META_NAME_CREATOR, 4, VT_LPSTR },
208 { "Keywords", COMPONENT_PROP, GSF_META_NAME_KEYWORDS, 5, VT_LPSTR },
209 { "Comments", COMPONENT_PROP, GSF_META_NAME_DESCRIPTION, 6, VT_LPSTR },
210 { "Template", COMPONENT_PROP, GSF_META_NAME_TEMPLATE, 7, VT_LPSTR },
211 { "LastSavedBy", COMPONENT_PROP, GSF_META_NAME_LAST_SAVED_BY, 8, VT_LPSTR },
212 { "RevisionNumber", COMPONENT_PROP, GSF_META_NAME_REVISION_COUNT, 9, VT_LPSTR },
213 { "TotalEditingTime", COMPONENT_PROP, GSF_META_NAME_EDITING_DURATION, 10, VT_FILETIME },
214 { "LastPrinted", COMPONENT_PROP, GSF_META_NAME_LAST_PRINTED, 11, VT_FILETIME },
215 { "CreateTime", COMPONENT_PROP, GSF_META_NAME_DATE_CREATED, 12, VT_FILETIME },
216 { "LastSavedTime", COMPONENT_PROP, GSF_META_NAME_DATE_MODIFIED, 13, VT_FILETIME },
217 { "NumPages", COMPONENT_PROP, GSF_META_NAME_PAGE_COUNT, 14, VT_I4 },
218 { "NumWords", COMPONENT_PROP, GSF_META_NAME_WORD_COUNT, 15, VT_I4 },
219 { "NumCharacters", COMPONENT_PROP, GSF_META_NAME_CHARACTER_COUNT, 16, VT_I4 },
220 { "Thumbnail", COMPONENT_PROP, GSF_META_NAME_THUMBNAIL, 17, VT_CF },
221 { "AppName", COMPONENT_PROP, GSF_META_NAME_GENERATOR, 18, VT_LPSTR },
222 { "Security", COMPONENT_PROP, GSF_META_NAME_SECURITY, 19, VT_I4 }
225 static GHashTable *name_to_prop_hash = NULL;
228 msole_vt_name (GsfMSOleVariantType type)
230 static char const *names[] = {
231 "VT_EMPTY", "VT_NULL", "VT_I2", "VT_I4", "VT_R4",
232 "VT_R8", "VT_CY", "VT_DATE", "VT_BSTR", "VT_DISPATCH",
233 "VT_ERROR", "VT_BOOL", "VT_VARIANT", "VT_UNKNOWN", "VT_DECIMAL",
234 NULL, "VT_I1", "VT_UI1", "VT_UI2", "VT_UI4",
235 "VT_I8", "VT_UI8", "VT_INT", "VT_UINT", "VT_VOID",
236 "VT_HRESULT", "VT_PTR", "VT_SAFEARRAY", "VT_CARRAY", "VT_USERDEFINED",
237 "VT_LPSTR", "VT_LPWSTR",
239 static char const *names2[] = {
241 "VT_BLOB", "VT_STREAM", "VT_STORAGE", "VT_STREAMED_OBJECT",
242 "VT_STORED_OBJECT", "VT_BLOB_OBJECT", "VT_CF", "VT_CLSID"
246 if (type <= VT_LPWSTR)
248 g_return_val_if_fail (type >= VT_FILETIME, "_UNKNOWN_");
249 g_return_val_if_fail (type <= VT_CLSID, "_UNKNOWN_");
250 return names2[type-VT_FILETIME];
254 msole_prop_id_to_gsf (GsfMSOleMetaDataSection *section, guint32 id, gboolean *linked)
256 char const *res = NULL;
257 GsfMSOleMetaDataPropMap const *map = NULL;
261 if (section->dict != NULL) {
262 if (id & 0x1000000) {
265 d (g_print ("LINKED "););
268 res = g_hash_table_lookup (section->dict, GINT_TO_POINTER (id));
276 map = builtin_props ;
277 i = G_N_ELEMENTS (builtin_props);
279 if (map[i].id == id &&
280 (map[i].section == COMMON_PROP || map[i].section == section->type)) {
281 d (g_print (map[i].gsf_name););
282 return map[i].gsf_name;
285 d (g_print ("_UNKNOWN_(0x%x %d)", id, id););
290 static GsfMSOleMetaDataPropMap const *
291 msole_gsf_name_to_prop (char const *name)
293 if (NULL == name_to_prop_hash) {
295 name_to_prop_hash = g_hash_table_new (g_str_hash, g_str_equal);
296 for (i = G_N_ELEMENTS (builtin_props); i-- > 0; )
297 g_hash_table_replace (name_to_prop_hash,
298 (gpointer) builtin_props[i].gsf_name,
299 (gpointer) (builtin_props+i));
302 return g_hash_table_lookup (name_to_prop_hash, (gpointer)name);
306 set_error_missing_data (GError **error, const char *property_name, gsize size_needed, gsize size_gotten)
308 gchar *size_needed_str, *size_gotten_str;
310 size_needed_str = g_strdup_printf ("%" G_GSIZE_FORMAT, size_needed);
311 size_gotten_str = g_strdup_printf ("%" G_GSIZE_FORMAT, size_gotten);
314 GSF_ERROR_INVALID_DATA,
315 _("Missing data when reading the %s property; got %s bytes, "
316 "but %s bytes at least are needed."),
320 g_free (size_needed_str);
321 g_free (size_gotten_str);
324 /* Can return errors from gsf_blob_new() and GSF_ERROR_INVALID_DATA */
326 parse_vt_cf (GValue *res, guint8 const **data, guint8 const *data_end, GError **error)
328 /* clipboard size uint32 sizeof (clipboard format tag) + sizeof (clipboard data)
329 * clipboard format tag int32 see below
330 * clipboard data byte[] see below
332 * Clipboard format tag:
333 * -1 - Windows clipboard format
334 * -2 - Macintosh clipboard format
335 * -3 - GUID that contains a format identifier (FMTID)
336 * >0 - custom clipboard format name plus data (see msdn site below)
340 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/stg/stg/propvariant.asp
341 * http://jakarta.apache.org/poi/hpsf/thumbnails.html
342 * http://linux.com.hk/docs/poi/org/apache/poi/hpsf/Thumbnail.html
343 * http://sparks.discreet.com/knowledgebase/public/solutions/ExtractThumbnailImg.htm
345 guint32 clip_size, clip_data_size;
348 GsfClipData *clip_data;
350 /* Clipboard size field */
352 if (data_end < *data + 4) {
353 set_error_missing_data (error, "VT_CF", 4, data_end - *data);
357 clip_size = GSF_LE_GET_GUINT32 (*data);
359 if (clip_size < 4) { /* must emcompass int32 format plus data size */
362 size_str = g_strdup_printf ("%" G_GSIZE_FORMAT, (gsize) clip_size);
365 GSF_ERROR_INVALID_DATA,
366 _("Corrupt data in the VT_CF property; clipboard data length must be at least 4 bytes, "
367 "but the data says it only has %s bytes available."),
375 /* Check clipboard format plus data size */
377 if (data_end < *data + clip_size) {
378 set_error_missing_data (error, "VT_CF", clip_size, data_end - *data);
382 clip_format = GSF_LE_GET_GINT32 (*data);
385 switch (clip_format) {
386 case GSF_CLIP_FORMAT_WINDOWS_CLIPBOARD:
387 case GSF_CLIP_FORMAT_MACINTOSH_CLIPBOARD:
388 case GSF_CLIP_FORMAT_GUID:
389 case GSF_CLIP_FORMAT_NO_DATA:
390 /* everything is ok */
395 clip_format = GSF_CLIP_FORMAT_CLIPBOARD_FORMAT_NAME;
397 clip_format = GSF_CLIP_FORMAT_UNKNOWN;
402 clip_data_size = clip_size - 4;
404 blob = gsf_blob_new (clip_data_size, *data, error);
406 *data += clip_data_size;
411 clip_data = gsf_clip_data_new (clip_format, blob);
412 g_object_unref (blob);
414 g_value_init (res, GSF_TYPE_CLIP_DATA);
415 g_value_set_object (res, clip_data);
416 g_object_unref (clip_data);
422 * Return a number no bigger than the number of bytes used for a property
423 * value of a given type. The returned number might be too small, but
424 * we try to return as big a value as possible.
427 msole_prop_min_size (guint32 type)
483 case VT_STREAMED_OBJECT:
484 case VT_STORED_OBJECT:
490 #define NEED_RECS(_n,_size1) \
492 guint _s1 = (_size1); \
493 bytes_needed = (_n); \
494 if (_s1 > 0 && (data_end - *data) / _s1 < bytes_needed) { \
495 g_warning ("Invalid MS property or file truncated"); \
498 bytes_needed *= _s1; \
501 #define NEED_BYTES(_n) NEED_RECS(_n,1)
503 #define ADVANCE do { *data += bytes_needed; } while (0)
506 msole_prop_parse (GsfMSOleMetaDataSection *section,
507 guint32 type, guint8 const **data, guint8 const *data_end)
512 gboolean const is_vector = type & VT_VECTOR;
516 g_return_val_if_fail (!(type & (unsigned)(~0x1fff)), NULL); /* not valid in a prop set */
522 * A vector is basically an array. If the type associated with
523 * it is a variant, then each element can have a different
524 * variant type. Otherwise, each element has the same variant
525 * type associated with the vector.
527 unsigned i, n, size1;
528 GsfDocPropVector *vector;
531 n = GSF_LE_GET_GUINT32 (*data);
534 d (g_print (" array with %d elem\n", n);
535 gsf_mem_dump (*data, (unsigned)(data_end - *data)););
537 size1 = msole_prop_min_size (type);
540 vector = gsf_docprop_vector_new ();
542 for (i = 0 ; i < n ; i++) {
544 d (g_print ("\t[%d] ", i););
545 v = msole_prop_parse (section, type, data, data_end);
547 if (G_IS_VALUE (v)) {
548 gsf_docprop_vector_append (vector, v);
555 res = g_new0 (GValue, 1);
556 g_value_init (res, GSF_DOCPROP_VECTOR_TYPE);
557 g_value_set_object (res, vector);
558 g_object_unref (vector);
562 res = g_new0 (GValue, 1);
563 d (g_print ("%s\n", msole_vt_name (type)););
567 * A property with a type indicator of VT_EMPTY has no data
568 * associated with it; that is, the size of the value is zero.
570 /* value::unset == empty */
574 /* This is like a pointer to NULL */
575 /* value::unset == null too :-) do we need to distinguish ? */
579 /* 2-byte signed integer */
581 g_value_init (res, G_TYPE_INT);
582 g_value_set_int (res, GSF_LE_GET_GINT16 (*data));
587 /* 4-byte signed integer */
589 g_value_init (res, G_TYPE_INT);
590 g_value_set_int (res, GSF_LE_GET_GINT32 (*data));
595 /* 32-bit IEEE floating-point value */
597 g_value_init (res, G_TYPE_FLOAT);
598 g_value_set_float (res, GSF_LE_GET_FLOAT (*data));
603 /* 64-bit IEEE floating-point value */
605 g_value_init (res, G_TYPE_DOUBLE);
606 g_value_set_double (res, GSF_LE_GET_DOUBLE (*data));
611 /* 8-byte two's complement integer (scaled by 10,000) */
613 /* CHEAT : just store as an int64 for now */
614 g_value_init (res, G_TYPE_INT64);
615 g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
620 * 64-bit floating-point number representing the number of days
621 * (not seconds) since December 31, 1899.
623 if (msole_debug (DEBUG_UNKNOWN_PROPS))
624 g_warning ("Unhandled property value type %d (0x%x)",
631 * Pointer to null-terminated Unicode string; the string is
632 * preceded by a DWORD representing the byte count of the number
633 * of bytes in the string (including the terminating null).
635 if (msole_debug (DEBUG_UNKNOWN_PROPS))
636 g_warning ("Unhandled property value type %d (0x%x)",
643 if (msole_debug (DEBUG_UNKNOWN_PROPS))
644 g_warning ("Unhandled property value type %d (0x%x)",
649 /* A boolean (WORD) value containing 0 (false) or -1 (true). */
651 g_value_init (res, G_TYPE_BOOLEAN);
652 g_value_set_boolean (res, **data ? TRUE : FALSE);
656 case VT_VARIANT : d (g_print ("\tcontaining a "););
658 * A type indicator (a DWORD) followed by the corresponding
659 * value. VT_VARIANT is only used in conjunction with
664 type = GSF_LE_GET_GUINT32 (*data);
666 return msole_prop_parse (section, type, data, data_end);
669 /* 1-byte unsigned integer */
671 g_value_init (res, G_TYPE_UCHAR);
672 g_value_set_uchar (res, GSF_LE_GET_GUINT8 (*data));
677 /* 1-byte signed integer */
679 g_value_init (res, G_TYPE_CHAR);
680 g_value_set_char (res, GSF_LE_GET_GINT8 (*data));
685 /* 2-byte unsigned integer */
687 g_value_init (res, G_TYPE_UINT);
688 g_value_set_uint (res, GSF_LE_GET_GUINT16 (*data));
693 /* 4-byte unsigned integer */
695 g_value_init (res, G_TYPE_UINT);
696 g_value_set_uint (res, GSF_LE_GET_GUINT32 (*data));
700 case VT_I8 : d (g_print ("VT_I8\n"););
701 /* 8-byte signed integer */
703 g_value_init (res, G_TYPE_INT64);
704 g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
709 /* 8-byte unsigned integer */
711 g_value_init (res, G_TYPE_UINT64);
712 g_value_set_uint64 (res, GSF_LE_GET_GUINT64 (*data));
718 * This is the representation of many strings. It is stored in
719 * the same representation as VT_BSTR. Note that the serialized
720 * representation of VT_LPSTR has a preceding byte count, whereas
721 * the in-memory representation does not.
724 len = GSF_LE_GET_GUINT32 (*data);
727 NEED_RECS (len, section->char_size);
729 g_return_val_if_fail (len < 0x10000, NULL);
732 d (gsf_mem_dump (*data, len * section->char_size););
733 str = g_convert_with_iconv (*data,
734 len * section->char_size,
735 section->iconv_handle, NULL, NULL, &error);
737 g_value_init (res, G_TYPE_STRING);
739 g_value_set_string (res, str);
741 } else if (NULL != error) {
742 g_warning ("error: %s", error->message);
743 g_error_free (error);
745 g_warning ("unknown error converting string property, using blank");
752 * A counted and null-terminated Unicode string; a DWORD character
753 * count (where the count includes the terminating null) followed
754 * by that many Unicode (16-bit) characters. Note that the count
755 * is character count, not byte count.
759 len = GSF_LE_GET_GUINT32 (*data);
764 g_return_val_if_fail (len < 0x10000, NULL);
767 d (gsf_mem_dump (*data, len * 2););
768 str = g_convert (*data, len * 2,
769 "UTF-8", "UTF-16LE", NULL, NULL, &error);
771 g_value_init (res, G_TYPE_STRING);
773 g_value_set_string (res, str);
775 } else if (NULL != error) {
776 g_warning ("error: %s", error->message);
777 g_error_free (error);
779 g_warning ("unknown error converting string property, using blank");
785 /* 64-bit FILETIME structure, as defined by Win32. */
791 /* ft * 100ns since Jan 1 1601 */
792 ft = GSF_LE_GET_GUINT64 (*data);
794 ft /= 10000000; /* convert to seconds */
795 ft -= G_GINT64_CONSTANT (11644473600); /* move to Jan 1 1970 */
796 ts = gsf_timestamp_new ();
797 gsf_timestamp_set_time (ts, ft);
798 g_value_init (res, GSF_TIMESTAMP_TYPE);
799 gsf_value_set_timestamp (res, ts);
800 gsf_timestamp_free (ts);
808 * A DWORD count of bytes, followed by that many bytes of data.
809 * The byte count does not include the four bytes for the length
810 * of the count itself: An empty blob would have a count of
811 * zero, followed by zero bytes. Thus the serialized represen-
812 * tation of a VT_BLOB is similar to that of a VT_BSTR but does
813 * not guarantee a null byte at the end of the data.
817 if (msole_debug (DEBUG_UNKNOWN_PROPS))
818 g_warning ("Unhandled property value type %d (0x%x)",
826 * Indicates the value is stored in a stream that is sibling
827 * to the CONTENTS stream. Following this type indicator is
828 * data in the format of a serialized VT_LPSTR, which names
829 * the stream containing the data.
831 if (msole_debug (DEBUG_UNKNOWN_PROPS))
832 g_warning ("Unhandled property value type %d (0x%x)",
840 * Indicates the value is stored in an IStorage that is
841 * sibling to the CONTENTS stream. Following this type
842 * indicator is data in the format of a serialized VT_LPSTR,
843 * which names the IStorage containing the data.
845 if (msole_debug (DEBUG_UNKNOWN_PROPS))
846 g_warning ("Unhandled property value type %d (0x%x)",
852 case VT_STREAMED_OBJECT:
854 * Same as VT_STREAM, but indicates that the stream contains a
855 * serialized object, which is a class ID followed by initiali-
856 * zation data for the class.
858 if (msole_debug (DEBUG_UNKNOWN_PROPS))
859 g_warning ("Unhandled property value type %d (0x%x)",
865 case VT_STORED_OBJECT :
867 * Same as VT_STORAGE, but indicates that the designated
868 * IStorage contains a loadable object.
870 if (msole_debug (DEBUG_UNKNOWN_PROPS))
871 g_warning ("Unhandled property value type %d (0x%x)",
877 case VT_BLOB_OBJECT :
879 * Contains a serialized object in the same representation as
880 * would appear in a VT_STREAMED_OBJECT. That is, following
881 * the VT_BLOB_OBJECT tag is a DWORD byte count of the
882 * remaining data (where the byte count does not include the
883 * size of itself) which is in the format of a class ID
884 * followed by initialization data for that class
886 if (msole_debug (DEBUG_UNKNOWN_PROPS))
887 g_warning ("Unhandled property value type %d (0x%x)",
895 if (!parse_vt_cf (res, data, data_end, &error)) {
896 /* suck, we can't propagate the error upwards */
898 g_warning ("error: %s", error->message);
899 g_error_free (error);
902 g_warning ("unknown error parsing vt_cf");
910 /* A class ID (or other GUID) */
918 /* A DWORD containing a status code. */
928 case VT_USERDEFINED :
929 g_warning ("type %s (0x%x) is not permitted in property sets",
930 msole_vt_name (type), type);
936 if (msole_debug (DEBUG_UNKNOWN_PROPS))
937 g_warning ("Unknown property type %d (0x%x)",
943 if (res != NULL && G_IS_VALUE (res)) {
945 char *val = g_strdup_value_contents (res);
946 g_print ("%s\n", val);
951 char const *type_name = msole_vt_name (type);
953 g_printerr ("A '%s' property could not be parsed\n", type_name);
955 g_printerr ("A %d property could not be parsed\n", type);
968 msole_prop_read (GsfInput *in,
969 GsfMSOleMetaDataSection *section,
970 GsfMSOleMetaDataProp *props,
972 GsfDocMetaData *accum)
976 gsf_off_t size = ((i+1) >= section->num_props)
982 g_return_val_if_fail (i < section->num_props, FALSE);
983 g_return_val_if_fail (size >= props[i].offset + 4, FALSE);
985 size -= props[i].offset; /* includes the type id */
986 /* From now on, size is actually a size. */
987 if (gsf_input_seek (in, section->offset+props[i].offset, G_SEEK_SET) ||
988 NULL == (data = gsf_input_read (in, size, NULL))) {
989 g_warning ("failed to read prop #%d", i);
993 type = GSF_LE_GET_GUINT32 (data);
996 /* dictionary is magic */
997 if (props[i].id == 0) {
998 guint32 len, id, j, n;
1001 guint8 const *start = data;
1003 g_return_val_if_fail (section->dict == NULL, FALSE);
1005 section->dict = g_hash_table_new_full (
1006 g_direct_hash, g_direct_equal,
1009 d ({ g_print ("Dictionary = \n"); gsf_mem_dump (data-4, size); });
1011 for (j = 0 ; j < n ; j++) {
1012 id = GSF_LE_GET_GUINT32 (data);
1013 len = GSF_LE_GET_GUINT32 (data + 4);
1015 g_return_val_if_fail (len < 0x10000, FALSE);
1018 name = g_convert_with_iconv (data + 8,
1019 len * section->char_size,
1020 section->iconv_handle, &gslen, NULL, NULL);
1021 len = (guint32)gslen;
1024 d (g_print ("\t%u == %s\n", id, name););
1025 g_hash_table_replace (section->dict,
1026 GINT_TO_POINTER (id), name);
1028 /* MS documentation blows goats !
1029 * The docs claim there are padding bytes in the dictionary.
1030 * Their examples show padding bytes.
1031 * In reality non-unicode strings do not seem to have padding.
1033 if (section->char_size != 1 && (data - start) % 4)
1034 data += 4 - ((data - start) % 4);
1038 d (g_print ("===> %u) ", i);
1039 gsf_mem_dump (data-4, size););
1041 name = g_strdup (msole_prop_id_to_gsf (section, props[i].id, &linked));
1042 d (g_print (" @ %x %x = ", (unsigned)props[i].offset, (unsigned)size););
1043 val = msole_prop_parse (section, type, &data, data + size);
1045 if (NULL != name && NULL != val) {
1047 GsfDocProp *prop = gsf_doc_meta_data_lookup (accum, name);
1049 g_warning ("linking property '%s' before it\'s value is specified",
1050 (name ? name : "<null>"));
1051 } else if (!G_VALUE_HOLDS_STRING (val)) {
1052 g_warning ("linking property '%s' before it\'s value is specified",
1053 (name ? name : "<null>"));
1055 gsf_doc_prop_set_link (prop,
1056 g_value_dup_string (val));
1058 gsf_doc_meta_data_insert (accum, name, val);
1065 if (G_IS_VALUE (val))
1066 g_value_unset (val);
1076 msole_prop_cmp (gconstpointer a, gconstpointer b)
1078 GsfMSOleMetaDataProp const *prop_a = a;
1079 GsfMSOleMetaDataProp const *prop_b = b;
1081 if (prop_a->offset < prop_b->offset)
1083 else if (prop_a->offset > prop_b->offset)
1090 * gsf_msole_metadata_read :
1092 * @accum : #GsfDocMetaData
1094 * Read a stream formated as a set of MS OLE properties from @in and store the
1095 * results in @accum.
1097 * Returns: GError which the caller must free on error.
1100 gsf_msole_metadata_read (GsfInput *in, GsfDocMetaData *accum)
1104 guint32 os, num_sections;
1106 GsfMSOleMetaDataSection *sections;
1107 GsfMSOleMetaDataProp *props;
1110 /* http://bugzilla.gnome.org/show_bug.cgi?id=352055
1111 * psiwin generates files with empty property sections */
1112 if (gsf_input_size (in) <= 0)
1115 data = gsf_input_read (in, 28, NULL);
1117 return g_error_new (gsf_input_error_id (), 0,
1118 "Unable to read MS property stream header");
1120 d ({g_print ("===================================\n"
1121 "header class id ==\n");
1122 gsf_mem_dump (data, 28);});
1124 * Validate the Property Set Header.
1126 * 00 - 01 Byte order 0xfffe
1128 * 04 - 05 OS Version high word is the OS
1129 * 06 - 07 low word is the OS version
1133 * 08 - 23 Class Identifier Usually Format ID
1134 * 24 - 27 Section count Should be at least 1
1136 os = GSF_LE_GET_GUINT16 (data + 6);
1137 version = GSF_LE_GET_GUINT16 (data + 2);
1138 num_sections = GSF_LE_GET_GUINT32 (data + 24);
1139 if (GSF_LE_GET_GUINT16 (data + 0) != 0xfffe
1140 || (version != 0 && version != 1)
1142 || num_sections > gsf_input_size(in) / 20
1143 || num_sections > 100) /* arbitrary sanity check */
1144 return g_error_new (gsf_input_error_id (), 0,
1145 "Invalid MS property stream header");
1147 /* extract the section info */
1149 * The Format ID/Offset list follows.
1151 * 00 - 15 Section Name Format ID
1152 * 16 - 19 Section Offset The offset is the number of
1153 * bytes from the start of the
1154 * whole stream to where the
1157 sections = (GsfMSOleMetaDataSection *)g_alloca (sizeof (GsfMSOleMetaDataSection)* num_sections);
1158 for (i = 0 ; i < num_sections ; i++) {
1159 data = gsf_input_read (in, 20, NULL);
1161 return g_error_new (gsf_input_error_id (), 0,
1162 "Unable to read MS property stream header");
1163 if (!memcmp (data, component_guid, sizeof (component_guid)))
1164 sections [i].type = COMPONENT_PROP;
1165 else if (!memcmp (data, document_guid, sizeof (document_guid)))
1166 sections [i].type = DOC_PROP;
1167 else if (!memcmp (data, user_guid, sizeof (user_guid)))
1168 sections [i].type = USER_PROP;
1170 sections [i].type = USER_PROP;
1171 g_warning ("Unknown property section type, treating it as USER");
1172 gsf_mem_dump (data, 16);
1175 sections [i].offset = GSF_LE_GET_GUINT32 (data + 16);
1179 * A section is the third part of the property set stream.
1181 * 00 - 03 Section size A byte count for the section (which is inclusive
1182 * of the byte count itself and should always be a
1184 * 04 - 07 Property count A count of the number of properties
1185 * 08 - xx An array of 32-bit Property ID/Offset pairs
1186 * yy - zz An array of Property Type indicators/Value pairs
1188 for (i = 0 ; i < num_sections ; i++) {
1189 if (gsf_input_seek (in, sections[i].offset, G_SEEK_SET) ||
1190 NULL == (data = gsf_input_read (in, 8, NULL)))
1191 return g_error_new (gsf_input_error_id (), 0,
1192 "Invalid MS property section");
1194 sections[i].iconv_handle = (GIConv)-1;
1195 sections[i].char_size = 1;
1196 sections[i].dict = NULL;
1197 sections[i].size = GSF_LE_GET_GUINT32 (data); /* includes header */
1198 sections[i].num_props = GSF_LE_GET_GUINT32 (data + 4);
1200 d (g_print ("=============================================\n"
1201 "===> section #%d : type %d at offset 0x%x, size 0x%x, numprops = %u\n",
1202 i, (int)sections [i].type,
1203 (guint32)sections [i].offset,
1205 sections[i].num_props););
1207 if (sections[i].num_props <= 0)
1209 if (sections[i].num_props > gsf_input_remaining(in) / 8)
1210 return g_error_new (gsf_input_error_id (), 0,
1211 "Invalid MS property stream header or file truncated");
1213 if (sections[i].offset + sections[i].size > gsf_input_size(in))
1214 return g_error_new (gsf_input_error_id (), 0,
1215 "Invalid MS property stream header or file truncated");
1218 * Get and save all the Property ID/Offset pairs.
1220 * 00 - 03 id Property ID
1221 * 04 - 07 offset The distance from the start of the section to the
1222 * start of the Property Type/Value pair.
1224 d (g_print ("Offsets\n"););
1225 props = g_new (GsfMSOleMetaDataProp, sections[i].num_props);
1226 for (j = 0; j < sections[i].num_props; j++) {
1227 if (NULL == (data = gsf_input_read (in, 8, NULL))) {
1229 return g_error_new (gsf_input_error_id (), 0,
1230 "Invalid MS property section");
1233 props[j].id = GSF_LE_GET_GUINT32 (data);
1234 props[j].offset = GSF_LE_GET_GUINT32 (data + 4);
1235 d (g_print ("%d) ID=%d, offset=0x%x\n", j,
1236 props [j].id, (unsigned)props [j].offset););
1239 /* FIXME: Should we check that ids are distinct? */
1241 /* order prop info by offset to facilitate bounds checking */
1242 qsort (props, sections[i].num_props,
1243 sizeof (GsfMSOleMetaDataProp),
1246 /* Sanity checks. */
1247 for (j = 0; j < sections[i].num_props; j++) {
1248 guint end = (j == sections[i].num_props - 1)
1250 : props[j + 1].offset;
1251 if (props[j].offset < 0 || props[j].offset + 4 > end) {
1253 return g_error_new (gsf_input_error_id (), 0,
1254 "Invalid MS property section");
1259 * Find and process the code page.
1260 * Property ID 1 is reserved as an indicator of the code page.
1262 sections[i].iconv_handle = (GIConv)-1;
1263 sections[i].char_size = 1;
1264 for (j = 0; j < sections[i].num_props; j++) /* first codepage */
1265 if (props[j].id == 1) {
1266 msole_prop_read (in, sections+i, props, j, accum);
1267 if (NULL != (prop = gsf_doc_meta_data_lookup (accum, GSF_META_NAME_CODEPAGE))) {
1268 GValue const *val = gsf_doc_prop_get_val (prop);
1269 if (NULL != val && G_VALUE_HOLDS_INT (val)) {
1270 int codepage = g_value_get_int (val);
1271 sections[i].iconv_handle =
1272 gsf_msole_iconv_open_for_import (codepage);
1273 if (codepage == 1200 || codepage == 1201)
1274 sections[i].char_size = 2;
1279 if (sections[i].iconv_handle == (GIConv)-1)
1280 sections[i].iconv_handle = gsf_msole_iconv_open_for_import (1252);
1283 * Find and process the Property Set Dictionary
1284 * Property ID 0 is reserved as an indicator of the dictionary.
1285 * For User Defined Sections, Property ID 0 is NOT a dictionary.
1287 for (j = 0; j < sections[i].num_props; j++) /* then dictionary */
1288 if (props[j].id == 0)
1289 msole_prop_read (in, sections+i, props, j, accum);
1291 /* Process all the properties */
1292 for (j = 0; j < sections[i].num_props; j++) /* the rest */
1293 if (props[j].id > 1)
1294 msole_prop_read (in, sections+i, props, j, accum);
1296 gsf_iconv_close (sections[i].iconv_handle);
1298 if (sections[i].dict != NULL)
1299 g_hash_table_destroy (sections[i].dict);
1304 /****************************************************************************/
1308 gboolean doc_not_component;
1312 unsigned count; /* includes 2nd prop for links */
1319 static GsfMSOleVariantType
1320 gvalue_to_msole_vt (GValue const *value, GsfMSOleMetaDataPropMap const *map)
1322 g_return_val_if_fail (value != NULL, VT_EMPTY);
1324 switch (G_TYPE_FUNDAMENTAL (G_VALUE_TYPE (value))) {
1325 case G_TYPE_BOOLEAN: return VT_BOOL;
1326 case G_TYPE_UCHAR: return VT_UI1;
1327 case G_TYPE_FLOAT: return VT_R4;
1328 case G_TYPE_DOUBLE: return VT_R8;
1329 case G_TYPE_STRING: return VT_LPSTR;
1331 return (NULL != map && map->prefered_type == VT_I2)
1334 return (NULL != map && map->prefered_type == VT_UI2)
1337 if (VAL_IS_GSF_TIMESTAMP (value))
1341 if (VAL_IS_GSF_DOCPROP_VECTOR (value)) {
1342 GValueArray *vector = gsf_value_get_docprop_varray (value);
1344 GsfMSOleVariantType type, tmp;
1350 type = map->prefered_type & (~VT_VECTOR);
1351 if (type == VT_VARIANT)
1352 return VT_VECTOR | VT_VARIANT;
1355 n = vector->n_values;
1356 for (i = 0; i < n; i++) {
1357 tmp = gvalue_to_msole_vt (
1358 g_value_array_get_nth (vector, i), NULL);
1359 if (type == VT_UNKNOWN)
1361 else if (type != tmp)
1362 return VT_VECTOR | VT_VARIANT;
1364 return VT_VECTOR | type;
1371 /* Returns: TRUE on success */
/*
 * msole_metadata_write_prop:
 * @state: write state; the value is written to @state->out.
 * @name: gsf name of the property, or NULL for anonymous values (vector
 *	elements, link strings).  Used to look up the property map and in
 *	the warning below.
 * @value: the value to serialise; must not be NULL.
 * @suppress_type: when TRUE the 4 byte type tag is not written.
 *
 * Serialises one value in little endian form, optionally preceded by its
 * variant type.  Vectors are written as an element count followed by each
 * element, written through a recursive call.
 */
1373 msole_metadata_write_prop (WritePropState *state,
1375 GValue const *value,
1376 gboolean suppress_type)
1378 static guint8 const zero[1] = { '\0' };
/* A NULL name means there is no map and therefore no preferred type. */
1379 GsfMSOleMetaDataPropMap const *map =
1380 (name != NULL) ? msole_gsf_name_to_prop (name) : NULL;
1381 GsfMSOleVariantType type;
1384 g_return_val_if_fail (value != NULL, FALSE);
1386 type = gvalue_to_msole_vt (value, map);
/* Type tag: 4 bytes, little endian. */
1387 if (!suppress_type) {
1388 GSF_LE_SET_GUINT32 (buf, type);
1389 gsf_output_write (state->out, 4, buf);
/* Debug only: report when the exported type is not the preferred one. */
1391 if (NULL != map && map->prefered_type != type) {
1392 d(g_print ("Exporting property '%s' with type 0x%x rather than the usual 0x%x\n",
1393 map->gsf_name, type, map->prefered_type););
/* Vector: element count, then the elements.  Elements carry their own type
 * tag only when the vector is a vector of variants. */
1396 if (type & VT_VECTOR) {
1397 GValueArray *vector = gsf_value_get_docprop_varray (value);
1398 unsigned i, n = vector->n_values;
1401 GSF_LE_SET_GINT32 (buf, n);
1402 res = gsf_output_write (state->out, 4, buf);
1403 for (i = 0; i < n; i++)
1404 res &= msole_metadata_write_prop (state, NULL,
1405 g_value_array_get_nth (vector, i),
1406 type != (VT_VECTOR | VT_VARIANT));
/* Boolean: all bits set for TRUE, zero for FALSE, stored in 4 bytes. */
1412 if (g_value_get_boolean (value))
1413 GSF_LE_SET_GINT32 (buf, 0xffffffff);
1415 GSF_LE_SET_GINT32 (buf, 0);
1416 return gsf_output_write (state->out, 4, buf);
/* Unsigned char, widened to 4 bytes. */
1418 GSF_LE_SET_GUINT32 (buf, g_value_get_uchar (value));
1419 return gsf_output_write (state->out, 4, buf);
/* 16 bit integer followed by two zero bytes so that 4 bytes are written. */
1421 GSF_LE_SET_GINT16 (buf, g_value_get_int (value));
1422 GSF_LE_SET_GUINT16 (buf+2, 0);
1423 return gsf_output_write (state->out, 4, buf);
1425 GSF_LE_SET_GINT32 (buf, g_value_get_int (value));
1426 return gsf_output_write (state->out, 4, buf);
1429 GSF_LE_SET_GUINT32 (buf, g_value_get_uint (value));
1430 return gsf_output_write (state->out, 4, buf);
1432 GSF_LE_SET_FLOAT (buf, g_value_get_float (value));
1433 return gsf_output_write (state->out, 4, buf);
1435 GSF_LE_SET_DOUBLE (buf, g_value_get_double (value));
1436 return gsf_output_write (state->out, 8, buf);
/* String: 4 byte length that counts the terminator, the bytes as stored in
 * the GValue, then one zero byte.  A NULL string is written as length 1
 * followed by the terminator only.  No conversion to state->codepage is
 * done here (see the FIXME). */
1439 /* FIXME FIXME FIXME TODO : use iconv from codepage */
1440 char const *txt = g_value_get_string (value);
1441 unsigned len = (NULL != txt) ? strlen (txt) : 0;
1442 GSF_LE_SET_GUINT32 (buf, len+1);
1443 return gsf_output_write (state->out, 4, buf) &&
1444 gsf_output_write (state->out, len, txt) &&
1445 gsf_output_write (state->out, 1, zero);
/* Timestamp, written as an 8 byte value.
 * NOTE(review): timet is narrowed to gint32 before use, so values outside
 * the 32 bit range are truncated; confirm whether that is intended.
 * The constant added below is presumably the number of seconds between
 * the 1601 and 1970 epochs; the scaling step is not visible here. */
1448 case VT_FILETIME : {
1449 GsfTimestamp const *ts = g_value_get_boxed (value);
1450 gint32 timet_signed = (gint32) ts->timet;
1453 ft = timet_signed + G_GINT64_CONSTANT (11644473600);
1456 GSF_LE_SET_GUINT64 (buf, ft);
1458 return gsf_output_write (state->out, 8, buf);
/* Reached for value types that none of the cases above handled. */
1465 g_warning ("Ignoring property '%s', how do we export a property of type '%s'",
1466 name ? name : "<unnamed>",
1467 g_type_name (G_TYPE_FUNDAMENTAL (G_VALUE_TYPE (value))));
/*
 * cb_write_dict:
 * @name: the user visible property name (hash key).
 * @id: the numeric property id, packed in a pointer (hash value).
 * @state: write state; the entry goes to @state->out.
 *
 * Writes one dictionary entry: 4 byte id, 4 byte length, the name, and a
 * zero byte.  The results of the individual writes are not checked.
 *
 * NOTE(review): len already counts the terminator (strlen + 1), the length
 * field stores len + 1, and len bytes of name plus one extra zero byte are
 * written.  Every entry therefore ends in two zero bytes; confirm that
 * this is what readers expect.
 */
1472 cb_write_dict (char const *name, gpointer id, WritePropState *state)
1474 static guint8 const zero[1] = { '\0' };
1476 unsigned len = strlen (name) + 1;
1477 GSF_LE_SET_GUINT32 (buf, GPOINTER_TO_UINT (id));
1478 GSF_LE_SET_GUINT32 (buf+4, len+1);
1479 gsf_output_write (state->out, 8, buf);
1480 gsf_output_write (state->out, len, name);
1481 gsf_output_write (state->out, 1, zero);
/*
 * msole_metadata_write_section:
 * @state: write state (output, codepage, dictionary, property lists).
 * @user: TRUE to write the user defined section, FALSE for the builtin one.
 *
 * Writes one property section starting at the current output position:
 * room is left for the section size, the property count and the id/offset
 * table, the values are written while their offsets are recorded, and the
 * reserved area is then filled in by seeking back.
 *
 * Returns: the result of the final seek to the end of the output on the
 *	path visible here.
 *
 * NOTE(review): most gsf_output_write results in this function are
 * ignored, and the offsets table is stack allocated with a size driven by
 * the property count.
 */
1485 msole_metadata_write_section (WritePropState *state, gboolean user)
1489 GSList *ptr = user ? state->user.props : state->builtin.props;
1490 unsigned count = user ? state->user.count : state->builtin.count;
/* base is the start of the section; stored offsets are relative to it. */
1491 gsf_off_t len, base = gsf_output_tell (state->out);
1492 GsfMSOleMetaDataProp *offsets;
1493 GsfMSOleMetaDataPropMap const *map;
1494 GsfDocProp const *prop;
/* A user section makes no sense without a dictionary. */
1499 if (user && state->dict == NULL)
1502 /* Skip past the size and id/offset pairs */
1503 if (!gsf_output_seek (state->out,
1506 8 * count /* id/offset pairs */,
/* scratch is a string GValue reused for every link target written below. */
1510 memset (&scratch, 0, sizeof (GValue));
1511 g_value_init (&scratch, G_TYPE_STRING);
1513 offsets = g_alloca (sizeof (GsfMSOleMetaDataProp) * count);
/* Slot 0: the codepage, tagged VT_I2. */
1517 offsets[0].offset = gsf_output_tell (state->out);
1518 GSF_LE_SET_GUINT32 (buf, VT_I2);
1519 GSF_LE_SET_GUINT32 (buf+4, state->codepage);
1520 gsf_output_write (state->out, 8, buf);
/* Slot 1: the dictionary, i.e. the entry count followed by one record per
 * name, each written by cb_write_dict. */
1525 offsets[1].offset = gsf_output_tell (state->out);
1526 GSF_LE_SET_GUINT32 (buf, g_hash_table_size (state->dict));
1527 gsf_output_write (state->out, 4, buf);
1528 g_hash_table_foreach (state->dict,
1529 (GHFunc) cb_write_dict, state);
1534 offsets[i].offset = gsf_output_tell (state->out);
/* One slot per property; the loop also stops once count slots are used. */
1537 for (; ptr != NULL && i < count ; ptr = ptr->next, i++) {
1538 offsets[i].offset = gsf_output_tell (state->out);
1540 name = gsf_doc_prop_get_name (prop);
/* Ids of user defined properties come from the dictionary; ids below 2
 * are rejected with a warning. */
1542 tmp = g_hash_table_lookup (state->dict, name);
1543 offsets[i].id = GPOINTER_TO_INT (tmp);
1544 if (offsets[i].id < 2) {
1545 g_warning ("Invalid ID (%d) for custom name '%s'", offsets[i].id, name);
/* Ids of builtin properties come from the static property map. */
1549 map = msole_gsf_name_to_prop (name);
1551 g_warning ("Missing map for built-in property '%s'", name);
1554 offsets[i].id = map->id;
1557 msole_metadata_write_prop (state, name,
1558 gsf_doc_prop_get_val (prop), FALSE);
/* A linked property takes a second slot: the id of the previous slot with
 * 0x1000000 or-ed in, holding the link target as a string. */
1559 if (gsf_doc_prop_get_link (prop)) {
1561 offsets[i].id = offsets[i-1].id | 0x1000000;
1562 offsets[i].offset = gsf_output_tell (state->out);
1563 g_value_set_static_string (&scratch,
1564 gsf_doc_prop_get_link (prop));
1565 msole_metadata_write_prop (state, NULL, &scratch, FALSE);
/* Go back and fill in the section size, the slot count and the id/offset
 * table that was skipped at the start. */
1569 len = gsf_output_tell (state->out) - base;
1570 gsf_output_seek (state->out, base, G_SEEK_SET);
1571 GSF_LE_SET_GUINT32 (buf, len);
1572 GSF_LE_SET_GUINT32 (buf+4, count);
1573 gsf_output_write (state->out, 8, buf);
1574 for (i = 0 ; i < count ; i++) {
1575 GSF_LE_SET_GUINT32 (buf, offsets[i].id);
1576 GSF_LE_SET_GUINT32 (buf+4, offsets[i].offset - base);
1577 gsf_output_write (state->out, 8, buf);
/* Leave the output positioned at its end for whatever is written next. */
1580 return gsf_output_seek (state->out, 0, G_SEEK_END);
/*
 * cb_count_props:
 * @name: gsf name of the property.
 * @prop: the property.
 * @state: write state that accumulates counts, lists and the dictionary.
 *
 * Per-property callback (used with gsf_doc_meta_data_foreach) that sorts
 * properties into builtin ones, which have an entry in the static map,
 * and user defined ones, which get an id in the dictionary.  It also
 * counts how many id/offset slots each section will need.
 */
1584 cb_count_props (char const *name, GsfDocProp *prop, WritePropState *state)
1586 GsfMSOleMetaDataPropMap const *map = msole_gsf_name_to_prop (name);
1588 /* allocate predefined ids or add it to the dictionary */
1590 if (map->id == 0) return; /* dictionary is handled elsewhere */
/* True when the property belongs to the other summary stream; the action
 * taken is on a line not shown here (presumably it is skipped). */
1591 if (map->section == (state->doc_not_component ? COMPONENT_PROP : DOC_PROP))
/* Id 1 is the codepage: remember an integer value in state->codepage. */
1593 if (map->id == 1) { /*codepage */
1594 GValue const *val = gsf_doc_prop_get_val (prop);
1595 if (NULL != val && G_VALUE_HOLDS_INT (val))
1596 state->codepage = g_value_get_int (val);
1600 d (g_print ("%d) Adding builtin %s'\n",
1601 state->builtin.count, map->gsf_name););
/* A linked property occupies two slots, hence the 2. */
1602 state->builtin.count += gsf_doc_prop_get_link (prop) ? 2 : 1;
1603 state->builtin.props = g_slist_prepend (state->builtin.props, prop);
1604 } else if (state->doc_not_component) { /* keep user props in the document */
1605 d (g_print("user defined named '%s' assigned id = %d\n",
1606 name, state->user.count););
/* The dictionary is created on first use.  It stores the name pointer
 * itself (no copy is made here) and the id is the current slot count. */
1607 if (NULL == state->dict)
1608 state->dict = g_hash_table_new (g_str_hash, g_str_equal);
1609 g_hash_table_insert (state->dict,
1610 (gpointer) name, GINT_TO_POINTER (state->user.count));
1611 state->user.count += gsf_doc_prop_get_link (prop) ? 2 : 1;
1612 state->user.props = g_slist_prepend (state->user.props, prop);
1617 * gsf_msole_metadata_write :
1619 * @meta_data : #GsfDocMetaData
1620 * @doc_not_component : a kludge to differentiate DocumentSummary from Summary
1622 * Returns: %TRUE on success;
/* Writes the property stream: a fixed header, the number of sections, one
 * GUID + offset record per section, then the builtin section and, when any
 * user defined properties were found, the user section. */
1625 gsf_msole_metadata_write (GsfOutput *out,
1626 GsfDocMetaData const *meta_data,
1627 gboolean doc_not_component)
/* Fixed leading bytes of the stream. */
1629 static guint8 const header[] = {
1630 0xfe, 0xff, /* byte order */
1632 0x04, 0x0a, /* OS : XP == 0xA04 */
1633 0x02, 0x00, /* win32 == 2 */
1634 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* clasid = 0 */
1637 gboolean success = FALSE;
1639 WritePropState state;
/* Codepage used unless a codepage property overrides it. */
1641 state.codepage = 1252;
/* Slots that exist even when there are no properties at all. */
1644 state.builtin.count = 1; /* codepage */
1645 state.user.count = 2; /* codepage and dictionary */
1646 state.builtin.props = state.user.props = NULL;
1647 state.doc_not_component = doc_not_component;
/* First pass: classify and count the properties. */
1648 d (g_print ("================================\nFinding props\n"););
1649 gsf_doc_meta_data_foreach (meta_data,
1650 (GHFunc) cb_count_props, &state);
1651 d (g_print ("Done\n"
1652 "================================\n"););
/* Two sections when a dictionary (user properties) exists, else one. */
1654 /* Write stream header */
1655 GSF_LE_SET_GUINT32 (buf, (state.dict != NULL) ? 2 : 1);
1656 if (!gsf_output_write (out, sizeof (header), header) ||
1657 !gsf_output_write (out, 4, buf))
/* Offset of the first section: 0x44 when a second GUID + offset record
 * follows, 0x30 otherwise. */
1660 /* Write section header(s) */
1661 GSF_LE_SET_GUINT32 (buf, (state.dict != NULL) ? 0x44 : 0x30);
1662 if (!gsf_output_write (out, 16,
1663 doc_not_component ? document_guid : component_guid) ||
1664 !gsf_output_write (out, 4, buf))
/* The offset of the user section is not known yet; 0 is written as a
 * placeholder and patched further down. */
1666 if (state.dict != NULL) {
1667 GSF_LE_SET_GUINT32 (buf, 0);
1668 if (!gsf_output_write (out, sizeof (user_guid), user_guid) ||
1669 !gsf_output_write (out, 4, buf)) /* bogus position, fix it later */
1673 /* Write section(s) */
1674 if (!msole_metadata_write_section (&state, FALSE))
/* Patch the placeholder at 0x40 with the real start of the user section,
 * return to the end of the output and write that section. */
1676 if (state.dict != NULL) {
1677 gsf_off_t base = gsf_output_tell (state.out);
1678 GSF_LE_SET_GUINT32 (buf, base);
1679 if (!gsf_output_seek (state.out, 0x40, G_SEEK_SET) ||
1680 !gsf_output_write (out, 4, buf) ||
1681 !gsf_output_seek (state.out, 0, G_SEEK_END) ||
1682 !msole_metadata_write_section (&state, TRUE))
/* Only the list cells and the hash table are released here; the
 * properties and names they point at are not freed. */
1688 g_slist_free (state.builtin.props);
1689 g_slist_free (state.user.props);
1690 if (state.dict != NULL)
1691 g_hash_table_destroy (state.dict);
/*
 * Table pairing a locale style language tag with a Windows language id
 * (LID).  gsf_msole_lid_for_language and gsf_msole_language_for_lid both
 * scan it from the top and stop at the first hit, so the order of the
 * entries matters whenever a tag or a LID appears more than once.
 *
 * NOTE(review): LID 0x100c is listed for both "fr_CH" and "gd_GB", and LID
 * 0x044c for both "az" (Azeri Latin) and "ml"; in a lookup by LID only the
 * first of each pair can be returned.  Several tags ("az", "sr", "uz",
 * "es_ES", "lt_LT", "-none-") also occur with more than one LID, so a
 * lookup by tag yields the first LID listed.  Confirm the intended values.
 */
1698 } const gsf_msole_language_ids[] = {
1699 { "-none-", 0x0000 }, /* none (language neutral) */
1700 { "-none-", 0x0400 }, /* none */
1701 { "af_ZA", 0x0436 }, /* Afrikaans */
1702 { "am", 0x045e }, /* Amharic */
1703 { "sq_AL", 0x041c }, /* Albanian */
1704 { "ar_SA", 0x0401 }, /* Arabic (Saudi) */
1705 { "ar_IQ", 0x0801 }, /* Arabic (Iraq) */
1706 { "ar_EG", 0x0c01 }, /* Arabic (Egypt) */
1707 { "ar_LY", 0x1001 }, /* Arabic (Libya) */
1708 { "ar_DZ", 0x1401 }, /* Arabic (Algeria) */
1709 { "ar_MA", 0x1801 }, /* Arabic (Morocco) */
1710 { "ar_TN", 0x1c01 }, /* Arabic (Tunisia) */
1711 { "ar_OM", 0x2001 }, /* Arabic (Oman) */
1712 { "ar_YE", 0x2401 }, /* Arabic (Yemen) */
1713 { "ar_SY", 0x2801 }, /* Arabic (Syria) */
1714 { "ar_JO", 0x2c01 }, /* Arabic (Jordan) */
1715 { "ar_LB", 0x3001 }, /* Arabic (Lebanon) */
1716 { "ar_KW", 0x3401 }, /* Arabic (Kuwait) */
1717 { "ar_AE", 0x3801 }, /* Arabic (United Arab Emirates) */
1718 { "ar_BH", 0x3c01 }, /* Arabic (Bahrain) */
1719 { "ar_QA", 0x4001 }, /* Arabic (Qatar) */
1720 { "as", 0x044d }, /* Assamese */
1721 { "az", 0x042c }, /* Azerbaijani */
1722 { "hy_AM", 0x042b }, /* Armenian */
1723 { "az", 0x044c }, /* Azeri (Latin) az_ */
1724 { "az", 0x082c }, /* Azeri (Cyrillic) az_ */
1725 { "eu_ES", 0x042d }, /* Basque */
1726 { "be_BY", 0x0423 }, /* Belarussian */
1727 { "bn", 0x0445 }, /* Bengali bn_ */
1728 { "bg_BG", 0x0402 }, /* Bulgarian */
1729 { "ca_ES", 0x0403 }, /* Catalan */
1730 { "zh_TW", 0x0404 }, /* Chinese (Taiwan) */
1731 { "zh_CN", 0x0804 }, /* Chinese (PRC) */
1732 { "zh_HK", 0x0c04 }, /* Chinese (Hong Kong) */
1733 { "zh_SG", 0x1004 }, /* Chinese (Singapore) */
1734 { "ch_MO", 0x1404 }, /* Chinese (Macau SAR) */
1735 { "hr_HR", 0x041a }, /* Croatian */
1736 { "cs_CZ", 0x0405 }, /* Czech */
1737 { "da_DK", 0x0406 }, /* Danish */
1738 { "div", 0x465 }, /* Divehi div_*/
1739 { "nl_NL", 0x0413 }, /* Dutch (Netherlands) */
1740 { "nl_BE", 0x0813 }, /* Dutch (Belgium) */
1741 { "en_US", 0x0409 }, /* English (USA) */
1742 { "en_GB", 0x0809 }, /* English (UK) */
1743 { "en_AU", 0x0c09 }, /* English (Australia) */
1744 { "en_CA", 0x1009 }, /* English (Canada) */
1745 { "en_NZ", 0x1409 }, /* English (New Zealand) */
1746 { "en_IE", 0x1809 }, /* English (Ireland) */
1747 { "en_ZA", 0x1c09 }, /* English (South Africa) */
1748 { "en_JM", 0x2009 }, /* English (Jamaica) */
1749 { "en", 0x2409 }, /* English (Caribbean) */
1750 { "en_BZ", 0x2809 }, /* English (Belize) */
1751 { "en_TT", 0x2c09 }, /* English (Trinidad) */
1752 { "en_ZW", 0x3009 }, /* English (Zimbabwe) */
1753 { "en_PH", 0x3409 }, /* English (Philippines) */
1754 { "et_EE", 0x0425 }, /* Estonian */
1755 { "fo", 0x0438 }, /* Faeroese fo_ */
1756 { "fa_IR", 0x0429 }, /* Farsi */
1757 { "fi_FI", 0x040b }, /* Finnish */
1758 { "fr_FR", 0x040c }, /* French (France) */
1759 { "fr_BE", 0x080c }, /* French (Belgium) */
1760 { "fr_CA", 0x0c0c }, /* French (Canada) */
1761 { "fr_CH", 0x100c }, /* French (Switzerland) */
1762 { "fr_LU", 0x140c }, /* French (Luxembourg) */
1763 { "fr_MC", 0x180c }, /* French (Monaco) */
1764 { "gl", 0x0456 }, /* Galician gl_ */
1765 { "ga_IE", 0x083c }, /* Irish Gaelic */
1766 { "gd_GB", 0x100c }, /* Scottish Gaelic */
1767 { "ka_GE", 0x0437 }, /* Georgian */
1768 { "de_DE", 0x0407 }, /* German (Germany) */
1769 { "de_CH", 0x0807 }, /* German (Switzerland) */
1770 { "de_AT", 0x0c07 }, /* German (Austria) */
1771 { "de_LU", 0x1007 }, /* German (Luxembourg) */
1772 { "de_LI", 0x1407 }, /* German (Liechtenstein) */
1773 { "el_GR", 0x0408 }, /* Greek */
1774 { "gu", 0x0447 }, /* Gujarati gu_ */
1775 { "ha", 0x0468 }, /* Hausa */
1776 { "he_IL", 0x040d }, /* Hebrew */
1777 { "hi_IN", 0x0439 }, /* Hindi */
1778 { "hu_HU", 0x040e }, /* Hungarian */
1779 { "is_IS", 0x040f }, /* Icelandic */
1780 { "id_ID", 0x0421 }, /* Indonesian */
1781 { "iu", 0x045d }, /* Inuktitut */
1782 { "it_IT", 0x0410 }, /* Italian (Italy) */
1783 { "it_CH", 0x0810 }, /* Italian (Switzerland) */
1784 { "ja_JP", 0x0411}, /* Japanese */
1785 { "kn", 0x044b }, /* Kannada kn_ */
1786 { "ks", 0x0860 }, /* Kashmiri (India) ks_ */
1787 { "kk", 0x043f }, /* Kazakh kk_ */
1788 { "kok", 0x0457 }, /* Konkani kok_ */
1789 { "ko_KR", 0x0412 }, /* Korean */
1790 { "ko", 0x0812 }, /* Korean (Johab) ko_ */
1791 { "kir", 0x0440 }, /* Kyrgyz */
1792 { "la", 0x0476 }, /* Latin */
1793 { "lo", 0x0454 }, /* Laothian */
1794 { "lv_LV", 0x0426 }, /* Latvian */
1795 { "lt_LT", 0x0427 }, /* Lithuanian */
1796 { "lt_LT", 0x0827 }, /* Lithuanian (Classic) */
1797 { "mk", 0x042f }, /* FYRO Macedonian */
1798 { "my_MY", 0x043e }, /* Malaysian */
1799 { "my_BN", 0x083e }, /* Malay Brunei Darussalam */
1800 { "ml", 0x044c }, /* Malayalam ml_ */
1801 { "mr", 0x044e }, /* Marathi mr_ */
1802 { "mt", 0x043a }, /* Maltese */
1803 { "mo", 0x0450 }, /* Mongolian */
1804 { "ne_NP", 0x0461 }, /* Nepali (Nepal) */
1805 { "ne_IN", 0x0861 }, /* Nepali (India) */
1806 { "nb_NO", 0x0414 }, /* Norwegian (Bokmaal) */
1807 { "nn_NO", 0x0814 }, /* Norwegian (Nynorsk) */
1808 { "or", 0x0448 }, /* Oriya or_ */
1809 { "om", 0x0472 }, /* Oromo (Afan, Galla) */
1810 { "pl_PL", 0x0415 }, /* Polish */
1811 { "pt_BR", 0x0416 }, /* Portuguese (Brazil) */
1812 { "pt_PT", 0x0816 }, /* Portuguese (Portugal) */
1813 { "pa", 0x0446 }, /* Punjabi pa_ */
1814 { "ps", 0x0463 }, /* Pashto (Pushto) */
1815 { "rm", 0x0417 }, /* Rhaeto_Romanic rm_ */
1816 { "ro_RO", 0x0418 }, /* Romanian */
1817 { "ro_MD", 0x0818 }, /* Romanian (Moldova) */
1818 { "ru_RU", 0x0419 }, /* Russian */
1819 { "ru_MD", 0x0819 }, /* Russian (Moldova) */
1820 { "se", 0x043b }, /* Sami (Lappish) se_ */
1821 { "sa", 0x044f }, /* Sanskrit sa_ */
1822 { "sr", 0x0c1a }, /* Serbian (Cyrillic) sr_ */
1823 { "sr", 0x081a }, /* Serbian (Latin) sr_ */
1824 { "sd", 0x0459 }, /* Sindhi sd_ */
1825 { "sk_SK", 0x041b }, /* Slovak */
1826 { "sl_SI", 0x0424 }, /* Slovenian */
1827 { "wen", 0x042e }, /* Sorbian wen_ */
1828 { "so", 0x0477 }, /* Somali */
1829 { "es_ES", 0x040a }, /* Spanish (Spain, Traditional) */
1830 { "es_MX", 0x080a }, /* Spanish (Mexico) */
1831 { "es_ES", 0x0c0a }, /* Spanish (Modern) */
1832 { "es_GT", 0x100a }, /* Spanish (Guatemala) */
1833 { "es_CR", 0x140a }, /* Spanish (Costa Rica) */
1834 { "es_PA", 0x180a }, /* Spanish (Panama) */
1835 { "es_DO", 0x1c0a }, /* Spanish (Dominican Republic) */
1836 { "es_VE", 0x200a }, /* Spanish (Venezuela) */
1837 { "es_CO", 0x240a }, /* Spanish (Colombia) */
1838 { "es_PE", 0x280a }, /* Spanish (Peru) */
1839 { "es_AR", 0x2c0a }, /* Spanish (Argentina) */
1840 { "es_EC", 0x300a }, /* Spanish (Ecuador) */
1841 { "es_CL", 0x340a }, /* Spanish (Chile) */
1842 { "es_UY", 0x380a }, /* Spanish (Uruguay) */
1843 { "es_PY", 0x3c0a }, /* Spanish (Paraguay) */
1844 { "es_BO", 0x400a }, /* Spanish (Bolivia) */
1845 { "es_SV", 0x440a }, /* Spanish (El Salvador) */
1846 { "es_HN", 0x480a }, /* Spanish (Honduras) */
1847 { "es_NI", 0x4c0a }, /* Spanish (Nicaragua) */
1848 { "es_PR", 0x500a }, /* Spanish (Puerto Rico) */
1849 { "sx", 0x0430 }, /* Sutu */
1850 { "sw", 0x0441 }, /* Swahili (Kiswahili/Kenya) */
1851 { "sv_SE", 0x041d }, /* Swedish */
1852 { "sv_FI", 0x081d }, /* Swedish (Finland) */
1853 { "ta", 0x0449 }, /* Tamil ta_ */
1854 { "tt", 0x0444 }, /* Tatar (Tatarstan) tt_ */
1855 { "te", 0x044a }, /* Telugu te_ */
1856 { "th_TH", 0x041e }, /* Thai */
1857 { "ts", 0x0431 }, /* Tsonga ts_ */
1858 { "tn", 0x0432 }, /* Tswana tn_ */
1859 { "tr_TR", 0x041f }, /* Turkish */
1860 { "tl", 0x0464 }, /* Tagalog */
1861 { "tg", 0x0428 }, /* Tajik */
1862 { "bo", 0x0451 }, /* Tibetan */
1863 { "ti", 0x0473 }, /* Tigrinya */
1864 { "uk_UA", 0x0422 }, /* Ukrainian */
1865 { "ur_PK", 0x0420 }, /* Urdu (Pakistan) */
1866 { "ur_IN", 0x0820 }, /* Urdu (India) */
1867 { "uz", 0x0443 }, /* Uzbek (Latin) uz_ */
1868 { "uz", 0x0843 }, /* Uzbek (Cyrillic) uz_ */
1869 { "ven", 0x0433 }, /* Venda ven_ */
1870 { "vi_VN", 0x042a }, /* Vietnamese */
1871 { "cy_GB", 0x0452 }, /* Welsh */
1872 { "xh", 0x0434 }, /* Xhosa xh */
1873 { "yi", 0x043d }, /* Yiddish yi_ */
1874 { "yo", 0x046a }, /* Yoruba */
1875 { "zu", 0x0435 }, /* Zulu zu_ */
1876 { "en_US", 0x0800 } /* Default */
1880 * gsf_msole_lid_for_language
1883 * Returns: the LID (Language Identifier) for the input language.
1884 * If lang is %NULL, return 0x0400 ("-none-"), and not 0x0000 ("no proofing")
/* Looks @lang up in gsf_msole_language_ids and returns the LID of the first
 * entry whose tag starts with @lang; 0x0400 when nothing matches. */
1887 gsf_msole_lid_for_language (char const *lang)
1893 return 0x0400; /* return -none- */
/* NOTE(review): only strlen (lang) characters are compared, so @lang has to
 * be a prefix of the table tag ("fr" matches "fr_FR").  A longer locale such
 * as "fr_FR@euro" does not match "fr_FR", and an empty string compares
 * equal to the first entry, whose LID is 0x0000. */
1895 /* Allow lang to match as a prefix (eg fr == fr_FR@euro) */
1896 len = strlen (lang);
1897 for (i = 0 ; i < G_N_ELEMENTS(gsf_msole_language_ids); i++)
1898 if (!strncmp (lang, gsf_msole_language_ids[i].tag, len))
1899 return gsf_msole_language_ids[i].lid;
1901 return 0x0400 ; /* return -none- */
1905 * gsf_msole_language_for_lid :
1908 * Returns: the xx_YY style string (can be just xx or xxx) for the given LID.
1909 * Return value must not be freed. If the LID is not found, is set to
1910 * 0x0400, or is set to 0x0000, will return "-none-"
/* Linear search of gsf_msole_language_ids for an exact LID match.  The tag
 * of the first matching entry is returned; it points into the static table.
 * "-none-" is returned when no entry has that LID. */
1913 gsf_msole_language_for_lid (guint lid)
1917 for (i = 0 ; i < G_N_ELEMENTS(gsf_msole_language_ids); i++)
1918 if (gsf_msole_language_ids[i].lid == lid)
1919 return gsf_msole_language_ids[i].tag;
1921 return "-none-"; /* default */
1925 * gsf_msole_codepage_to_lid :
1927 * Convert the codepage into an applicable LID
/* Maps a character set identifier to one representative LID.  According to
 * the comments on the cases, the values handled are the *_CHARSET numbers
 * rather than Windows code page numbers.  Several character sets cover
 * many languages, in which case a single one is picked ("And others!!").
 * The result for values not listed is on a line not shown in this view. */
1930 gsf_msole_codepage_to_lid (int codepage)
1933 case 77: /* MAC_CHARSET */
1934 return 0xFFF; /* This number is a hack */
1935 case 128: /* SHIFTJIS_CHARSET */
1936 return 0x411; /* Japanese */
1937 case 129: /* HANGEUL_CHARSET */
1938 return 0x412; /* Korean */
1939 case 130: /* JOHAB_CHARSET */
1940 return 0x812; /* Korean (Johab) */
1941 case 134: /* GB2312_CHARSET - Chinese Simplified */
1942 return 0x804; /* China PRC - And others!! */
1943 case 136: /* CHINESEBIG5_CHARSET - Chinese Traditional */
1944 return 0x404; /* Taiwan - And others!! */
1945 case 161: /* GREEK_CHARSET */
1946 return 0x408; /* Greek */
1947 case 162: /* TURKISH_CHARSET */
1948 return 0x41f; /* Turkish */
1949 case 163: /* VIETNAMESE_CHARSET */
1950 return 0x42a; /* Vietnamese */
1951 case 177: /* HEBREW_CHARSET */
1952 return 0x40d; /* Hebrew */
/* NOTE(review): unlike the other cases this returns a bare 0x01 rather than
 * a value of the form 0x04xx; gsf_msole_language_ids has no entry for 0x01.
 * Confirm whether 0x0401 was meant. */
1953 case 178: /* ARABIC_CHARSET */
1954 return 0x01; /* Arabic */
1955 case 186: /* BALTIC_CHARSET */
1956 return 0x425; /* Estonian - And others!! */
1957 case 204: /* RUSSIAN_CHARSET */
1958 return 0x419; /* Russian - And others!! */
1959 case 222: /* THAI_CHARSET */
1960 return 0x41e; /* Thai */
1961 case 238: /* EASTEUROPE_CHARSET */
1962 return 0x405; /* Czech - And many others!! */
1970 * gsf_msole_lid_to_codepage
1973 * Returns: our best guess at the codepage for the given language id
/* Guesses a Windows code page for a language id.  The decision is made on
 * the low byte of @lid (the switch below), with a few languages needing
 * the complete LID to tell variants apart.  Most of the values returned
 * are on lines not shown in this view; the visible ones are 1252, 1251
 * and 0, where 0 is commented as "Unicode only". */
1976 gsf_msole_lid_to_codepage (guint lid)
/* 0x0FFF is the pseudo LID that gsf_msole_codepage_to_lid returns for
 * MAC_CHARSET. */
1978 if (lid == 0x0FFF) /* Macintosh Hack */
1981 switch (lid & 0xff) {
1982 case 0x01: /* Arabic */
1984 case 0x02: /* Bulgarian */
1986 case 0x03: /* Catalan */
/* The labels with full 16 bit values below presumably belong to a nested
 * switch on the complete lid (not visible here); confirm. */
1988 case 0x04: /* Chinese */
1990 case 0x1004: /* Chinese (Singapore) */
1991 case 0x0404: /* Chinese (Taiwan) */
1992 case 0x1404: /* Chinese (Macau SAR) */
1993 case 0x0c04: /* Chinese (Hong Kong SAR, PRC) */
1996 case 0x0804: /* Chinese (PRC) */
2002 case 0x05: /* Czech */
2004 case 0x06: /* Danish */
2006 case 0x07: /* German */
2008 case 0x08: /* Greek */
2010 case 0x09: /* English */
2012 case 0x0a: /* Spanish */
2014 case 0x0b: /* Finnish */
2016 case 0x0c: /* French */
2018 case 0x0d: /* Hebrew */
2020 case 0x0e: /* Hungarian */
2022 case 0x0f: /* Icelandic */
2024 case 0x10: /* Italian */
2026 case 0x11: /* Japanese */
2028 case 0x12: /* Korean */
2030 case 0x0812: /* Korean (Johab) */
2032 case 0x0412: /* Korean */
2038 case 0x13: /* Dutch */
2040 case 0x14: /* Norwegian */
2042 case 0x15: /* Polish */
2044 case 0x16: /* Portuguese */
2046 case 0x17: /* Rhaeto-Romanic */
2048 case 0x18: /* Romanian */
2050 case 0x19: /* Russian */
2052 case 0x1a: /* Serbian, Croatian, (Bosnian?) */
2054 case 0x041a: /* Croatian */
2056 case 0x0c1a: /* Serbian (Cyrillic) */
2058 case 0x081a: /* Serbian (Latin) */
2064 case 0x1b: /* Slovak */
2066 case 0x1c: /* Albanian */
2068 case 0x1d: /* Swedish */
2070 case 0x1e: /* Thai */
2072 case 0x1f: /* Turkish */
2074 case 0x20: /* Urdu. This is Unicode only. */
2076 case 0x21: /* Bahasa Indonesian */
2078 case 0x22: /* Ukrainian */
2080 case 0x23: /* Byelorussian / Belarusian */
2082 case 0x24: /* Slovenian */
2084 case 0x25: /* Estonian */
2086 case 0x26: /* Latvian */
2088 case 0x27: /* Lithuanian */
2090 case 0x29: /* Farsi / Persian. This is Unicode only. */
2092 case 0x2a: /* Vietnamese */
2094 case 0x2b: /* Windows 2000: Armenian. This is Unicode only. */
2096 case 0x2c: /* Azeri */
2098 case 0x082c: /* Azeri (Cyrillic) */
2104 case 0x2d: /* Basque */
2106 case 0x2f: /* Macedonian */
2108 case 0x36: /* Afrikaans */
2110 case 0x37: /* Windows 2000: Georgian. This is Unicode only. */
2112 case 0x38: /* Faeroese */
2114 case 0x39: /* Windows 2000: Hindi. This is Unicode only. */
2116 case 0x3E: /* Malaysian / Malay */
2118 case 0x41: /* Swahili */
2120 case 0x43: /* Uzbek */
2122 case 0x0843: /* Uzbek (Cyrillic) */
2128 case 0x45: /* Windows 2000: Bengali. This is Unicode only. */
2129 case 0x46: /* Windows 2000: Punjabi. This is Unicode only. */
2130 case 0x47: /* Windows 2000: Gujarati. This is Unicode only. */
2131 case 0x48: /* Windows 2000: Oriya. This is Unicode only. */
2132 case 0x49: /* Windows 2000: Tamil. This is Unicode only. */
2133 case 0x4a: /* Windows 2000: Telugu. This is Unicode only. */
2134 case 0x4b: /* Windows 2000: Kannada. This is Unicode only. */
2135 case 0x4c: /* Windows 2000: Malayalam. This is Unicode only. */
2136 case 0x4d: /* Windows 2000: Assamese. This is Unicode only. */
2137 case 0x4e: /* Windows 2000: Marathi. This is Unicode only. */
2138 case 0x4f: /* Windows 2000: Sanskrit. This is Unicode only. */
2139 case 0x55: /* Myanmar / Burmese. This is Unicode only. */
2140 case 0x57: /* Windows 2000: Konkani. This is Unicode only. */
2141 case 0x61: /* Windows 2000: Nepali (India). This is Unicode only. */
2145 /******************************************************************
2146 * Below this line is untested, unproven, and are just guesses. *
2147 * Insert above and use at your own risk *
2148 ******************************************************************/
/* NOTE(review): if the two 16 bit labels below sit directly in the
 * switch on (lid & 0xff), they can never be selected because the
 * controlling value is at most 0xff; confirm against the full source. */
2150 case 0x042c: /* Azeri (Latin) */
2151 case 0x0443: /* Uzbek (Latin) */
2152 case 0x30: /* Sutu */
2153 return 1252; /* UNKNOWN, believed to be CP1252 */
2155 case 0x3f: /* Kazakh */
2156 return 1251; /* JUST UNKNOWN, probably CP1251 */
2158 case 0x44: /* Tatar */
2159 case 0x58: /* Manipuri */
2160 case 0x59: /* Sindhi */
2161 case 0x60: /* Kashmiri (India) */
2162 return 0; /* UNKNOWN, believed to be Unicode only */
2166 /* This is just a guess, but it will be a frequent guess */
2171 * gsf_msole_lid_to_codepage_str
2174 * Returns: the Iconv codepage string for the given LID.
2175 * Return value must be g_free ()'d
/* Returns a newly allocated iconv name for @lid: "MACINTOSH" for the 0x0FFF
 * pseudo LID, otherwise "CP" followed by the number that
 * gsf_msole_lid_to_codepage returns.  The string comes from
 * g_strdup / g_strdup_printf, so the caller releases it with g_free. */
2178 gsf_msole_lid_to_codepage_str (guint lid)
2182 if (lid == 0x0FFF) /* Macintosh Hack */
2183 return g_strdup ("MACINTOSH");
2185 cp = gsf_msole_lid_to_codepage (lid);
2186 return g_strdup_printf ("CP%d", cp);
2190 * gsf_msole_iconv_win_codepage :
2192 * Returns: our best guess at the applicable windows code page based on an
2193 * environment variable or the current locale.
/* Determines a Windows code page from the WINDOWS_LANGUAGE environment
 * variable or, when that is unset, from the LC_CTYPE locale with anything
 * from the first '.' onwards removed.  Falls back to 1252. */
2196 gsf_msole_iconv_win_codepage (void)
/* NOTE(review): lang is either the pointer returned by getenv or a copy
 * made with g_strndup / g_strdup.  If a line not shown here frees lang
 * (as the "simplifies exit" remark suggests), the getenv result would be
 * freed as well; confirm. */
2200 if ((lang = getenv("WINDOWS_LANGUAGE")) == NULL) {
/* Passing NULL queries the current locale without changing it. */
2201 char const *locale = setlocale (LC_CTYPE, NULL);
2202 if (locale != NULL) {
2203 char const *lang_sep = strchr (locale, '.');
2205 lang = g_strndup (locale, (unsigned)(lang_sep - locale));
2207 lang = g_strdup (locale); /* simplifies exit */
/* Language tag -> LID -> code page. */
2212 guint lid = gsf_msole_lid_for_language (lang);
2214 return gsf_msole_lid_to_codepage (lid);
2216 return 1252; /* default ansi */
/* Builds the list of iconv encoding names to try for @codepage.  Every
 * element is a newly allocated string (g_strdup / g_strdup_printf) and
 * elements are added with g_slist_prepend.  The conditions selecting each
 * group of names are on lines not shown in this view. */
2220 gsf_msole_iconv_get_codepage_string_list (int codepage)
2222 GSList *cp_list = NULL;
2226 cp_list = g_slist_prepend (cp_list, g_strdup ("UTF-16LE"));
2229 cp_list = g_slist_prepend (cp_list, g_strdup ("UTF-16BE"));
/* Prepending means "MACINTOSH" ends up ahead of "MACROMAN" in the list. */
2233 cp_list = g_slist_prepend (cp_list, g_strdup ("MACROMAN"));
2234 cp_list = g_slist_prepend (cp_list, g_strdup ("MACINTOSH"));
2238 cp_list = g_slist_prepend (cp_list, g_strdup ("UTF-8"));
2241 /* according to OOo docs 8001 is a synonym CP1252 */
/* Generic name built from the number, e.g. "CP1252". */
2246 cp_list = g_slist_prepend (cp_list, g_strdup_printf ("CP%u", codepage));
2253 * gsf_msole_iconv_open_codepage_for_import :
2257 * Returns: an iconv converter for @codepage -> utf8.
/* Opens a converter from @codepage to the encoding named @to.  The
 * candidate names from gsf_msole_iconv_get_codepage_string_list are tried
 * in order until g_iconv_open succeeds.  On failure a warning is printed
 * and (GIConv)(-1) is returned. */
2260 gsf_msole_iconv_open_codepage_for_import (char const *to, int codepage)
2262 GIConv iconv_handle = (GIConv)(-1);
2263 gchar *codepage_str;
2264 GSList *codepage_list, *cp;
2265 g_return_val_if_fail (to != NULL, (GIConv)(-1));
2267 cp = codepage_list = gsf_msole_iconv_get_codepage_string_list (codepage);
/* Every name is freed whether or not it was used; once a handle is open
 * the remaining names are only freed. */
2269 codepage_str = cp->data;
2270 if (iconv_handle == (GIConv)(-1))
2271 iconv_handle = g_iconv_open (to, codepage_str);
2272 g_free (codepage_str);
2275 g_slist_free (codepage_list);
2277 if (iconv_handle == (GIConv)(-1))
2278 g_warning ("Unable to open an iconv handle from codepage %d -> %s",
2280 return iconv_handle;
2284 * gsf_msole_iconv_open_for_import :
2287 * Returns: an iconv converter for single byte encodings @codepage -> utf8.
2288 * Attempt to handle the semantics of a specification for multibyte encodings
2289 * since this is only supposed to be used for single bytes.
/* Convenience wrapper: converter from @codepage to "UTF-8". */
2292 gsf_msole_iconv_open_for_import (int codepage)
2294 return gsf_msole_iconv_open_codepage_for_import ("UTF-8", codepage);
2298 * gsf_msole_iconv_open_codepages_for_export :
2302 * Returns: an iconv converter to go from utf8 -> to our best guess at a useful
/* Opens a converter from the encoding named @from to @codepage_to.  The
 * candidate names from gsf_msole_iconv_get_codepage_string_list are tried
 * in order until g_iconv_open succeeds.  On failure a warning is printed
 * and (GIConv)(-1) is returned. */
2306 gsf_msole_iconv_open_codepages_for_export (int codepage_to, char const *from)
2308 GIConv iconv_handle = (GIConv)(-1);
2309 gchar *codepage_str;
2310 GSList *codepage_list, *cp;
2311 g_return_val_if_fail (from != NULL, (GIConv)(-1));
2313 cp = codepage_list = gsf_msole_iconv_get_codepage_string_list (codepage_to);
/* Every name is freed whether or not it was used; once a handle is open
 * the remaining names are only freed. */
2315 codepage_str = cp->data;
2316 if (iconv_handle == (GIConv)(-1))
2317 iconv_handle = g_iconv_open (codepage_str, from);
2318 g_free (codepage_str);
2321 g_slist_free (codepage_list);
2323 if (iconv_handle == (GIConv)(-1))
2324 g_warning ("Unable to open an iconv handle from %s -> codepage %u",
2326 return iconv_handle;
2330 * gsf_msole_iconv_open_codepage_for_export :
2333 * Returns: an iconv converter to go from utf8 -> to our best guess at a useful
/* Convenience wrapper: converter from "UTF-8" to @codepage_to. */
2337 gsf_msole_iconv_open_codepage_for_export (int codepage_to)
2339 return gsf_msole_iconv_open_codepages_for_export (codepage_to, "UTF-8");
2343 * gsf_msole_iconv_open_for_export :
2345 * Returns: an iconv converter to go from utf8 -> to our best guess at a useful
/* Convenience wrapper: converter from "UTF-8" to the code page chosen by
 * gsf_msole_iconv_win_codepage (environment variable or current locale). */
2349 gsf_msole_iconv_open_for_export (void)
2351 return gsf_msole_iconv_open_codepage_for_export (gsf_msole_iconv_win_codepage ());
2354 #define VBA_COMPRESSION_WINDOW 4096
2357 * gsf_msole_inflate:
2358 * @input: stream to read from
2359 * @offset: offset into it for start byte of compressed stream
2361 * Decompresses an LZ compressed stream.
2363 * Return value: A GByteArray that the caller is responsible for freeing
2366 gsf_msole_inflate (GsfInput *input, gsf_off_t offset)
2369 unsigned i, win_pos, pos = 0;
2370 unsigned mask, shift, distance;
2371 guint8 flag, buffer [VBA_COMPRESSION_WINDOW];
2374 gboolean clean = TRUE;
2376 if (gsf_input_seek (input, offset, G_SEEK_SET))
2379 res = g_byte_array_new ();
2381 /* explanation from libole2/ms-ole-vba.c */
2382 /* The first byte is a flag byte. Each bit in this byte
2383 * determines what the next byte is. If the bit is zero,
2384 * the next byte is a character. Otherwise the next two
2385 * bytes contain the number of characters to copy from the
2386 * uncompressed buffer and where to copy them from (offset,
2389 while (NULL != gsf_input_read (input, 1, &flag))
2390 for (mask = 1; mask < 0x100 ; mask <<= 1)
2392 if (NULL == (tmp = gsf_input_read (input, 2, NULL)))
2394 win_pos = pos % VBA_COMPRESSION_WINDOW;
2395 if (win_pos <= 0x80) {
2396 if (win_pos <= 0x20)
2397 shift = (win_pos <= 0x10) ? 12 : 11;
2399 shift = (win_pos <= 0x40) ? 10 : 9;
2401 if (win_pos <= 0x200)
2402 shift = (win_pos <= 0x100) ? 8 : 7;
2403 else if (win_pos <= 0x800)
2404 shift = (win_pos <= 0x400) ? 6 : 5;
2409 token = GSF_LE_GET_GUINT16 (tmp);
2410 len = (token & ((1 << shift) - 1)) + 3;
2411 distance = token >> shift;
2413 /* fprintf (stderr, "Shift %d, token len %d, distance %d bytes %.2x %.2x\n",
2414 shift, len, distance, (token & 0xff), (token >> 8)); */
2416 for (i = 0; i < len; i++) {
2417 unsigned srcpos = (pos - distance - 1) % VBA_COMPRESSION_WINDOW;
2418 guint8 c = buffer [srcpos];
2419 buffer [pos++ % VBA_COMPRESSION_WINDOW] = c;
2422 if ((pos != 0) && ((pos % VBA_COMPRESSION_WINDOW) == 0) && clean) {
2423 (void) gsf_input_read (input, 2, NULL);
2425 g_byte_array_append (res, buffer, VBA_COMPRESSION_WINDOW);
2428 if (NULL != gsf_input_read (input, 1, buffer + (pos % VBA_COMPRESSION_WINDOW)))
2433 if (pos % VBA_COMPRESSION_WINDOW)
2434 g_byte_array_append (res, buffer, pos % VBA_COMPRESSION_WINDOW);