1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
5 * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org)
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of version 2.1 of the GNU Lesser General Public
9 * License as published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 /* Info extracted from
23 * svx/source/msfilter/msvbasic.cxx
24 * Costin Raiu, Kaspersky Labs, 'Apple of Discord'
25 * Virus bulletin's bontchev.pdf, svajcer.pdf
27 * and lots and lots of reading. There are lots of pieces missing still
28 * but the structure seems to hold together.
30 #include <gsf-config.h>
31 #include <gsf/gsf-infile-msvba.h>
32 #include <gsf/gsf-infile-impl.h>
33 #include <gsf/gsf-input-memory.h>
34 #include <gsf/gsf-impl-utils.h>
35 #include <gsf/gsf-msole-utils.h>
36 #include <gsf/gsf-infile-msole.h>
37 #include <gsf/gsf-infile-zip.h>
38 #include <gsf/gsf-open-pkg-utils.h>
39 #include <gsf/gsf-utils.h>
/* Parent class, cached by gsf_infile_msvba_class_init() so that
 * gsf_infile_msvba_finalize() can chain up to it. */
44 static GObjectClass *parent_class;
/* Instance data.  NOTE(review): the member declarations are not visible
 * here; the functions in this file use the fields source, modules and
 * children. */
46 struct _GsfInfileMSVBA {
/* No class-level additions: the class struct is plain GsfInfileClass. */
54 typedef GsfInfileClass GsfInfileMSVBAClass;
/* Class-pointer cast and class type-check helpers for GSF_INFILE_MSVBA_TYPE. */
56 #define GSF_INFILE_MSVBA_CLASS(k) (G_TYPE_CHECK_CLASS_CAST ((k), GSF_INFILE_MSVBA_TYPE, GsfInfileMSVBAClass))
57 #define GSF_IS_INFILE_MSVBA_CLASS(k) (G_TYPE_CHECK_CLASS_TYPE ((k), GSF_INFILE_MSVBA_TYPE))
/*
 * gsf_vba_inflate:
 * @input: stream holding the compressed data
 * @offset: start of the compressed block; inflation begins 3 bytes past it
 * @size: out parameter for the inflated length.  NOTE(review): the statement
 *        storing it is not visible here -- confirm against the full function.
 * @add_null_terminator: when TRUE one extra 0 byte is appended to the result
 *
 * Inflates through gsf_msole_inflate() and hands back the raw data buffer.
 * Only the GByteArray wrapper is released; callers in this file free the
 * returned buffer with g_free().
 */
60 gsf_vba_inflate (GsfInput *input, gsf_off_t offset, int *size, gboolean add_null_terminator)
62 GByteArray *res = gsf_msole_inflate (input, offset + 3);
/* Appending one byte taken from the empty string literal adds the NUL. */
66 if (add_null_terminator)
67 g_byte_array_append (res, "", 1);
/* FALSE keeps the data segment alive and returns it to the caller. */
68 return g_byte_array_free (res, FALSE);
/*
 * vba_extract_module_source:
 * @vba: container whose source infile holds the module streams
 * @name: name of the child stream to open; must not be NULL
 * @src_offset: offset inside that stream handed to gsf_vba_inflate()
 *
 * Opens the child stream called @name, inflates it with a trailing NUL and
 * stores the result in vba->modules under a copy of @name.
 * NOTE(review): the conditions that choose between the insert and the
 * warning are not visible here -- confirm against the full function.
 */
72 vba_extract_module_source (GsfInfileMSVBA *vba, char const *name, guint32 src_offset)
78 g_return_if_fail (name != NULL);
80 module = gsf_infile_child_by_name (vba->source, name);
84 code = gsf_vba_inflate (module, (gsf_off_t) src_offset, &inflated_size, TRUE);
/* The table is created on first use and owns both key and value
 * (each is released with g_free). */
86 if (NULL == vba->modules)
87 vba->modules = g_hash_table_new_full (g_str_hash, g_str_equal,
88 (GDestroyNotify)g_free, (GDestroyNotify)g_free);
89 g_hash_table_insert (vba->modules, g_strdup (name), code);
91 g_warning ("Problems extracting the source for %s @ %u", name, src_offset);
/* Drop the reference obtained from gsf_infile_child_by_name(). */
93 g_object_unref (module);
98 * @vba : #GsfInfileMSVBA
99 * @err : optionally NULL
101 * Read a VBA directory and its project file.
104 * Returns: %FALSE on error setting @err if it is supplied.
/* Locates the "dir" child of vba->source, inflates it and walks the result
 * as a series of records, each starting with a little-endian 16-bit tag
 * followed by a little-endian 32-bit length.  Module sources are extracted
 * as their stream name and source offset are encountered.
 * NOTE(review): several statements of this function (switch labels, gotos,
 * cleanup labels) are not visible here, so the notes below only describe
 * the lines that are shown. */
107 vba_dir_read (GsfInfileMSVBA *vba, GError **err)
/* element_count starts at -1, i.e. "no element count record seen yet"
 * (a value >= 0 on a later count record triggers a warning below). */
109 int inflated_size, element_count = -1;
110 char const *msg = NULL;
111 char *name, *elem_stream = NULL;
114 guint8 *inflated_data, *end, *ptr;
116 gboolean failed = TRUE;
118 /* 0. get the stream */
119 dir = gsf_infile_child_by_name (vba->source, "dir");
121 msg = "Can't find the VBA directory stream.";
125 /* 1. decompress it */
126 ptr = inflated_data = gsf_vba_inflate (dir, 0, &inflated_size, FALSE);
127 if (inflated_data == NULL)
128 goto fail_compression;
/* end points one past the last inflated byte and bounds every record read. */
129 end = inflated_data + inflated_size;
131 /* 2. GUESS : based on several xls with macros and XL8GARY this looks like a
132 * series of sized records. Be _extra_ careful */
139 * 4 <var> project name
148 * 0x0f == number of elements
152 * 0x31 == stream offset of the compressed source !
154 * 0x16 == an ascii dependency name
155 * 0x3e == a unicode dependency name
156 * 0x33 == a classid for a dependency with no trialing data
158 * 0x2f == a dummy classid
160 * 0x0d == the classid
161 * 0x2f, and 0x0d appear contain
162 * uint32 classid_size;
165 * and sometimes some trailing junk
/* A record header needs 6 bytes: 2 for the tag plus 4 for the length. */
167 if ((ptr + 6) > end) {
168 msg = "vba project header problem";
171 tag = GSF_LE_GET_GUINT16 (ptr);
172 len = GSF_LE_GET_GUINT32 (ptr + 2);
/* The record payload must also lie inside the inflated buffer. */
175 if ((ptr + len) > end) {
176 msg = "vba project header problem";
/* The payload is not NUL terminated, so a length-bounded copy is made.
 * NOTE(review): the output calls below are presumably debug-only; their
 * guard is not visible here. */
182 name = g_strndup (ptr, len);
184 puts ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
185 printf ("<project name=\"%s\">", name);
190 /* this seems to have an extra two bytes that are not
191 * part of the length ..?? */
196 g_warning ("element count is not what we expected");
199 if (element_count >= 0) {
200 g_warning ("More than one element count ??");
203 element_count = GSF_LE_GET_GUINT16 (ptr);
214 name = g_strndup (ptr, len);
215 g_print ("Depend Name : '%s'\n", name);
225 name = g_strndup (ptr, len);
226 g_print ("Element Name : '%s'\n", name);
/* Tag 0x19: remember the stream name; it is consumed when the matching
 * source offset record arrives. */
230 case 0x19: elem_stream = g_strndup (ptr, len); break;
234 g_warning ("source offset property is not what we expected");
/* The 32-bit payload is passed on as the source offset inside the stream
 * named by elem_stream; the name is released and cleared afterwards. */
237 vba_extract_module_source (vba, elem_stream,
238 GSF_LE_GET_GUINT32 (ptr));
239 g_free (elem_stream); elem_stream = NULL;
245 g_print ("tag %hx : len %u\n", tag, len);
246 gsf_mem_dump (ptr, len);
/* A record with tag 0x10 ends the loop. */
252 } while (tag != 0x10);
/* Releases a stream name that never met a source offset record
 * (elem_stream is NULL if it was consumed above). */
253 g_free (elem_stream);
/* NOTE(review): element_count is presumably decremented per element by a
 * statement not visible here; a non-zero remainder is only warned about. */
255 if (element_count != 0)
256 g_warning ("Number of elements differs from expectations");
261 g_free (inflated_data);
267 g_object_unref (G_OBJECT (dir));
/* The failure text collected in msg is turned into a GError.
 * NOTE(review): the guard on err is not visible here. */
272 *err = g_error_new_literal (gsf_input_error_id (), 0, msg);
/* Layout constants for the fixed header read by vba_project_read():
 * VBA56_DIRENT_HEADER_SIZE bytes are read up front and a 16-bit record
 * count is fetched at byte offset VBA56_DIRENT_RECORD_COUNT.
 * NOTE(review): some of the summed terms are not visible here. */
278 #define VBA56_DIRENT_RECORD_COUNT (2 + /* magic */ \
280 2 + /* 0x00 0xff */ \
282 #define VBA56_DIRENT_HEADER_SIZE (VBA56_DIRENT_RECORD_COUNT + \
283 2 + /* type1 record count */ \
289 * @vba : #GsfInfileMSVBA
290 * @err : optionally NULL
292 * Read a VBA directory and its project file.
295 * Returns: %FALSE on error setting @err if it is supplied.
/* Reads the fixed-size header of the "dir" child of vba->source, checks the
 * 2-byte magic and the 4-byte version signature, then walks a counted list
 * of length-prefixed UTF-16 names.
 * NOTE(review): the error returns and cleanup of this function are not
 * visible here.  The only call in gsf_infile_msvba_new() is commented out. */
298 vba_project_read (GsfInfileMSVBA *vba, GError **err)
300 /* NOTE : This seems constant, find some confirmation */
301 static guint8 const signature[] = { 0xcc, 0x61 };
/* Known version signatures (compared with header bytes 2..5), each with a
 * display name, a VBA version number and a Mac flag. */
303 guint8 const signature[4];
304 char const * const name;
305 int const vba_version;
306 gboolean const is_mac;
307 } const versions [] = {
308 { { 0x5e, 0x00, 0x00, 0x01 }, "Office 97", 5, FALSE},
309 { { 0x5f, 0x00, 0x00, 0x01 }, "Office 97 SR1", 5, FALSE },
310 { { 0x65, 0x00, 0x00, 0x01 }, "Office 2000 alpha?", 6, FALSE },
311 { { 0x6b, 0x00, 0x00, 0x01 }, "Office 2000 beta?", 6, FALSE },
312 { { 0x6d, 0x00, 0x00, 0x01 }, "Office 2000", 6, FALSE },
313 { { 0x6f, 0x00, 0x00, 0x01 }, "Office 2000", 6, FALSE },
314 { { 0x70, 0x00, 0x00, 0x01 }, "Office XP beta 1/2", 6, FALSE },
315 { { 0x73, 0x00, 0x00, 0x01 }, "Office XP", 6, FALSE },
316 { { 0x76, 0x00, 0x00, 0x01 }, "Office 2003", 6, FALSE },
317 { { 0x79, 0x00, 0x00, 0x01 }, "Office 2003", 6, FALSE },
318 { { 0x60, 0x00, 0x00, 0x0e }, "MacOffice 98", 5, TRUE },
319 { { 0x62, 0x00, 0x00, 0x0e }, "MacOffice 2001", 5, TRUE },
320 { { 0x63, 0x00, 0x00, 0x0e }, "MacOffice X", 6, TRUE },
321 { { 0x64, 0x00, 0x00, 0x0e }, "MacOffice 2004", 6, TRUE },
325 unsigned i, count, len;
330 dir = gsf_infile_child_by_name (vba->source, "dir");
333 *err = g_error_new (gsf_input_error_id (), 0,
334 "Can't find the VBA directory stream.");
/* Fails if the rewind fails, the header cannot be read in full, or the
 * first two bytes differ from the magic. */
338 if (gsf_input_seek (dir, 0, G_SEEK_SET) ||
339 NULL == (data = gsf_input_read (dir, VBA56_DIRENT_HEADER_SIZE, NULL)) ||
340 0 != memcmp (data, signature, sizeof (signature))) {
342 *err = g_error_new (gsf_input_error_id (), 0,
/* Linear search of the version table; i reaching the table size means
 * no entry matched. */
347 for (i = 0 ; i < G_N_ELEMENTS (versions); i++)
348 if (!memcmp (data+2, versions[i].signature, 4))
351 if (i >= G_N_ELEMENTS (versions)) {
353 *err = g_error_new (gsf_input_error_id (), 0,
354 "Unknown VBA version signature 0x%x%x%x%x",
355 data[2], data[3], data[4], data[5]);
359 puts (versions[i].name);
361 /* these depend strings seem to come in 2 blocks */
362 count = GSF_LE_GET_GUINT16 (data + VBA56_DIRENT_RECORD_COUNT);
/* Each entry is a 16-bit little-endian byte length followed by that many
 * bytes of name data. */
363 for (; count > 0 ; count--) {
364 if (NULL == ((data = gsf_input_read (dir, 2, NULL))))
366 len = GSF_LE_GET_GUINT16 (data);
367 if (NULL == ((data = gsf_input_read (dir, len, NULL)))) {
368 printf ("len == 0x%x ??\n", len);
/* len counts bytes: allocate len/2 code units plus one; g_new0 zero-fills,
 * which provides the terminator relied on by the conversion below. */
372 uni_name = g_new0 (gunichar2, len/2 + 1);
374 /* be wary about endianness */
375 for (i = 0 ; i < len ; i += 2)
376 uni_name [i/2] = GSF_LE_GET_GUINT16 (data + i);
377 name = g_utf16_to_utf8 (uni_name, -1, NULL, NULL, NULL);
380 printf ("%d %s\n", count, name);
382 /* ignore this blob ???? */
/* Names starting with "*\G" are followed by 12 further bytes that are
 * read and discarded. */
383 if (!strncmp ("*\\G", name, 3)) {
384 if (NULL == ((data = gsf_input_read (dir, 12, NULL)))) {
385 printf ("len == 0x%x ??\n", len);
/* Any entries left unread at this point are treated as a failure. */
393 g_return_val_if_fail (count == 0, FALSE);
/* GObject finalize handler: destroys the module table if one is still
 * attached, drops the reference held on the source infile, then chains up
 * to the parent class. */
400 gsf_infile_msvba_finalize (GObject *obj)
402 GsfInfileMSVBA *vba = GSF_INFILE_MSVBA (obj);
/* modules is NULL when nothing was extracted or when the table was taken
 * by gsf_infile_msvba_steal_modules(). */
404 if (NULL != vba->modules) {
405 g_hash_table_destroy (vba->modules);
408 if (vba->source != NULL) {
409 g_object_unref (G_OBJECT (vba->source));
412 parent_class->finalize (obj);
/* Instance initialiser: clears vba->children.
 * NOTE(review): any other field resets are not visible here. */
416 gsf_infile_msvba_init (GObject *obj)
418 GsfInfileMSVBA *vba = GSF_INFILE_MSVBA (obj);
422 vba->children = NULL;
/* Class initialiser: installs the finalize handler and caches the parent
 * class so that finalize can chain up. */
426 gsf_infile_msvba_class_init (GObjectClass *gobject_class)
428 gobject_class->finalize = gsf_infile_msvba_finalize;
429 parent_class = g_type_class_peek_parent (gobject_class);
/* Type registration for GsfInfileMSVBA, wiring up the class and instance
 * initialisers defined in this file.
 * NOTE(review): the remaining macro arguments are not visible here. */
432 GSF_CLASS (GsfInfileMSVBA, gsf_infile_msvba,
433 gsf_infile_msvba_class_init, gsf_infile_msvba_init,
/*
 * gsf_infile_msvba_new:
 * @source: infile containing the VBA streams; must be a GsfInfile
 * @err: optional location for an error
 *
 * Creates a GsfInfileMSVBA around @source and parses its VBA directory.
 * The new object takes its own reference on @source.  If parsing fails the
 * object is released again and, when @err was supplied but left unset, a
 * generic error is stored in it.
 */
437 gsf_infile_msvba_new (GsfInfile *source, GError **err)
441 g_return_val_if_fail (GSF_IS_INFILE (source), NULL);
443 vba = g_object_new (GSF_INFILE_MSVBA_TYPE, NULL);
444 if (G_UNLIKELY (NULL == vba)) return NULL;
/* Reference kept in vba->source; dropped in gsf_infile_msvba_finalize(). */
446 g_object_ref (G_OBJECT (source));
447 vba->source = source;
449 /* vba_project_read (vba, err); */
451 /* find the name offset pairs */
452 if (vba_dir_read (vba, err))
453 return GSF_INFILE (vba);
/* Make sure a caller that asked for an error gets one even if
 * vba_dir_read() did not set it. */
455 if (err != NULL && *err == NULL)
456 *err = g_error_new (gsf_input_error_id (), 0,
457 "Unable to parse VBA header");
459 g_object_unref (G_OBJECT (vba));
464 * gsf_infile_msvba_get_modules :
465 * @vba_stream : #GsfInfile
467 * a collection of names and source code.
469 * Returns: %NULL, or a hashtable of names and source code (unknown encoding).
/* Returns the module table pointer stored in @vba_stream as-is (no copy is
 * made and no reference is taken), or NULL if @vba_stream fails the type
 * check.  The table remains attached to @vba_stream. */
472 gsf_infile_msvba_get_modules (GsfInfileMSVBA const *vba_stream)
474 g_return_val_if_fail (GSF_IS_INFILE_MSVBA (vba_stream), NULL);
475 return vba_stream->modules;
479 * gsf_infile_msvba_steal_modules :
480 * @vba_stream : #GsfInfile
482 * A collection of names and source code which the caller is responsible for destroying.
484 * Returns: %NULL, or a hashtable of names and source code (unknown encoding).
/* Detaches the module table from @vba_stream: the pointer is saved in res
 * and the field is cleared, so gsf_infile_msvba_finalize() will no longer
 * destroy the table.
 * NOTE(review): the statement returning res is not visible here. */
487 gsf_infile_msvba_steal_modules (GsfInfileMSVBA *vba_stream)
490 g_return_val_if_fail (GSF_IS_INFILE_MSVBA (vba_stream), NULL);
491 res = vba_stream->modules;
492 vba_stream->modules = NULL;
497 * gsf_input_find_vba :
499 * @err : #GError, optionally %NULL.
501 * A utility routine that attempts to find the VBA file within a stream.
503 * Returns: a #GsfInfileMSVBA built from the VBA storage found in @input.
/* Tries to open @input first as an OLE2 file and, if that fails, as a zip
 * package, looking for the storage that holds the VBA project.  What was
 * found is handed to gsf_infile_msvba_new().
 * NOTE(review): several guards (the NULL tests between the lookups and
 * before the final return) are not visible here. */
506 gsf_input_find_vba (GsfInput *input, GError **err)
508 GsfInput *vba = NULL;
/* Errors from the container probes are deliberately not reported (NULL is
 * passed for the error), since a failed probe just means "try the next
 * format". */
511 if (NULL != (infile = gsf_infile_msole_new (input, NULL))) {
/* First candidate path inside the OLE2 file: _VBA_PROJECT_CUR/VBA. */
513 vba = gsf_infile_child_by_vname (infile, "_VBA_PROJECT_CUR", "VBA", NULL);
/* Second candidate path: Macros/VBA. */
516 vba = gsf_infile_child_by_vname (infile, "Macros", "VBA", NULL);
518 /* TODO : PPT is more complex */
520 g_object_unref (G_OBJECT (infile));
521 } else if (NULL != (infile = gsf_infile_zip_new (input, NULL))) {
/* Follow the package relationships: root -> main document part ->
 * vbaProject part. */
522 GsfInput *main_part = gsf_open_pkg_get_rel_by_type (GSF_INPUT (infile),
523 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument");
525 if (NULL != main_part) {
526 GsfInput *vba_stream = gsf_open_pkg_get_rel_by_type (main_part,
527 "http://schemas.microsoft.com/office/2006/relationships/vbaProject");
528 if (NULL != vba_stream) {
/* The vbaProject part is itself opened as an OLE2 file and its "VBA"
 * child is used; unlike the probes above, failures here are reported
 * through @err. */
529 GsfInfile *ole = gsf_infile_msole_new (vba_stream, err);
531 vba = gsf_infile_child_by_vname (ole, "VBA", NULL);
532 g_object_unref (G_OBJECT (ole));
534 g_object_unref (G_OBJECT (vba_stream));
536 g_object_unref (G_OBJECT (main_part));
538 g_object_unref (G_OBJECT (infile));
542 return (GsfInfileMSVBA *)
543 gsf_infile_msvba_new (GSF_INFILE (vba), err);