1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * Copyright (C) 2000-2012 Jeffrey Stedfast
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free
17 * Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
29 #include "url-scanner.h"
30 #include "gmime-filter-html.h"
36 * SECTION: gmime-filter-html
37 * @title: GMimeFilterHTML
38 * @short_description: Convert plain text into HTML
39 * @see_also: #GMimeFilter
41 * A #GMimeFilter used for converting plain text into HTML.
/* Local shorthands for the public flag bits; used as the per-entry mask below. */
45 #define CONVERT_WEB_URLS GMIME_FILTER_HTML_CONVERT_URLS
46 #define CONVERT_ADDRSPEC GMIME_FILTER_HTML_CONVERT_ADDRESSES
/* Pattern table consumed by g_mime_filter_html_new(): an entry is added to the
 * url scanner only when its mask bit is present in the flags passed by the
 * caller. The second string of each pattern is presumably the prefix that is
 * prepended when the href is built (html_convert reads match.prefix), and the
 * two trailing identifiers are presumably the start/end locator callbacks --
 * confirm both against url-scanner.h.
 * NOTE(review): the declaration line of this table is not visible in this
 * excerpt. */
52 { CONVERT_WEB_URLS, { "file://", "", url_file_start, url_file_end } },
53 { CONVERT_WEB_URLS, { "ftp://", "", url_web_start, url_web_end } },
54 { CONVERT_WEB_URLS, { "http://", "", url_web_start, url_web_end } },
55 { CONVERT_WEB_URLS, { "https://", "", url_web_start, url_web_end } },
56 { CONVERT_WEB_URLS, { "news://", "", url_web_start, url_web_end } },
57 { CONVERT_WEB_URLS, { "nntp://", "", url_web_start, url_web_end } },
58 { CONVERT_WEB_URLS, { "telnet://", "", url_web_start, url_web_end } },
59 { CONVERT_WEB_URLS, { "www.", "http://", url_web_start, url_web_end } },
60 { CONVERT_WEB_URLS, { "ftp.", "ftp://", url_web_start, url_web_end } },
61 { CONVERT_ADDRSPEC, { "@", "mailto:", url_addrspec_start, url_addrspec_end } },
/* Number of entries in the patterns table. */
64 #define NUM_URL_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
/* GObject boilerplate: class initialiser, instance initialiser and finaliser. */
66 static void g_mime_filter_html_class_init (GMimeFilterHTMLClass *klass);
67 static void g_mime_filter_html_init (GMimeFilterHTML *filter, GMimeFilterHTMLClass *klass);
68 static void g_mime_filter_html_finalize (GObject *object);
/* Implementations of the GMimeFilter virtual methods; class_init installs them
 * into the copy, filter, complete and reset slots of the filter class. */
70 static GMimeFilter *filter_copy (GMimeFilter *filter);
71 static void filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
72 char **out, size_t *outlen, size_t *outprespace);
73 static void filter_complete (GMimeFilter *filter, char *in, size_t len, size_t prespace,
74 char **out, size_t *outlen, size_t *outprespace);
75 static void filter_reset (GMimeFilter *filter);
/* Parent class reference; assigned in class_init and used by the finaliser to
 * chain up. */
78 static GMimeFilterClass *parent_class = NULL;
/* Returns the GType of GMimeFilterHTML. The type is registered with
 * g_type_register_static() as a subclass of GMIME_TYPE_FILTER under the name
 * GMimeFilterHTML.
 * NOTE(review): the guard around the registration and the return statement are
 * not visible in this excerpt; presumably the static type variable caches the
 * result so that registration runs only once -- confirm. */
82 g_mime_filter_html_get_type (void)
84 static GType type = 0;
/* Type description: class size and class initialiser, then instance size and
 * instance initialiser. */
87 static const GTypeInfo info = {
88 sizeof (GMimeFilterHTMLClass),
89 NULL, /* base_class_init */
90 NULL, /* base_class_finalize */
91 (GClassInitFunc) g_mime_filter_html_class_init,
92 NULL, /* class_finalize */
93 NULL, /* class_data */
94 sizeof (GMimeFilterHTML),
96 (GInstanceInitFunc) g_mime_filter_html_init,
99 type = g_type_register_static (GMIME_TYPE_FILTER, "GMimeFilterHTML", &info, 0);
/* Class initialiser: records the parent class and installs this file's
 * finaliser and GMimeFilter virtual method implementations.
 * @klass: the GMimeFilterHTML class structure being initialised. */
107 g_mime_filter_html_class_init (GMimeFilterHTMLClass *klass)
109 GObjectClass *object_class = G_OBJECT_CLASS (klass);
110 GMimeFilterClass *filter_class = GMIME_FILTER_CLASS (klass);
/* kept so the finaliser can chain up to the parent implementation */
112 parent_class = g_type_class_ref (GMIME_TYPE_FILTER);
114 object_class->finalize = g_mime_filter_html_finalize;
/* GMimeFilter virtual methods */
116 filter_class->copy = filter_copy;
117 filter_class->filter = filter_filter;
118 filter_class->complete = filter_complete;
119 filter_class->reset = filter_reset;
/* Instance initialiser: allocates the url scanner owned by the filter (freed
 * in the finaliser) and marks that no pre tag has been opened yet.
 * NOTE(review): initialisation of any other instance fields is not visible in
 * this excerpt. */
123 g_mime_filter_html_init (GMimeFilterHTML *filter, GMimeFilterHTMLClass *klass)
125 filter->scanner = url_scanner_new ();
130 filter->pre_open = FALSE;
/* Finaliser: releases the url scanner created by the instance initialiser and
 * then chains up to the finalize method of the parent class. */
134 g_mime_filter_html_finalize (GObject *object)
136 GMimeFilterHTML *html = (GMimeFilterHTML *) object;
138 url_scanner_free (html->scanner);
/* chain up last, after this class has released its own resources */
140 G_OBJECT_CLASS (parent_class)->finalize (object);
/* GMimeFilter copy method: returns a new html filter constructed with the same
 * flags and citation colour as the given one. */
145 filter_copy (GMimeFilter *filter)
147 GMimeFilterHTML *html = (GMimeFilterHTML *) filter;
149 return g_mime_filter_html_new (html->flags, html->colour);
/* Helper used before writing to the filter output buffer.
 * @filter: filter whose outbuf is being written to
 * @outptr: current write position inside filter->outbuf
 * @outend: in/out; end of the output buffer, refreshed after the resize
 * @len: number of bytes the caller is about to write
 *
 * Returns the write position to continue from, expressed relative to the
 * current filter->outbuf.
 * NOTE(review): the comparison of the free space against len and the early
 * return are not visible in this excerpt; presumably the resize below runs
 * only when the free space is too small -- confirm. */
153 check_size (GMimeFilter *filter, char *outptr, char **outend, size_t len)
/* bytes still free between the write position and the end of the buffer */
155 size_t outleft = (size_t) (*outend - outptr);
/* keep the position as an offset; the pointer itself is rebuilt from
 * filter->outbuf after the resize */
161 offset = outptr - filter->outbuf;
/* grow by len; the TRUE argument presumably asks for the existing contents to
 * be kept -- confirm against g_mime_filter_set_size */
163 g_mime_filter_set_size (filter, filter->outsize + len, TRUE);
165 *outend = filter->outbuf + filter->outsize;
167 return filter->outbuf + offset;
/* Inspects the line starting at in to determine its citation depth; the caller
 * (html_convert) treats a result greater than zero as a quoted line.
 * NOTE(review): most of the body is not visible in this excerpt. The visible
 * loop stops only at a newline and the function takes no end-of-buffer
 * argument, so it relies on the input containing a newline -- confirm the
 * caller guarantees this, in particular when a final line without a trailing
 * newline is flushed. */
171 citation_depth (const char *in)
173 register const char *inptr = in;
179 /* check that it isn't an escaped From line */
180 if (!strncmp (inptr, "From", 4))
183 while (*inptr != '\n') {
/* Decodes one UTF-8 encoded character from the bytes between *in and inend and
 * returns it as a gunichar.
 * NOTE(review): several lines of the body are not visible in this excerpt,
 * including how *in is advanced and which value is returned for malformed
 * input; writeln passes its own cursor by address and loops until that cursor
 * reaches inend, so *in is presumably moved past the consumed bytes --
 * confirm. */
196 static inline gunichar
197 html_utf8_getc (const unsigned char **in, const unsigned char *inend)
199 register const unsigned char *inptr = *in;
200 register unsigned char c, r;
201 register gunichar u, m;
206 while (inptr < inend) {
212 } else if (r < 0xf8) { /* valid start char? */
214 m = 0x7f80; /* used to mask out the length bits */
/* every byte after the lead byte must have the form 10xxxxxx */
220 if ((c & 0xc0) != 0x80) {
/* append the six payload bits of the continuation byte */
225 u = (u << 6) | (c & 0x3f);
/* Writes the text between in and end to the filter output buffer at outptr,
 * one decoded UTF-8 character at a time.
 * @filter: the html filter (its flags and column counter are consulted)
 * @in, @end: start and end of the text to convert
 * @outptr: current write position in the output buffer
 * @outend: in/out end of the output buffer, kept current by check_size
 *
 * Visible behaviour: room for 16 bytes is reserved before each character; an
 * invalid sequence triggers a g_warning; with GMIME_FILTER_HTML_CONVERT_SPACES
 * a tab is expanded by repeating a string until the column is a multiple of 8,
 * and a space gets special treatment when it is the first character of the
 * text or is followed by another space or tab; characters from 0x20 up to but
 * excluding 0x80 are copied as a single byte; anything else depends on
 * GMIME_FILTER_HTML_ESCAPE_8BIT and is otherwise written as a decimal numeric
 * character reference.
 * Callers assign the result back to their write pointer, so the function
 * presumably returns the advanced outptr -- confirm.
 * NOTE(review): the switch statement, its case labels and the return are not
 * visible in this excerpt. The string literals written for the special
 * characters look as if their HTML entities were decoded during extraction:
 * as shown they are either identical to the input character or, in the
 * double-quote case, not a valid C string literal. Restore them from the
 * upstream source before building. */
242 writeln (GMimeFilter *filter, const char *in, const char *end, char *outptr, char **outend)
244 GMimeFilterHTML *html = (GMimeFilterHTML *) filter;
/* work on unsigned bytes so values of 0x80 and above compare as expected */
245 const unsigned char *instart = (const unsigned char *) in;
246 const unsigned char *inend = (const unsigned char *) end;
247 const unsigned char *inptr = instart;
249 while (inptr < inend) {
/* reserve space for the output produced by one input character */
252 outptr = check_size (filter, outptr, outend, 16);
254 u = html_utf8_getc (&inptr, inend);
257 g_warning ("Invalid UTF-8 sequence encountered");
261 outptr = g_stpcpy (outptr, "<");
265 outptr = g_stpcpy (outptr, ">");
269 outptr = g_stpcpy (outptr, "&");
273 outptr = g_stpcpy (outptr, """);
277 if (html->flags & (GMIME_FILTER_HTML_CONVERT_SPACES)) {
279 outptr = check_size (filter, outptr, outend, 7);
280 outptr = g_stpcpy (outptr, " ");
282 } while (html->column % 8);
285 /* otherwise, FALL THROUGH */
287 if (html->flags & GMIME_FILTER_HTML_CONVERT_SPACES) {
288 if (inptr == (instart + 1) || (inptr < inend && (*inptr == ' ' || *inptr == '\t'))) {
289 outptr = g_stpcpy (outptr, " ");
294 /* otherwise, FALL THROUGH */
296 if (u >= 0x20 && u < 0x80) {
297 *outptr++ = (char) (u & 0xff);
299 if (html->flags & GMIME_FILTER_HTML_ESCAPE_8BIT)
302 outptr += sprintf (outptr, "&#%u;", u);
/* Shared implementation behind filter_filter and filter_complete: converts the
 * input text to HTML line by line, writing into the filter output buffer.
 * @filter: the html filter
 * @in, @inlen: input text and its length
 * @prespace: passed through by the callers; no use of it is visible here
 * @out, @outlen, @outprespace: receive the output buffer, the number of bytes
 *   written and the prespace of the output buffer
 * @flush: FALSE from filter_filter, TRUE from filter_complete
 *
 * NOTE(review): many lines of this function (declarations, braces, loop heads,
 * pointer updates) are not visible in this excerpt, so the comments below
 * describe only the visible statements. */
313 html_convert (GMimeFilter *filter, char *in, size_t inlen, size_t prespace,
314 char **out, size_t *outlen, size_t *outprespace, gboolean flush)
316 GMimeFilterHTML *html = (GMimeFilterHTML *) filter;
317 register char *inptr, *outptr;
318 char *start, *outend;
/* initial size request for the output buffer; check_size grows it on demand */
322 g_mime_filter_set_size (filter, inlen * 2 + 6, FALSE);
326 outptr = filter->outbuf;
327 outend = filter->outbuf + filter->outsize;
/* emit the opening pre tag once; pre_open remembers it across calls */
329 if (html->flags & GMIME_FILTER_HTML_PRE && !html->pre_open) {
330 outptr = g_stpcpy (outptr, "<pre>");
331 html->pre_open = TRUE;
/* find the end of the current line */
335 while (inptr < inend && *inptr != '\n')
/* an unterminated line is handled now only when flushing */
338 if (inptr == inend && !flush)
/* citation marking: a line with depth above zero is wrapped in a font tag
 * carrying the configured colour */
344 if (html->flags & GMIME_FILTER_HTML_MARK_CITATION) {
345 if ((depth = citation_depth (start)) > 0) {
348 /* FIXME: we could easily support multiple colour depths here */
350 g_snprintf (font, 25, "<font color=\"#%06x\">", html->colour);
352 outptr = check_size (filter, outptr, &outend, 25);
353 outptr = g_stpcpy (outptr, font);
354 } else if (*start == '>') {
/* GMIME_FILTER_HTML_CITE: put a quote marker in front of the line.
 * NOTE(review): six bytes are reserved for a two character literal, which
 * suggests the literal originally held an HTML entity -- confirm. */
358 } else if (html->flags & GMIME_FILTER_HTML_CITE) {
359 outptr = check_size (filter, outptr, &outend, 6);
360 outptr = g_stpcpy (outptr, "> ");
364 #define CONVERT_URLS_OR_ADDRESSES (GMIME_FILTER_HTML_CONVERT_URLS | GMIME_FILTER_HTML_CONVERT_ADDRESSES)
/* URL and address conversion: scan the line for a match and wrap it in an
 * anchor element; text around the match goes through writeln */
365 if (html->flags & CONVERT_URLS_OR_ADDRESSES) {
366 size_t matchlen, buflen, len;
372 if (url_scanner_scan (html->scanner, start, len, &match)) {
373 /* write out anything before the first regex match */
374 outptr = writeln (filter, start, start + match.um_so,
377 start += match.um_so;
380 matchlen = match.um_eo - match.um_so;
/* room for the markup, the prefix and the matched text, which is written
 * twice: once inside the href and once as the link text */
382 buflen = 20 + strlen (match.prefix) + matchlen + matchlen;
383 outptr = check_size (filter, outptr, &outend, buflen);
385 /* write out the href tag */
386 outptr = g_stpcpy (outptr, "<a href=\"");
387 outptr = g_stpcpy (outptr, match.prefix);
/* NOTE(review): here and below the matched text is copied verbatim with
 * memcpy instead of going through writeln -- confirm that the scanner can
 * never match characters that need escaping in HTML */
388 memcpy (outptr, start, matchlen);
390 outptr = g_stpcpy (outptr, "\">");
392 /* now write the matched string */
393 memcpy (outptr, start, matchlen);
394 html->column += matchlen;
399 /* close the href tag */
400 outptr = g_stpcpy (outptr, "</a>");
402 /* nothing matched so write out the remainder of this line buffer */
403 outptr = writeln (filter, start, start + len, outptr, &outend);
/* no conversion requested: write the line as it is */
408 outptr = writeln (filter, start, inptr, outptr, &outend);
/* close the font tag opened for a cited line */
411 if ((html->flags & GMIME_FILTER_HTML_MARK_CITATION) && depth > 0) {
412 outptr = check_size (filter, outptr, &outend, 8);
413 outptr = g_stpcpy (outptr, "</font>");
/* GMIME_FILTER_HTML_CONVERT_NL: add a line break element */
416 if (html->flags & GMIME_FILTER_HTML_CONVERT_NL) {
417 outptr = check_size (filter, outptr, &outend, 5);
418 outptr = g_stpcpy (outptr, "<br>");
425 } while (inptr < inend);
/* NOTE(review): the condition enclosing this branch is not visible here;
 * presumably the pre tag is closed only when flushing -- confirm */
428 if (html->pre_open) {
429 /* close the pre-tag */
430 outptr = check_size (filter, outptr, &outend, 10);
431 outptr = g_stpcpy (outptr, "</pre>");
/* otherwise hand the unprocessed tail back to the filter via
 * g_mime_filter_backup */
433 } else if (start < inend) {
435 g_mime_filter_backup (filter, start, (unsigned) (inend - start));
/* report the output buffer, the number of bytes written and its prespace */
438 *out = filter->outbuf;
439 *outlen = outptr - filter->outbuf;
440 *outprespace = filter->outpre;
/* GMimeFilter filter method: forwards all arguments to html_convert with
 * flush set to FALSE. */
444 filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
445 char **out, size_t *outlen, size_t *outprespace)
447 html_convert (filter, in, len, prespace, out, outlen, outprespace, FALSE);
/* GMimeFilter complete method: forwards all arguments to html_convert with
 * flush set to TRUE, so a trailing line without a newline is processed too. */
451 filter_complete (GMimeFilter *filter, char *in, size_t len, size_t prespace,
452 char **out, size_t *outlen, size_t *outprespace)
454 html_convert (filter, in, len, prespace, out, outlen, outprespace, TRUE);
/* GMimeFilter reset method: clears the pre_open marker so that the next
 * conversion emits the opening pre tag again when GMIME_FILTER_HTML_PRE is set.
 * NOTE(review): resetting of any other state is not visible in this excerpt. */
458 filter_reset (GMimeFilter *filter)
460 GMimeFilterHTML *html = (GMimeFilterHTML *) filter;
463 html->pre_open = FALSE;
468 * g_mime_filter_html_new:
470 * @colour: citation colour
472 * Creates a new GMimeFilterHTML filter which can be used to convert a
473 * plain UTF-8 text stream into an html stream.
475 * Returns: a new html filter.
/* Constructor. flags is tested against the mask of every entry in the patterns
 * table to decide which URL and address patterns are added to the scanner of
 * the new filter; colour is stored as the citation colour. The result is
 * returned as a GMimeFilter pointer.
 * NOTE(review): the gtk-doc block preceding this function does not describe
 * the flags parameter. The assignment of flags to the instance is not visible
 * in this excerpt, although filter_copy and html_convert read html->flags --
 * confirm it is present in the full file. */
478 g_mime_filter_html_new (guint32 flags, guint32 colour)
480 GMimeFilterHTML *new;
/* NOTE(review): g_object_newv is deprecated in recent GLib releases --
 * consider g_object_new; confirm the minimum GLib version first */
483 new = g_object_newv (GMIME_TYPE_FILTER_HTML, 0, NULL);
485 new->colour = colour;
/* register only the patterns enabled by the caller */
487 for (i = 0; i < NUM_URL_PATTERNS; i++) {
488 if (patterns[i].mask & flags)
489 url_scanner_add (new->scanner, &patterns[i].pattern);
492 return (GMimeFilter *) new;