3 # Translate really simple html to googlecode.com wiki.
5 # Usage: cat input.html | html2wiki.sh > outputwiki.txt
7 # Most of this script is simple sed substitutions with an awk script to handle
10 # Awk program to escape all instances of * outside of <listing></listing>
12 BEGIN { in_listing = 0; }  # Start outside of any <listing> section.
13 /<[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>/ { in_listing = 1; }  # Opening <listing> tag, any letter case.
14 /<\/[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>/ { in_listing = 0; }  # Closing </listing> tag, any letter case.
19 print gensub("*", "`*`", "g", $0)  # Wrap every * in backquotes; gensub() is a gawk extension.
22 # Awk program to convert hierarchical unordered and ordered lists into
23 # googlecode wiki list markup. This is limited to converting very simple
24 # html lists in the form:
32 # This script also removes leading spaces from all lines outside of <listing>
37 list_type_ordered = 1;
38 list_type_unordered = 2;
39 # Number of nested lists.
41 # Number of items in the list.
42 list_items[list_depth] = 0;
44 list_type[list_depth] = list_type_none;
45 # Do not strip whitespace from listing sections.
49 # Generate a string of indent spaces.
50 function list_indent(indent) {  # Returns a string made of indent space characters.
51 format = sprintf("%%%ds", indent);  # Builds a width-limited format, e.g. "%4s" when indent is 4.
52 return sprintf(format, "");  # Formatting an empty string pads it with spaces to that width.
55 /<[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>/ { in_listing = 1; }
56 /<\/[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>/ { in_listing = 0; }
58 # Process all non-blank lines.
60 # Remove leading white space.
62 output_string = gensub(/^ */, "", 1, $0);
66 search_string = output_string
68 # Replace list tags with googlecode wiki markup.
69 while (match(search_string, /<[^>]*>/, matches)) {  # Find the next <...> tag; the 3-argument match() is a gawk extension.
71 search_string = substr(search_string,
72 matches[0, "start"] + matches[0, "length"]);  # Keep only the text after the tag just found.
73 if (match(tag, /^<[Uu][Ll]>$/)) {  # Opening <ul> tag, any letter case.
75 list_type[list_depth] = list_type_unordered;
76 list_items[list_depth] = 0;  # Restart the item count at this depth.
77 output_string = gensub(tag, "", 1, output_string);  # Remove the first occurrence of the tag from the output line.
83 } else if (match(tag, /^<[Oo][Ll]>$/)) {  # Opening <ol> tag; the leading < is required, as in the <ul> test.
80 list_type[list_depth] = list_type_ordered;
81 list_items[list_depth] = 0;
82 output_string = gensub(tag, "", 1, output_string);
83 } else if (match(tag, /^<\/[Ll][Ii]>$/)) {
84 output_string = gensub(tag, "", 1, output_string);
85 } else if (list_depth) {
86 if (match(tag, /^<[Ll][Ii]>$/)) {
87 if (list_type[list_depth] == list_type_unordered) {
88 output_string = gensub(tag, list_indent(list_depth) "* ", 1,
90 } else if (list_type[list_depth] == list_type_ordered) {
91 output_string = gensub(tag, list_indent(list_depth) "# ", 1,
94 } else if (match(tag, /^<\/[Uu][Ll]>$/) ||
95 match(tag, /^<\/[Oo][Ll]>$/)) {  # Closing </ul> or </ol> tag, any letter case.
96 output_string = gensub(tag, "", 1, output_string);
101 # If a list is being parsed then filter blank lines.
102 if (list_depth == 0 || length(output_string)) {
107 # This sed program translates really simple html into wiki suitable for
120 # <a href="#.*">.*</a>
121 # <a href=".*">.*</a>
122 # <a name=".*">.*</a>
124 # Supported entities:
129 # * Anchors must be on a single line and must contain one of either the name or
131 # * All external links are relative to
132 # http://cmockery.googlecode.com/svn/trunk/doc/
143 s@<[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>@{{{@g;
144 s@</[Ll][Ii][Ss][Tt][Ii][Nn][Gg]>@}}}@g;
145 s@<[Aa].*?href="#(.*)?">(.*)?</[Aa]>@[#\1 \2]@g;
146 s@<[Aa].*?href="(.*)?">(.*)?</[Aa]>@[http://cmockery.googlecode.com/svn/trunk/doc/\1 \2]@g;
147 s@<[Aa].*?name="(.*)?">@@g;