3 # Script to turn PCRE man pages into HTML
6 # Subroutine to handle font changes and other escapes
11 $s =~ s/</&lt;/g; # Deal with < and >: escape "<" as an entity before the font tags below are inserted
13 $s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
14 $s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
16 $s =~ s/(?<=Copyright )\(c\)/&copy;/g; # use the entity so the symbol does not depend on source/page encoding
20 # Subroutine to ensure not in a paragraph
25 print TEMP "</PRE>\n" if ($inpre);
32 # Subroutine to start a new paragraph
50 while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
52 $toc = 1 if $ARGV[0] eq "-toc";
56 # Initial output to STDOUT
61 <title>$ARGV[0] specification</title>
63 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
64 <h1>$ARGV[0] man page</h1>
66 Return to the <a href="index.html">PCRE index page</a>.
69 This page is part of the PCRE HTML documentation. It was generated automatically
70 from the original man page. If there is any nonsense in it, please consult the
71 man page, in case the conversion went wrong.
75 print "<ul>\n" if ($toc);
77 open(TEMP, ">", "/tmp/$$") || die "Can't open /tmp/$$ for output: $!\n"; # 3-arg open; report the OS error
81 # Handle lines beginning with a dot
85 # Some of the PCRE man pages used to contain instances of .br. However,
86 # they should have all been removed because they cause trouble in some
87 # (other) automated systems that translate man pages to HTML. Complain if
88 # we find .br or .in (another macro that is deprecated).
90 if (/^\.br/ || /^\.in/)
92 print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
93 print STDERR "*** $_\n";
94 die "*** Processing abandoned\n";
97 # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.
109 # Handling .sp is subtle. If it is inside a literal section, do nothing if
110 # the next line is a non literal text line; similarly, if not inside a
111 # literal section, do nothing if a literal follows. The point being that
112 # the <pre> and </pre> that delimit literal sections will do the spacing.
113 # Always skip if no previous output.
122 print TEMP "\n" if (/^[\s.]/);
126 print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
128 redo; # Now process the lookahead line we just read
131 elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
135 elsif (/^\.SH\s*("?)(.*)\1/)
137 # Ignore the NAME section
145 my($title) = &do_line($2);
148 printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
150 printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
156 print TEMP "<br><b>\n$title\n</b><br>\n";
159 elsif (/^\.SS\s*("?)(.*)\1/)
162 my($title) = &do_line($2);
163 print TEMP "<br><b>\n$title\n</b><br>\n";
165 elsif (/^\.B\s*(.*)/)
167 &new_para() if (!$inpara);
170 print TEMP "<b>$_</b>\n";
173 elsif (/^\.I\s*(.*)/)
175 &new_para() if (!$inpara);
178 print TEMP "<i>$_</i>\n";
182 # A comment that starts "HREF" takes the next line as a name that
183 # is turned into a hyperlink, using the text given, which might be
184 # in a special font. If it ends in () or (digits) or punctuation, they
185 # aren't part of the link.
187 elsif (/^\.\\"\s*HREF/)
193 $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
194 print TEMP "<a href=\"$1.html\">$_</a>\n";
197 # A comment that starts "HTML" inserts literal HTML
199 elsif (/^\.\\"\s*HTML\s*(.*)/)
204 # A comment that starts < inserts that HTML at the end of the
205 # *next* input line - so as not to get a newline between them.
207 elsif (/^\.\\"\s*(<.*>)/)
214 print TEMP "$_$markup\n";
217 # A comment that starts JOIN joins the next two lines together, with one
218 # space between them. Then that line is processed. This is used in some
219 # displays where two lines are needed for the "man" version. JOINSH works
220 # the same, except that it assumes this is a shell command, so removes
221 # continuation backslashes.
223 elsif (/^\.\\"\s*JOIN(SH)?/)
228 $one =~ s/\s*\\e\s*$// if (defined($1));
232 redo; # Process the joined lines
235 # .EX/.EE are used in the pcredemo page to bracket the entire program,
236 # which is unmodified except for turning backslash into "\e".
240 print TEMP "<PRE>\n";
252 # Ignore anything not recognized
257 # Line does not begin with a dot. Replace blank lines with new paragraphs
261 &end_para() if ($wrotetext);
265 # Convert font changes and output an ordinary line. Ensure that indented
266 # lines are marked as literal.
269 &new_para() if (!$inpara);
275 print TEMP "<pre>\n";
281 print TEMP "</pre>\n";
285 # Add <br> to the end of a non-literal line if we are within .nf/.fi
287 $_ .= "<br>\n" if (!$inpre && $innf);
293 # The TOC, if present, will have been written - terminate it
295 print "</ul>\n" if ($toc);
297 # Copy the remainder to the standard output
300 open(TEMP, "<", "/tmp/$$") || die "Can't open /tmp/$$ for input: $!\n"; # 3-arg open; report the OS error
302 print while (<TEMP>);
306 Return to the <a href="index.html">PCRE index page</a>.