1 ########################################################################
2 # SGMLSPL script to convert from the DocBook DTD to HTML pages.
4 # by David Megginson <dmeggins@aix1.uottawa.ca>
6 # This is a slightly more complicated script than tolatex.pl, since it
7 # uses forward references and an external reference file. Note that
8 # this script is customised for the SGMLS.pm and sgmlspl documentation
9 # in this directory, and is not meant as a general-purpose
10 # DocBook->HTML translator (though it could form the basis of one).
11 # Because each parse uses information saved from the last parse,
12 # you might need two passes to make certain that all references are up to date.
16 # Revision 1.4 1995/12/03 22:07:21 david
17 # Changed to use SGMLS::Output instead of Output, and to take advantage
18 # of the SGMLS::Refs package for forward references.
20 # Revision 1.3 1995/08/12 16:25:53 david
21 # Oops! Fixed comment leader in RCS file.
23 # Revision 1.2 1995/08/12 16:21:06 david
24 # Changes for release 1.01: fixed handling of prefixed sysid's from NSGMLS.
27 ########################################################################
29 use SGMLS; # Use the SGMLS package.
30 use SGMLS::Output; # Use stack-based output.
# RCS identification string for this script.
$version = '$Id: tohtml.pl,v 1.4 1995/12/03 22:07:21 david Exp $';

# The extra argument to the script is the basename; it is used to build
# the names of the ".redo_<basename>" marker and "<basename>.refs" files.
$basename = shift;

# The SGML document is split into a series of smaller HTML documents, so
# the script tracks both a "current ID" and a "current file".  The current
# ID is the ID (explicit or implied) of the most recent element which
# wants to capture titles, etc.; the current file is the HTML file being
# written.  Parent values are kept on the two stacks.
#
#   $current_id     ID of the current container element
#   $current_file   name of the current output file
#   $top_id         ID of the top element
#   $top_file       name of the top file
#   $previous_file  previous file on the same level
$current_id = $current_file = $top_id = $top_file = $previous_file = '';

# IDs of any parent container elements / names of the parent output files.
@current_id_stack   = ();
@current_file_stack = ();

# The number of the current table.
$table_counter = 0;
59 ########################################################################
60 # Handler declarations for sgmlspl.
61 ########################################################################
64 # Use the 'start' and 'end' handlers of the document to begin and
65 # terminate reference handling.
# Create the ".redo_<basename>" marker file before the parse begins.
# The list form of system() runs touch directly instead of through the
# shell, so spaces or shell metacharacters in $basename (a command-line
# argument) cannot be interpreted as shell syntax.
system('touch', ".redo_$basename");
# Start up the reference manager, backed by "<basename>.refs".  The
# explicit Class->new(...) call replaces the indirect-object form
# "new Class(...)", which Perl can mis-parse.
$Refs = SGMLS::Refs->new("$basename.refs");
# Remove the ".redo_<basename>" marker file, but only when the reference
# manager's warn method returns a false value.
if (!$Refs->warn) {
    unlink ".redo_$basename";
}
78 # The <ARTICLE> is the top-level element.
# The start handler records whatever $current_id and $current_file hold at
# this point as the top-level ID and file; $top_file is the target of the
# "Top" navigation links written by start_html and end_html.
# NOTE(review): the handler presumably calls start_html before these
# assignments so that both variables describe the article itself -- confirm.
80 sgml('<ARTICLE>', sub {
82 $top_id = $current_id;
83 $top_file = $current_file;
# The end of the article ends the HTML document via end_html.
85 sgml('</ARTICLE>', sub { end_html(); });
# The article header is mostly discarded: while inside <ARTHEADER> all
# output is pushed to 'nul', and the previous output destination is
# restored when the element ends.  The parts which are actually wanted
# are captured by their own handlers.
sgml('<ARTHEADER>', sub {
    push_output('nul');
});
sgml('</ARTHEADER>', sub {
    pop_output();
});

# A title is collected into a string and then stored in the reference
# manager under "title:<current ID>" for future reference.
sgml('<TITLE>', sub {
    push_output('string');
});
sgml('</TITLE>', sub {
    $Refs->put("title:$current_id", pop_output());
});
# Pure container elements of the <ARTHEADER>: both their start and end
# tags produce no output of their own.
foreach my $container (qw(AUTHORGROUP AUTHOR AFFILIATION ADDRESS ARTPAGENUMS)) {
    sgml("<$container>",  "");
    sgml("</$container>", "");
}
# Author and organisation details are saved for future reference.  For
# each of these elements the content is collected into a string, then
# stored in the reference manager under "<key>:<current ID>", where the
# key is the lower-cased element name:
#
#   FIRSTNAME -> firstname   (the author's first name)
#   SURNAME   -> surname     (the author's surname)
#   ORGNAME   -> orgname     (the organisation name)
#   ORGDIV    -> orgdiv      (the organisation division)
#   EMAIL     -> email       (the email address)
foreach my $gi (qw(FIRSTNAME SURNAME ORGNAME ORGDIV EMAIL)) {
    my $key = lc $gi;
    sgml("<$gi>", sub {
        push_output('string');
    });
    sgml("</$gi>", sub {
        $Refs->put("$key:$current_id", pop_output());
    });
}
# Sectioning elements.  Every one of them is handled the same way: the
# start tag passes the element to start_html and the end tag calls
# end_html; those two subroutines do all of the real work.
foreach my $section (qw(IMPORTANT SECT1 SECT2 SECT3 SECT4 SECT5)) {
    sgml("<$section>", sub {
        start_html($_[0]);
    });
    sgml("</$section>", sub {
        end_html();
    });
}
# HTML requires paragraphs to be marked explicitly.  Following HTML 3
# practice (which is simply real SGML, for a change), both the beginning
# and the end of each paragraph are tagged.
sgml('<PARA>',  '<P>');
sgml('</PARA>', "</P>\n\n");
180 # This is an internal cross reference -- get the URL by
181 # simply adding ".html" to the lower-cased LINKEND IDREF.
# NOTE(review): this presumably only resolves when the target element was
# given its own "<id>.html" file by start_html -- confirm.
187 output lc($element->attribute(LINKEND)->value) . ".html";
# The end of a <LINK> emits the closing anchor tag.
190 sgml('</LINK>', "</A>");
193 # This is an external cross-reference, with a supplied URL.
195 sgml('<ULINK>', sub {
# The value of the URL attribute is written out as-is.
# NOTE(review): no escaping of the URL is visible here -- confirm that
# the documents never carry quote or angle-bracket characters in URLs.
198 output $element->attribute(URL)->value;
# The end of a <ULINK> emits the closing anchor tag.
201 sgml('</ULINK>', "</A>");
205 # This is a pointer to something (in this case, always a table).
# The text printed is the value stored in the reference manager under
# "xref:<lower-cased LINKEND>"; the <TABLE> handler stores the table
# number under that key.
209 output $Refs->get('xref:' . lc($element->attribute(LINKEND)->value));
# Simple inline elements: each start tag and each end tag is replaced by
# a fixed string.  Every row gives the element name, the text emitted at
# its start, and the text emitted at its end.
foreach my $markup (
    [ 'APPLICATION', "<TT>", "</TT>" ],    # application names in typewriter
    [ 'ACRONYM',     "<B>",  "</B>"  ],    # acronyms in bold
    [ 'GLOSSTERM',   "<I>",  "</I>"  ],    # terms in italics
    [ 'FILENAME',    "<TT>", "</TT>" ],    # file names in typewriter
    [ 'SYMBOL',      "<TT>", "</TT>" ],    # symbols in typewriter
    [ 'RETURNVALUE', "<TT>", "</TT>" ],    # return values in typewriter
    [ 'QUOTE',       '"',    '"'     ],    # quotations in quotation marks
    [ 'COMMAND',     "<TT>", "</TT>" ],    # commands in typewriter
) {
    my ($gi, $open, $close) = @$markup;
    sgml("<$gi>",  $open);
    sgml("</$gi>", $close);
}
# Print parameters in italics.  The previous "<IT>" tag is not an HTML
# element, so browsers ignored it and parameters received no styling at
# all; "<I>" is the italic element already used for <GLOSSTERM>.
sgml('<PARAMETER>', "<I>");
sgml('</PARAMETER>', "</I>");
# More fixed-string replacements.  Every row gives the element name, the
# text emitted at its start, and the text emitted at its end.
foreach my $markup (
    [ 'LITERAL',        "<TT>",       "</TT>"          ],  # literals in typewriter
    [ 'CLASSNAME',      "<TT>",       "</TT>"          ],  # class names in typewriter
    [ 'EMPHASIS',       "<EM>",       "</EM>"          ],  # emphasise emphasis
    [ 'PROGRAMLISTING', "<P>\n<PRE>", "</PRE>\n</P>\n" ],  # listings are preformatted
) {
    my ($gi, $open, $close) = @$markup;
    sgml("<$gi>",  $open);
    sgml("</$gi>", $close);
}
304 # Keep a counter for table numbers, note the ID, and look up the
305 # title (caption) for the table.
307 sgml('<TABLE>', sub {
# Save the ID of the enclosing container; the </TABLE> handler pops it.
309 push @current_id_stack, $current_id;
# The new current ID is the table's ID attribute value, or a generated ID
# when that value is false; either way it is lower-cased.
310 $current_id = lc($element->attribute(ID)->value || gen_id());
# Record the table number under "xref:<id>" in the reference manager.
312 $Refs->put("xref:$current_id",$table_counter);
# Print a heading made of the table number and the text stored under
# "title:<id>".
# NOTE(review): that title is presumably the one saved by the </TITLE>
# handler, so it may only be available from an earlier pass -- confirm.
313 output "\n<H3>Table $table_counter: "
314 . $Refs->get("title:$current_id") . "</H3>\n\n";
316 sgml('</TABLE>', sub {
# Restore the ID of the enclosing container.
318 $current_id = pop @current_id_stack;
# <TGROUP> produces nothing -- the number of cells is of no interest.
sgml('<TGROUP>',  '');
sgml('</TGROUP>', '');

# The head of the table is not printed.  At its start the list of saved
# cell headings is emptied and output is pushed to 'nul'; at its end the
# previous output destination is restored.  The entries of the head are
# kept track of for later use.
sgml('<THEAD>', sub {
    @cell_headings = ();
    push_output('nul');
});
sgml('</THEAD>', sub {
    pop_output();
});

# A single horizontal rule is printed before the beginning of the body;
# the end of the body prints nothing.
sgml('<TBODY>',  '<HR>');
sgml('</TBODY>', '');
340 # Make each row into a labelled list (!!) -- HTML 3 does have tables,
341 # but they might not be able to handle the paragraph-length entries
342 # which I used in my documentation (these will not print if we are
343 # in the <THEAD>, since output will be 'nul').
# The end of a row closes the list and prints a horizontal rule.
349 sgml('</ROW>', "\n</DL>\n<HR>\n\n");
352 # If an entry is in the <THEAD>, save it for later use; otherwise,
353 # print the entry as a list item with its corresponding <THEAD> entry as the label.
356 sgml('<ENTRY>', sub {
358 if ($element->within(THEAD)) {
# Heading entry: collect its content into a string.
359 push_output 'string';
# Body entry: the saved heading for this cell position is printed inside
# the list term, then the list definition is opened for the entry itself.
362 output $cell_headings[$cell_counter];
363 output "</B></DT>\n<DD>";
366 sgml('</ENTRY>', sub {
368 if ($element->within(THEAD)) {
# Heading entry: store the collected string at this cell position.
369 $cell_headings[$cell_counter] = pop_output();
378 ########################################################################
379 # SDATA Handlers -- use HTML entities wherever possible.
380 ########################################################################
# Replacement text for SDATA strings.  The characters which are
# significant as HTML markup ("<", ">" and "&") are written as entity
# references; emitting them raw would let a browser treat the following
# text as a tag or as the start of an entity reference.
sgml('|[lt ]|', "&lt;");
sgml('|[gt ]|', "&gt;");
sgml('|[mdash ]|', "--");
sgml('|[LaTeX]|', "LaTeX");
sgml('|[hellip]|', "...");
sgml('|[amp ]|', "&amp;");
391 ########################################################################
392 # The generic external data entity handler. Handle only entities
393 # with type CDATA, and simply dump their files into the current
394 # document with minimal conversion.
395 ########################################################################
399 # Use the first generated filename
400 # or the system identifier.
401 my $filename = $entity->filenames->[0] || $entity->sysid;
402 # A strange, NSGMLS-thing.
# The test matches names which start with "FILE:" or with a "<FILE...>"
# prefix.
403 if ($filename =~ /^FILE:/ || $filename =~ /^\<FILE[^>]+\>/) {
# Only entities whose type is the string 'CDATA' are processed.
408 if ($entity->type eq 'CDATA') {
# NOTE(review): this is a two-argument open on a bareword handle with the
# file name interpolated into the mode string; consider the three-argument
# form with a lexical handle when this handler is next revised.
411 unless (open INPUT, "<$filename") {
412 die "Cannot open external file $filename\n";
414 # Convert special SGML characters.
423 die "Cannot read file $filename\n";
# Any entity type other than CDATA is a fatal error.
426 die "Cannot handle external entity with type " . $entity->type . "\n";
432 ########################################################################
433 # Default handlers -- these will pick up any unrecognised elements,
434 # SDATA strings, processing instructions, or subdocument entities,
435 # and report an error to the user.
436 #########################################################################
# Catch-all handlers.  Anything which has no specific handler is treated
# as a fatal error naming the offending item.

# An element with no handler of its own.
sgml('start_element', sub {
    my ($element) = @_;
    die "Unknown element: " . $element->name;
});

# An SDATA string with no handler of its own.
sgml('sdata', sub {
    my ($sdata) = @_;
    die "Unknown SDATA: " . $sdata;
});

# Any processing instruction.
sgml('pi', sub {
    my ($instruction) = @_;
    die "Unknown processing instruction: " . $instruction;
});

# Any subdocument entity.
sgml('start_subdoc', sub {
    my ($entity) = @_;
    die "Unknown subdoc entity: " . $entity->name;
});
446 # End of sgmlspl handler declarations.
450 ########################################################################
451 # Utility procedures.
452 ########################################################################
456 # Given an element, start a new HTML document for it.
# Remember the file being written so far; it becomes the "up" target of
# the new file.
460 my $old_file = $current_file;
462 # Save the old values on the stack.
463 push @current_id_stack, $current_id;
464 push @current_file_stack, $current_file;
466 # Get the new ID and file.
# The ID is the element's ID attribute value, or a generated ID when that
# value is false, lower-cased; the file name is that ID plus ".html".
467 $current_id = lc($element->attribute(ID)->value || gen_id());
468 $current_file = $current_id . '.html';
470 # Note the previous child, if any.
# Both directions are stored: "previous" for the new file and "next" for
# the file before it.
471 if ($previous_file) {
472 $Refs->put("previous:$current_file",$previous_file);
473 $Refs->put("next:$previous_file",$current_file);
477 # Put a reference up to the parent.
479 $Refs->put("up:$current_file",$old_file);
482 # Look up the title reference.
483 my $plaintitle = my $title = $Refs->get("title:$current_id");
# Remove anything of the form <...> from the plain copy, which is used in
# the link text and in the <TITLE> element below; the unmodified title is
# used for the <H1> heading.
486 $plaintitle =~ s/\<[^\>]+\>//g;
488 # If this is not the top-level
489 # file, send out a link
490 # before beginning the new file.
# This list item is written before output is switched to the new file.
492 output "<LI><A HREF=\"$current_file\">$plaintitle</A></LI>\n";
495 # Send output to the new file.
496 push_output('file',$current_file);
498 # Print the front matter.
499 output "<HTML>\n<HEAD>\n<TITLE>$plaintitle</TITLE>\n</HEAD>\n<BODY>\n";
501 # Navigational aids, if this is not
502 # the top-level file.
504 output "\n<P><B>Links</B>: ";
# The link targets are read back from the reference manager; a link is
# only printed when its target is a true value.
# NOTE(review): "next" is stored when the following sibling starts, so it
# presumably comes from an earlier pass over the document -- confirm.
505 my $up = $Refs->get("up:$current_file");
506 my $previous = $Refs->get("previous:$current_file");
507 my $next = $Refs->get("next:$current_file");
508 output "<A HREF=$next>Next</A> " if $next;
509 output "<A HREF=$previous>Previous</A> " if $previous;
510 output "<A HREF=$up>Up</A> " if $up;
511 output "<A HREF=$top_file>Top</A>";
# The heading uses the title as stored, including any markup in it.
516 output "<H1>$title</H1>\n\n";
520 # End the HTML document.
523 # Look up the name and email info.
# Each value is looked up for the current ID first; if that is false,
# the value stored for the top-level ID is used instead.
524 my $firstname = $Refs->get("firstname:$current_id") ||
525 $Refs->get("firstname:$top_id");
526 my $surname = $Refs->get("surname:$current_id") ||
527 $Refs->get("surname:$top_id");
528 my $email = $Refs->get("email:$current_id") ||
529 $Refs->get("email:$top_id");
531 # Restore the previous ID and file,
532 # and note this as the previous file on the same level.
534 $previous_file = $current_file;
535 $current_id = pop @current_id_stack;
536 $current_file = pop @current_file_stack;
538 # If this is not the top-level file,
539 # add some navigational information.
# After the restore above, $previous_file names the file which is being
# finished, so the link targets are looked up under that name.
541 output "\n<P><B>Links</B>: ";
542 my $up = $Refs->get("up:$previous_file");
543 my $previous = $Refs->get("previous:$previous_file");
544 my $next = $Refs->get("next:$previous_file");
545 output "<A HREF=$next>Next</A> " if $next;
546 output "<A HREF=$previous>Previous</A> " if $previous;
547 output "<A HREF=$up>Up</A> " if $up;
548 output "<A HREF=$top_file>Top</A>";
# Add an address, if available, including a MAILTO URL.  The address
# shown to the reader is surrounded by "&lt;" and "&gt;" entity
# references: literal angle brackets would make a browser parse the
# address as an unknown tag and display nothing.
output "\n<ADDRESS>";
output "$firstname $surname " if $firstname || $surname;
output "<A HREF=\"mailto:$email\">&lt;$email&gt;</A>" if $email;
# Close the address block and the HTML document itself.
output "</ADDRESS>\n</BODY>\n</HTML>\n";
562 # Generate a new ID for anything which does not already have one.
# The ID returned is the string "node" followed by the current value of
# $id_counter.
566 return "node$id_counter";