4 #+##############################################################################
8 # Description: Program to transform most Texinfo documents to HTML #
10 #-##############################################################################
12 # @(#)texi2html 1.52 01/05/98 Written (mainly) by Lionel Cons, Lionel.Cons@cern.ch
14 # The man page for this program is included at the end of this file and can be
15 # viewed using the command 'nroff -man texi2html'.
16 # Please read the copyright at the end of the man page.
18 #+++############################################################################
22 #---############################################################################
32 $BIBRE = '\[[\w\/-]+\]'; # RE for a bibliography reference
33 $FILERE = '[\/\w.+-]+'; # RE for a file name
34 $VARRE = '[^\s\{\}]+'; # RE for a variable name
35 $NODERE = '[^@{}:\'`",]+'; # RE for a node name
36 $NODESRE = '[^@{}:\'`"]+'; # RE for a list of node names
37 $XREFRE = '[^@{}]+'; # RE for a xref (should use NODERE)
39 $ERROR = "***"; # prefix for errors and warnings
40 $THISPROG = "texi2html 1.52"; # program name and version
41 $HOMEPAGE = "http://wwwinfo.cern.ch/dis/texi2html/"; # program home page
42 $TODAY = &pretty_date; # like "20 September 1993"
43 $SPLITTAG = "<!-- SPLIT HERE -->\n"; # tag to know where to split
44 $PROTECTTAG = "_ThisIsProtected_"; # tag to recognize protected sections
45 $html2_doctype = '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Strict Level 2//EN">';
48 # language dependent constants
51 #$LDC_SECTION = 'section';
53 #$LDC_TOC = 'Table of Contents';
54 #$LDC_GOTO = 'Go to the';
55 #$LDC_FOOT = 'Footnotes';
56 # TODO: @def* shortcuts
83 # texinfo section names to level
98 'unnumberedsubsec', 3,
102 'unnumberedsubsubsec', 4,
104 'appendixsubsubsec', 4,
108 # accent map, TeX command to ISO name
119 # texinfo "simple things" (@foo) to HTML ones
136 # texinfo "things" (@foo{}) to HTML ones
140 'br', '<P>', # paragraph break
155 # texinfo styles (@foo{bar}) to HTML ones
162 'ctrl', '&do_ctrl', # special case
163 'dfn', 'EM', # DFN tag is illegal in the standard
165 'email', '&do_email', # insert a clickable email address
167 'file', '"TT', # will put quotes, cf. &apply_style
172 'r', '', # unsupported
173 'samp', '"SAMP', # will put quotes, cf. &apply_style
174 'sc', '&do_sc', # special case
177 'titlefont', '', # useless
178 'uref', '&do_uref', # insert a clickable URL
179 'url', '&do_url', # insert a clickable URL
181 'w', '', # unsupported
185 # texinfo format (@foo/@end foo) to HTML ones
192 'quotation', 'BLOCKQUOTE',
193 'smallexample', 'PRE',
204 # texinfo definition shortcuts to real ones
224 'defun', 'deffn Function',
225 'defmac', 'deffn Macro',
226 'defspec', 'deffn {Special Form}',
227 'defvar', 'defvr Variable',
228 'defopt', 'defvr {User Option}',
229 'deftypefun', 'deftypefn Function',
230 'deftypevar', 'deftypevr Variable',
231 'defivar', 'defcv {Instance Variable}',
232 'defmethod', 'defop Method',
234 'defunx', 'deffnx Function',
235 'defmacx', 'deffnx Macro',
236 'defspecx', 'deffnx {Special Form}',
237 'defvarx', 'defvrx Variable',
238 'defoptx', 'defvrx {User Option}',
239 'deftypefunx', 'deftypefnx Function',
240 'deftypevarx', 'deftypevrx Variable',
241 'defivarx', 'defcvx {Instance Variable}',
242 'defmethodx', 'defopx Method',
255 'summarycontents', 1,
261 # unsupported commands (formatting)
268 'setchapternewpage', 1,
278 'paragraphindent', 1,
279 # unsupported formats
286 #+++############################################################################
288 # Argument parsing, initialisation #
290 #---############################################################################
292 %value = (); # hold texinfo variables, see also -D
294 $use_bibliography = 1;
301 $invisible_mark = '';
305 $number_sections = 0;
312 To convert a Texinfo file to HTML: $0 [options] file
313 where options can be:
314 -expandinfo : use \@ifinfo sections, not \@iftex
315 -glossary : handle a glossary
316 -invisible name: use 'name' as an invisible anchor
317 -Dname : define name like with \@set
318 -I dir : search also for files in 'dir'
320 -monolithic : output only one file including ToC
321 -number : number sections
322 -split_chapter : split on main sections
323 -split_node : split on nodes
324 -usage : print usage instructions
325 -verbose : verbose output
326 To check converted files: $0 -check [-verbose] files
329 while (@ARGV && $ARGV[0] =~ /^-/) {
331 if (/^-acc$/) { $use_acc = 1; next; }
332 if (/^-d(ebug)?(\d+)?$/) { $debug = $2 || shift(@ARGV); next; }
333 if (/^-doctype$/) { $doctype = shift(@ARGV); next; }
334 if (/^-c(heck)?$/) { $check = 1; next; }
335 if (/^-e(xpandinfo)?$/) { $expandinfo = 1; next; }
336 if (/^-g(lossary)?$/) { $use_glossary = 1; next; }
337 if (/^-i(nvisible)?$/) { $invisible_mark = shift(@ARGV); next; }
338 if (/^-iso$/) { $use_iso = 1; next; }
339 if (/^-D(.+)?$/) { $value{$1 || shift(@ARGV)} = 1; next; }
340 if (/^-I(.+)?$/) { push(@include_dirs, $1 || shift(@ARGV)); next; }
341 if (/^-m(enu)?$/) { $show_menu = 1; next; }
342 if (/^-mono(lithic)?$/) { $monolithic = 1; next; }
343 if (/^-n(umber)?$/) { $number_sections = 1; next; }
344 if (/^-s(plit)?_?(n(ode)?|c(hapter)?)?$/) {
352 if (/^-v(erbose)?$/) { $verbose = 1; next; }
356 die $usage unless @ARGV > 0;
361 if (($split_node || $split_chapter) && $monolithic) {
362 warn "Can't use -monolithic with -split, -monolithic ignored.\n";
366 $to_skip{'ifinfo'}++;
367 $to_skip{'end ifinfo'}++;
370 $to_skip{'end iftex'}++;
372 $invisible_mark = '<IMG SRC="invisible.xbm">' if $invisible_mark eq 'xbm';
373 die $usage unless @ARGV == 1;
374 $docu = shift(@ARGV);
375 if ($docu =~ /.*\//) {
376 chop($docu_dir = $&);
382 unshift(@include_dirs, $docu_dir);
383 $docu_name =~ s/\.te?x(i|info)?$//; # basename of the document
385 $docu_doc = "$docu_name.html"; # document's contents
387 $docu_toc = $docu_foot = $docu_doc;
389 $docu_toc = "${docu_name}_toc.html"; # document's table of contents
390 $docu_foot = "${docu_name}_foot.html"; # document's footnotes
396 $value{'html'} = 1; # predefine html (the output format)
397 $value{'texi2html'} = '1.52'; # predefine texi2html (the translator)
398 # _foo: internal to track @foo
399 foreach ('_author', '_title', '_subtitle',
400 '_settitle', '_setfilename') {
401 $value{$_} = ''; # prevent -w warnings
403 %node2sec = (); # node to section name
404 %node2href = (); # node to HREF
405 %bib2href = (); # bibliography reference to HREF
406 %gloss2href = (); # glossary term to HREF
407 @sections = (); # list of sections
408 %tag2pro = (); # protected sections
422 # can I use ISO8879 characters? (HTML+)
# ISO 8879 / HTML named entity references for the Texinfo glyph commands.
# Entity references (rather than literal non-ASCII characters) keep the
# generated HTML pure ASCII, so it renders the same whatever character
# encoding the browser assumes for the page.
$things_map{'bullet'} = "&bull;";
$things_map{'copyright'} = "&copy;";
$things_map{'dots'} = "&hellip;";
$things_map{'equiv'} = "&equiv;";
$things_map{'expansion'} = "&rarr;";
$things_map{'point'} = "&lowast;";
$things_map{'result'} = "&rArr;";
435 # read texi2html extensions (if any)
437 $extensions = 'texi2html.ext'; # extensions in working directory
438 if (-f $extensions) {
439 print "# reading extensions from $extensions\n" if $verbose;
440 require($extensions);
442 ($progdir = $0) =~ s/[^\/]+$//;
443 if ($progdir && ($progdir ne './')) {
444 $extensions = "${progdir}texi2html.ext"; # extensions in texi2html directory
445 if (-f $extensions) {
446 print "# reading extensions from $extensions\n" if $verbose;
447 require($extensions);
451 print "# reading from $docu\n" if $verbose;
453 #+++############################################################################
455 # Pass 1: read source, handle command, variable, simple substitution #
457 #---############################################################################
459 @lines = (); # whole document
460 @toc_lines = (); # table of contents
461 $toplevel = 0; # top level seen in hierarchy
462 $curlevel = 0; # current level in TOC
463 $node = ''; # current node name
464 $in_table = 0; # am I inside a table
465 $table_type = ''; # type of table ('', 'f', 'v', 'multi')
466 @tables = (); # nested table support
467 $in_bibliography = 0; # am I inside a bibliography
468 $in_glossary = 0; # am I inside a glossary
469 $in_top = 0; # am I inside the top node
470 $in_pre = 0; # am I inside a preformatted section
471 $in_list = 0; # am I inside a list
472 $in_html = 0; # am I inside an HTML section
473 $first_line = 1; # is it the first line
474 $dont_html = 0; # don't protect HTML on this line
475 $split_num = 0; # split index
476 $deferred_ref = ''; # deferred reference for indexes
477 @html_stack = (); # HTML elements stack
478 $html_element = ''; # current HTML element
481 # build code for simple substitutions
482 # the maps used (%simple_map and %things_map) MUST be aware of this
483 # watch out for regexps, / and escaped characters!
485 foreach (keys(%simple_map)) {
486 ($re = $_) =~ s/(\W)/\\$1/g; # protect regexp chars
487 $subst_code .= "s/\\\@$re/$simple_map{$_}/g;\n";
489 foreach (keys(%things_map)) {
490 $subst_code .= "s/\\\@$_\\{\\}/$things_map{$_}/g;\n";
493 # accented characters
494 foreach (keys(%accent_map)) {
496 $subst_code .= "s/$;3";
497 } elsif ($_ eq "'") {
498 $subst_code .= "s/$;4";
500 $subst_code .= "s/\\\@\\$_";
502 $subst_code .= "([aeiou])/&\${1}$accent_map{$_};/gi;\n";
505 eval("sub simple_substitutions { $subst_code }");
508 while ($_ = &next_line) {
510 # remove \input on the first lines only
521 if (/^\@end\s+(\w+)\b/) {
523 } elsif (/^\@(\w+)\b/) {
527 # handle @ifhtml / @end ifhtml
530 if ($end_tag eq 'ifhtml') {
533 $tag2pro{$in_html} .= $_;
536 } elsif ($tag eq 'ifhtml') {
537 $in_html = $PROTECTTAG . ++$html_num;
538 push(@lines, $in_html);
542 # try to skip the line
545 next if $to_skip{"end $end_tag"};
547 next if $to_skip{$tag};
548 last if $tag eq 'bye';
551 # parsing the top node
552 if ($tag eq 'node' || $tag eq 'include' || $sec2level{$tag}) {
561 # try to remove inlined comments
562 # syntax from tex-mode.el comment-start-skip
564 s/((^|[^\@])(\@\@)*)\@c(omment)? .*/$1/;
565 # non-@ substitutions cf. texinfmt.el
569 s/([\w ])---([\w ])/$1--$2/g;
576 &skip_until($tag), next if $tag eq 'ignore';
578 &skip_until($tag), next if $tag eq 'iftex';
580 &skip_until($tag), next if $tag eq 'ifinfo';
582 &skip_until($tag), next if $tag eq 'tex';
583 # handle special tables
584 if ($tag =~ /^(|f|v|multi)table$/) {
589 if ($tag eq 'top' || ($tag eq 'node' && /^\@node\s+top\s*,/i)) {
591 @lines = (); # ignore all lines before top (title page garbage)
593 } elsif ($tag eq 'node') {
595 warn "$ERROR Bad node line: $_" unless $_ =~ /^\@node\s$NODESRE$/o;
596 $_ = &protect_html($_); # if node contains '&' for instance
598 ($node) = split(/,/);
599 &normalise_node($node);
602 push(@lines, $SPLITTAG) if $split_num++;
603 push(@sections, $node);
606 } elsif ($tag eq 'include') {
607 if (/^\@include\s+($FILERE)\s*$/o) {
610 foreach $dir (@include_dirs) {
617 print "# including $file\n" if $verbose;
619 warn "$ERROR Can't find $file, skipping";
622 warn "$ERROR Bad include line: $_";
625 } elsif ($tag eq 'ifclear') {
626 if (/^\@ifclear\s+($VARRE)\s*$/o) {
627 next unless defined($value{$1});
630 warn "$ERROR Bad ifclear line: $_";
633 } elsif ($tag eq 'ifset') {
634 if (/^\@ifset\s+($VARRE)\s*$/o) {
635 next if defined($value{$1});
638 warn "$ERROR Bad ifset line: $_";
641 } elsif ($tag eq 'menu') {
642 unless ($show_menu) {
647 push(@lines, &html_debug("\n", __LINE__));
648 } elsif ($format_map{$tag}) {
649 $in_pre = 1 if $format_map{$tag} eq 'PRE';
650 &html_push_if($format_map{$tag});
651 push(@lines, &html_debug("\n", __LINE__));
652 $in_list++ if $format_map{$tag} eq 'UL' || $format_map{$tag} eq 'OL' ;
653 push(@lines, &debug("<$format_map{$tag}>\n", __LINE__));
655 } elsif ($tag eq 'table') {
656 if (/^\@(|f|v|multi)table\s+\@(\w+)/) {
658 unshift(@tables, join($;, $table_type, $in_table));
659 if ($table_type eq "multi") {
660 push(@lines, &debug("<TABLE BORDER>\n", __LINE__));
661 &html_push_if('TABLE');
663 push(@lines, &debug("<DL COMPACT>\n", __LINE__));
666 push(@lines, &html_debug("\n", __LINE__));
668 warn "$ERROR Bad table line: $_";
671 } elsif ($tag eq 'synindex' || $tag eq 'syncodeindex') {
672 if (/^\@$tag\s+(\w)\w\s+(\w)\w\s*$/) {
673 eval("*${1}index = *${2}index");
675 warn "$ERROR Bad syn*index line: $_";
678 } elsif ($tag eq 'sp') {
679 push(@lines, &debug("<P>\n", __LINE__));
681 } elsif ($tag eq 'setref') {
682 &protect_html; # if setref contains '&' for instance
683 if (/^\@$tag\s*{($NODERE)}\s*$/) {
685 $setref =~ s/\s+/ /g; # normalize
687 $node2sec{$setref} = $name;
688 $node2href{$setref} = "$docu_doc#$docid";
690 warn "$ERROR Bad setref line: $_";
693 } elsif ($tag eq 'defindex' || $tag eq 'defcodeindex') {
694 if (/^\@$tag\s+(\w\w)\s*$/) {
695 $valid_index{$1} = 1;
697 warn "$ERROR Bad defindex line: $_";
700 } elsif (defined($def_map{$tag})) {
701 if ($def_map{$tag}) {
703 $tag = $def_map{$tag};
707 } elsif (defined($user_sub{$tag})) {
709 $sub = $user_sub{$tag};
710 print "# user $tag = $sub, arg: $_" if $debug & $DEBUG_USER;
711 if (defined(&$sub)) {
715 warn "$ERROR Bad user sub for $tag: $sub\n";
719 if (defined($def_map{$tag})) {
722 # extra definition line
728 while (/\{([^\{\}]*)\}/) {
729 # this is a {} construct
730 ($before, $contents, $after) = ($`, $1, $');
732 $contents =~ s/\s+/$;9/g;
733 # restore $_ protecting {}
734 $_ = "$before$;7$contents$;8$after";
736 @args = split(/\s+/, &protect_html($_));
738 s/$;9/ /g; # unprotect spaces
742 $type = shift(@args);
743 $type =~ s/^\{(.*)\}$/$1/;
744 print "# def ($tag): {$type} ", join(', ', @args), "\n"
745 if $debug & $DEBUG_DEF;
746 $type .= ':'; # it's nicer like this
747 $name = shift(@args);
748 $name =~ s/^\{(.*)\}$/$1/;
750 $_ = &debug("<DT>", __LINE__);
752 $_ = &debug("<DL>\n<DT>", __LINE__);
754 if ($tag eq 'deffn' || $tag eq 'defvr' || $tag eq 'deftp') {
755 $_ .= "<U>$type</U> <B>$name</B>";
756 $_ .= " <I>@args</I>" if @args;
757 } elsif ($tag eq 'deftypefn' || $tag eq 'deftypevr'
758 || $tag eq 'defcv' || $tag eq 'defop') {
760 $name = shift(@args);
761 $name =~ s/^\{(.*)\}$/$1/;
762 $_ .= "<U>$type</U> $ftype <B>$name</B>";
763 $_ .= " <I>@args</I>" if @args;
765 warn "$ERROR Unknown definition type: $tag\n";
766 $_ .= "<U>$type</U> <B>$name</B>";
767 $_ .= " <I>@args</I>" if @args;
769 $_ .= &debug("\n<DD>", __LINE__);
770 $name = &unprotect_html($name);
771 if ($tag eq 'deffn' || $tag eq 'deftypefn') {
772 unshift(@input_spool, "\@findex $name\n");
773 } elsif ($tag eq 'defop') {
774 unshift(@input_spool, "\@findex $name on $ftype\n");
775 } elsif ($tag eq 'defvr' || $tag eq 'deftypevr' || $tag eq 'defcv') {
776 unshift(@input_spool, "\@vindex $name\n");
778 unshift(@input_spool, "\@tindex $name\n");
783 if ($format_map{$end_tag}) {
784 $in_pre = 0 if $format_map{$end_tag} eq 'PRE';
785 $in_list-- if $format_map{$end_tag} eq 'UL' || $format_map{$end_tag} eq 'OL' ;
786 &html_pop_if('LI', 'P');
788 push(@lines, &debug("</$format_map{$end_tag}>\n", __LINE__));
789 push(@lines, &html_debug("\n", __LINE__));
790 } elsif ($end_tag =~ /^(|f|v|multi)table$/) {
792 warn "$ERROR \@end $end_tag without \@*table\n";
795 ($table_type, $in_table) = split($;, shift(@tables));
796 unless ($1 eq $table_type) {
797 warn "$ERROR \@end $end_tag without matching \@$end_tag\n";
800 if ($table_type eq "multi") {
801 push(@lines, "</TR></TABLE>\n");
804 push(@lines, "</DL>\n");
809 ($table_type, $in_table) = split($;, $tables[0]);
813 } elsif (defined($def_map{$end_tag})) {
814 push(@lines, &debug("</DL>\n", __LINE__));
815 } elsif ($end_tag eq 'menu') {
817 push(@lines, $_); # must keep it for pass 2
824 # protect texi and HTML things
826 $_ = &protect_html($_) unless $dont_html;
828 # substitution (unsupported things)
833 # other substitutions
834 &simple_substitutions;
835 s/\@value{($VARRE)}/$value{$1}/eg;
836 s/\@footnote\{/\@footnote$docu_doc\{/g; # mark footnotes, cf. pass 4
838 # analyze the tag again
841 if (defined($sec2level{$tag}) && $sec2level{$tag} > 0) {
842 if (/^\@$tag\s+(.+)$/) {
845 $level = $sec2level{$tag};
846 $name = &update_sec_num($tag, $level) . " $name"
847 if $number_sections && $tag !~ /^unnumbered/;
848 if ($tag =~ /heading$/) {
849 push(@lines, &html_debug("\n", __LINE__));
850 if ($html_element ne 'body') {
851 # We are in a nice pickle here. We are trying to get a H? heading
852 # even though we are not in the body level. So, we convert it to a
853 # nice, bold, line by itself.
854 $_ = &debug("\n\n<P><STRONG>$name</STRONG></P>\n\n", __LINE__);
856 $_ = &debug("<H$level>$name</H$level>\n", __LINE__);
857 &html_push_if('body');
859 print "# heading, section $name, level $level\n"
860 if $debug & $DEBUG_TOC;
862 if ($split_chapter) {
864 # first time we see a "section"
865 unless ($level == 1) {
866 warn "$ERROR The first section found is not of level 1: $_";
867 warn "$ERROR I'll split on sections of level $level...\n";
871 if ($level == $toplevel) {
873 push(@lines, $SPLITTAG) if $split_num++;
874 push(@sections, $name);
878 $docid = "SEC$sec_num";
879 $tocid = "TOC$sec_num";
880 # check biblio and glossary
881 $in_bibliography = ($name =~ /^([A-Z]|\d+)?(\.\d+)*\s*bibliography$/i);
882 $in_glossary = ($name =~ /^([A-Z]|\d+)?(\.\d+)*\s*glossary$/i);
885 if ($node2sec{$node}) {
886 warn "$ERROR Duplicate node found: $node\n";
888 $node2sec{$node} = $name;
889 $node2href{$node} = "$docu_doc#$docid";
890 print "# node $node, section $name, level $level\n"
891 if $debug & $DEBUG_TOC;
895 print "# no node, section $name, level $level\n"
896 if $debug & $DEBUG_TOC;
899 while ($level > $curlevel) {
901 push(@toc_lines, "<UL>\n");
903 while ($level < $curlevel) {
905 push(@toc_lines, "</UL>\n");
907 $_ = "<LI>" . &anchor($tocid, "$docu_doc#$docid", $name, 1);
908 push(@toc_lines, &substitute_style($_));
910 push(@lines, &html_debug("\n", __LINE__));
912 $_ = "<H$level>".&anchor($docid, "$docu_toc#$tocid", $name)."</H$level>\n";
913 $_ = &debug($_, __LINE__);
914 push(@lines, &html_debug("\n", __LINE__));
917 foreach $line (split(/\n+/, $_)) {
918 push(@lines, "$line\n");
922 warn "$ERROR Bad section line: $_";
926 $value{$1} = $2, next if /^\@set\s+($VARRE)\s+(.*)$/o;
927 delete $value{$1}, next if /^\@clear\s+($VARRE)\s*$/o;
929 $value{'_setfilename'} = $1, next if /^\@setfilename\s+(.*)$/;
930 $value{'_settitle'} = $1, next if /^\@settitle\s+(.*)$/;
931 $value{'_author'} .= "$1\n", next if /^\@author\s+(.*)$/;
932 $value{'_subtitle'} .= "$1\n", next if /^\@subtitle\s+(.*)$/;
933 $value{'_title'} .= "$1\n", next if /^\@title\s+(.*)$/;
935 if (/^\@(..?)index\s+/) {
936 unless ($valid_index{$1}) {
937 warn "$ERROR Undefined index command: $_";
940 $id = 'IDX' . ++$idx_num;
941 $index = $1 . 'index';
942 $what = &substitute_style($');
944 print "# found $index for '$what' id $id\n"
945 if $debug & $DEBUG_INDEX;
947 if (defined(\$$index\{\$what\})) {
948 \$$index\{\$what\} .= "$;$docu_doc#$id";
950 \$$index\{\$what\} = "$docu_doc#$id";
954 # dirty hack to see if I can put an invisible anchor...
956 if ($html_element eq 'P' ||
957 $html_element eq 'LI' ||
958 $html_element eq 'DT' ||
959 $html_element eq 'DD' ||
960 $html_element eq 'ADDRESS' ||
961 $html_element eq 'B' ||
962 $html_element eq 'BLOCKQUOTE' ||
963 $html_element eq 'PRE' ||
964 $html_element eq 'SAMP') {
965 push(@lines, &anchor($id, '', $invisible_mark, !$in_pre));
966 } elsif ($html_element eq 'body') {
967 push(@lines, &debug("<P>\n", __LINE__));
968 push(@lines, &anchor($id, '', $invisible_mark, !$in_pre));
970 } elsif ($html_element eq 'DL' ||
971 $html_element eq 'UL' ||
972 $html_element eq 'OL' ) {
973 $deferred_ref .= &anchor($id, '', $invisible_mark, !$in_pre) . " ";
978 if (/^\@itemx?\s+/) {
981 if ($in_bibliography && $use_bibliography) {
982 if ($what =~ /^$BIBRE$/o) {
983 $id = 'BIB' . ++$bib_num;
984 $bib2href{$what} = "$docu_doc#$id";
985 print "# found bibliography for '$what' id $id\n"
986 if $debug & $DEBUG_BIB;
987 $what = &anchor($id, '', $what);
989 } elsif ($in_glossary && $use_glossary) {
990 $id = 'GLOSS' . ++$gloss_num;
992 $entry =~ tr/A-Z/a-z/ unless $entry =~ /^[A-Z\s]+$/;
993 $gloss2href{$entry} = "$docu_doc#$id";
994 print "# found glossary for '$entry' id $id\n"
995 if $debug & $DEBUG_GLOSS;
996 $what = &anchor($id, '', $what);
999 if ($html_element eq 'DL' || $html_element eq 'DD') {
1000 if ($things_map{$in_table} && !$what) {
1001 # special case to allow @table @bullet for instance
1002 push(@lines, &debug("<DT>$things_map{$in_table}\n", __LINE__));
1004 push(@lines, &debug("<DT>\@$in_table\{$what\}\n", __LINE__));
1006 push(@lines, "<DD>");
1007 &html_push('DD') unless $html_element eq 'DD';
1008 if ($table_type) { # add also an index
1009 unshift(@input_spool, "\@${table_type}index $what\n");
1011 } elsif ($html_element eq 'TABLE') {
1012 push(@lines, &debug("<TR><TD>$what</TD>\n", __LINE__));
1014 } elsif ($html_element eq 'TR') {
1015 push(@lines, &debug("</TR>\n", __LINE__));
1016 push(@lines, &debug("<TR><TD>$what</TD>\n", __LINE__));
1018 push(@lines, &debug("<LI>$what\n", __LINE__));
1019 &html_push('LI') unless $html_element eq 'LI';
1021 push(@lines, &html_debug("\n", __LINE__));
1022 if ($deferred_ref) {
1023 push(@lines, &debug("$deferred_ref\n", __LINE__));
1027 } elsif (/^\@tab\s+(.*)$/) {
1028 push(@lines, "<TD>$1</TD>\n");
1033 # paragraph separator
1035 next if $#lines >= 0 && $lines[$#lines] eq "\n";
1036 if ($html_element eq 'P') {
1038 $_ = &debug("</P>\n", __LINE__);
1041 } elsif ($html_element eq 'body' || $html_element eq 'BLOCKQUOTE') {
1042 push(@lines, "<P>\n");
1044 $_ = &debug($_, __LINE__);
1052 while ($level < $curlevel) {
1054 push(@toc_lines, "</UL>\n");
1057 print "# end of pass 1\n" if $verbose;
1059 #+++############################################################################
1061 # Pass 2/3: handle style, menu, index, cross-reference #
1063 #---############################################################################
1065 @lines2 = (); # whole document (2nd pass)
1066 @lines3 = (); # whole document (3rd pass)
1067 $in_menu = 0; # am I inside a menu
1072 # special case (protected sections)
1074 if (/^$PROTECTTAG/o) {
1081 $in_menu = 1, push(@lines2, &debug("<UL>\n", __LINE__)), next if /^\@menu\b/;
1082 $in_menu = 0, push(@lines2, &debug("</UL>\n", __LINE__)), next if /^\@end\s+menu\b/;
1084 if (/^\*\s+($NODERE)::/o) {
1087 &menu_entry($1, $1, $descr);
1088 } elsif (/^\*\s+(.+):\s+([^\t,\.\n]+)[\t,\.\n]/) {
1091 &menu_entry($1, $2, $descr);
1093 warn "$ERROR Bad menu line: $_";
1094 } else { # description continued?
1102 if (/^\@printindex\s+(\w\w)\b/) {
1103 local($index, *ary, @keys, $key, $letter, $last_letter, @refs);
1104 if ($predefined_index{$1}) {
1105 $index = $predefined_index{$1} . 'index';
1107 $index = $1 . 'index';
1109 eval("*ary = *$index");
1111 foreach $key (@keys) {
1113 1 while s/<(\w+)>\`(.*)\'<\/\1>/$2/; # remove HTML tags with quotes
1114 1 while s/<(\w+)>(.*)<\/\1>/$2/; # remove HTML tags
1115 $_ = &unprotect_html($_);
1117 tr/A-Z/a-z/; # lowercase
1118 $key2alpha{$key} = $_;
1119 print "# index $key sorted as $_\n"
1120 if $key ne $_ && $debug & $DEBUG_INDEX;
1122 push(@lines2, "Jump to:\n");
1123 $last_letter = undef;
1124 foreach $key (sort byalpha @keys) {
1125 $letter = substr($key2alpha{$key}, 0, 1);
1126 $letter = substr($key2alpha{$key}, 0, 2) if $letter eq $;;
1127 if (!defined($last_letter) || $letter ne $last_letter) {
1128 push(@lines2, "-\n") if defined($last_letter);
1129 push(@lines2, "<A HREF=\"#$index\_$letter\">" . &protect_html($letter) . "</A>\n");
1130 $last_letter = $letter;
1133 push(@lines2, "<P>\n");
1134 $last_letter = undef;
1135 foreach $key (sort byalpha @keys) {
1136 $letter = substr($key2alpha{$key}, 0, 1);
1137 $letter = substr($key2alpha{$key}, 0, 2) if $letter eq $;;
1138 if (!defined($last_letter) || $letter ne $last_letter) {
1139 push(@lines2, "</DIR>\n") if defined($last_letter);
1140 push(@lines2, "<H2><A NAME=\"$index\_$letter\">" . &protect_html($letter) . "</A></H2>\n");
1141 push(@lines2, "<DIR>\n");
1142 $last_letter = $letter;
1145 foreach (split(/$;/, $ary{$key})) {
1146 push(@refs, &anchor('', $_, $key, 0));
1148 push(@lines2, "<LI>" . join(", ", @refs) . "\n");
1150 push(@lines2, "</DIR>\n") if defined($last_letter);
1154 # simple style substitutions
1156 $_ = &substitute_style($_);
1160 while (/\@(x|px|info|)ref{($XREFRE)(}?)/o) {
1161 # note: Texinfo may accept other characters
1162 ($type, $nodes, $full) = ($1, $2, $3);
1163 ($before, $after) = ($`, $');
1164 if (! $full && $after) {
1165 warn "$ERROR Bad xref (no ending } on line): $_";
1166 $_ = "$before$;0${type}ref\{$nodes$after";
1171 } elsif ($type eq 'px') {
1173 } elsif ($type eq 'info') {
1179 $next = shift(@lines);
1180 $next = &substitute_style($next);
1181 chop($nodes); # remove final newline
1182 if ($next =~ /\}/) { # split on 2 lines
1187 $next = shift(@lines);
1188 $next = &substitute_style($next);
1190 if ($next =~ /\}/) { # split on 3 lines
1194 warn "$ERROR Bad xref (no ending }): $_";
1195 $_ = "$before$;0xref\{$nodes$after";
1196 unshift(@lines, $next);
1201 $nodes =~ s/\s+/ /g; # remove useless spaces
1202 @args = split(/\s*,\s*/, $nodes);
1203 $node = $args[0]; # the node is always the first arg
1204 &normalise_node($node);
1205 $sec = $node2sec{$node};
1206 if (@args == 5) { # reference to another manual
1207 $sec = $args[2] || $node;
1208 $man = $args[4] || $args[3];
1209 $_ = "${before}${type}section `$sec' in \@cite{$man}$after";
1210 } elsif ($type =~ /Info/) { # inforef
1211 warn "$ERROR Wrong number of arguments: $_" unless @args == 3;
1212 ($nn, $_, $in) = @args;
1213 $_ = "${before}${type} file `$in', node `$nn'$after";
1215 $href = $node2href{$node};
1216 $_ = "${before}${type}section " . &anchor('', $href, $sec) . $after;
1218 warn "$ERROR Undefined node ($node): $_";
1219 $_ = "$before$;0xref{$nodes}$after";
1223 # try to guess bibliography references or glossary terms
1225 unless (/^<H\d><A NAME=\"SEC\d/) {
1226 if ($use_bibliography) {
1229 ($pre, $what, $post) = ($`, $&, $');
1230 $href = $bib2href{$what};
1231 if (defined($href) && $post !~ /^[^<]*<\/A>/) {
1232 $done .= $pre . &anchor('', $href, $what);
1234 $done .= "$pre$what";
1240 if ($use_glossary) {
1243 ($pre, $what, $post) = ($`, $&, $');
1245 $entry =~ tr/A-Z/a-z/ unless $entry =~ /^[A-Z\s]+$/;
1246 $href = $gloss2href{$entry};
1247 if (defined($href) && $post !~ /^[^<]*<\/A>/) {
1248 $done .= $pre . &anchor('', $href, $what);
1250 $done .= "$pre$what";
1260 print "# end of pass 2\n" if $verbose;
1263 # split style substitutions
1266 $_ = shift(@lines2);
1268 # special case (protected sections)
1270 if (/^$PROTECTTAG/o) {
1275 # split style substitutions
1278 while ($old ne $_) {
1281 ($before, $style, $after) = ($`, $1, $');
1282 if (defined($style_map{$style})) {
1295 $_ = shift(@lines2);
1299 die "* Bad syntax (\@$style) after: $before\n";
1301 $text = &apply_style($style, $text);
1302 $_ = "$before$text$after";
1310 print "# end of pass 3\n" if $verbose;
1312 #+++############################################################################
1314 # Pass 4: foot notes, final cleanup #
1316 #---############################################################################
1318 @foot_lines = (); # footnotes
1319 @doc_lines = (); # final document
1320 $end_of_para = 0; # true if last line is <P>
1323 $_ = shift(@lines3);
1325 # special case (protected sections)
1327 if (/^$PROTECTTAG/o) {
1328 push(@doc_lines, $_);
1335 while (/\@footnote([^\{\s]+)\{/) {
1336 ($before, $d, $after) = ($`, $1, $');
1349 $_ = shift(@lines3);
1353 die "* Bad syntax (\@footnote) after: $before\n";
1356 $docid = "DOCF$foot_num";
1357 $footid = "FOOT$foot_num";
1358 $foot = "($foot_num)";
1359 push(@foot_lines, "<H3>" . &anchor($footid, "$d#$docid", $foot) . "</H3>\n");
1360 $text = "<P>$text" unless $text =~ /^\s*<P>/;
1361 push(@foot_lines, "$text\n");
1362 $_ = $before . &anchor($docid, "$docu_foot#$footid", $foot) . $after;
1366 # remove unnecessary <P>
1368 if (/^\s*<P>\s*$/) {
1369 next if $end_of_para++;
1374 push(@doc_lines, $_);
1376 print "# end of pass 4\n" if $verbose;
1378 #+++############################################################################
1380 # Pass 5: print things #
1382 #---############################################################################
1385 <!-- This HTML file has been created by $THISPROG
1386 from $docu on $TODAY -->
1389 $full_title = $value{'_title'} || $value{'_settitle'} || "Untitled Document";
1390 $title = $value{'_settitle'} || $full_title;
1391 $_ = &substitute_style($full_title);
1393 s/\n$//; # rmv last \n (if any)
1394 $full_title = "<H1>" . join("</H1>\n<H1>", split(/\n/, $_)) . "</H1>\n";
1399 if (!$monolithic && @toc_lines) {
1400 if (open(FILE, "> $docu_toc")) {
1401 print "# creating $docu_toc...\n" if $verbose;
1402 &print_toplevel_header("$title - Table of Contents");
1404 &print(*toc_lines, FILE);
1405 &print_toplevel_footer;
1408 warn "$ERROR Can't write to $docu_toc: $!\n";
1415 if (!$monolithic && @foot_lines) {
1416 if (open(FILE, "> $docu_foot")) {
1417 print "# creating $docu_foot...\n" if $verbose;
1418 &print_toplevel_header("$title - Footnotes");
1420 &print(*foot_lines, FILE);
1421 &print_toplevel_footer;
1424 warn "$ERROR Can't write to $docu_foot: $!\n";
1431 if ($split_chapter || $split_node) { # split
1433 $last_num = scalar(@sections);
1434 $first_doc = &doc_name(1);
1435 $last_doc = &doc_name($last_num);
1437 $section = shift(@sections);
1439 if (open(FILE, "> $docu_doc")) {
1440 print "# creating $docu_doc...\n" if $verbose;
1441 &print_header("$title - $section");
1442 $prev_doc = ($doc_num == 1 ? undef : &doc_name($doc_num - 1));
1443 $next_doc = ($doc_num == $last_num ? undef : &doc_name($doc_num + 1));
1444 $navigation = "Go to the ";
1445 $navigation .= ($prev_doc ? &anchor('', $first_doc, "first") : "first");
1446 $navigation .= ", ";
1447 $navigation .= ($prev_doc ? &anchor('', $prev_doc, "previous") : "previous");
1448 $navigation .= ", ";
1449 $navigation .= ($next_doc ? &anchor('', $next_doc, "next") : "next");
1450 $navigation .= ", ";
1451 $navigation .= ($next_doc ? &anchor('', $last_doc, "last") : "last");
1452 $navigation .= " section, " . &anchor('', $docu_toc, "table of contents") . ".\n";
1453 print FILE $navigation;
1455 # find corresponding lines
1457 while (@doc_lines) {
1458 $_ = shift(@doc_lines);
1459 last if ($_ eq $SPLITTAG);
1460 push(@tmp_lines, $_);
1462 &print(*tmp_lines, FILE);
1464 print FILE $navigation;
1468 warn "$ERROR Can't write to $docu_doc: $!\n";
1471 } else { # not split
1472 if (open(FILE, "> $docu_doc")) {
1473 print "# creating $docu_doc...\n" if $verbose;
1474 if ($monolithic || !@toc_lines) {
1475 &print_toplevel_header($title);
1477 &print_header($title);
1478 print FILE $full_title;
1480 if ($monolithic && @toc_lines) {
1482 print FILE "<H1>Table of Contents</H1>\n";
1483 &print(*toc_lines, FILE);
1486 &print(*doc_lines, FILE);
1487 if ($monolithic && @foot_lines) {
1489 print FILE "<H1>Footnotes</H1>\n";
1490 &print(*foot_lines, FILE);
1492 if ($monolithic || !@toc_lines) {
1493 &print_toplevel_footer;
1499 warn "$ERROR Can't write to $docu_doc: $!\n";
1503 print "# that's all folks\n" if $verbose;
1505 #+++############################################################################
1507 # Low level functions #
1509 #---############################################################################
# Advance the section counter for a sectioning command and return the new
# section number as a dotted string (for instance "2.1" or "A.3").
#   $name  - sectioning command name; names starting with "appendix" use a
#            separate counter whose first component starts at 'A'
#   $level - 1-based depth of the section; the first $level components of
#            the counter are joined with '.' and returned
# The first call for each numbering family initialises its counter; every
# later call hands the counter to &incr_sec_num (defined elsewhere in this
# file) before the number is built.
sub update_sec_num {
    local($name, $level) = @_;

    $level--; # here we start at 0
    if ($name =~ /^appendix/) {
        # appendix style
        # An array is true in boolean context once it holds elements;
        # defined(@array) is a fatal error since perl 5.22.
        if (@appendix_sec_num) {
            &incr_sec_num($level, @appendix_sec_num);
        } else {
            @appendix_sec_num = ('A', 0, 0, 0);
        }
        return(join('.', @appendix_sec_num[0..$level]));
    } else {
        # normal style
        if (@normal_sec_num) {
            &incr_sec_num($level, @normal_sec_num);
        } else {
            @normal_sec_num = (1, 0, 0, 0);
        }
        return(join('.', @normal_sec_num[0..$level]));
    }
}
1538 foreach $l ($level+1 .. 3) {
1544 local($_, %seen, %context, $before, $match, $after);
1547 if (/\@(\*|\.|\:|\@|\{|\})/) {
1549 $context{$&} .= "> $_" if $verbose;
1554 ($before, $match, $after) = ($`, $&, $');
1555 if ($before =~ /\b[\w-]+$/ && $after =~ /^[\w-.]*\b/) { # e-mail address
1556 $seen{'e-mail address'}++;
1557 $context{'e-mail address'} .= "> $_" if $verbose;
1560 $context{$match} .= "> $_" if $verbose;
1563 $_ = "$before$match$after";
1568 foreach (sort(keys(%seen))) {
1573 print "$_ ($seen{$_})\n";
1582 if (open($fh_name, $name)) {
1583 unshift(@fhs, $fh_name);
1585 warn "$ERROR Can't read file $name: $!\n";
1590 @fhs = (); # hold the file handles to read
1591 @input_spool = (); # spooled lines to read
1600 $line = shift(@input_spool);
1606 return($line) if $line;
1613 # used in pass 1, use &next_line
1618 while ($_ = &next_line) {
1619 return if /^\@end\s+$tag\s*$/;
1621 die "* Failed to find '$tag' after: " . $lines[$#lines];
1625 # HTML stacking to have a better HTML output
1629 @html_stack = ('html');
1630 $html_element = 'body';
1635 push(@html_stack, $html_element);
1636 $html_element = $what;
1641 push(@html_stack, $html_element)
1642 if ($html_element && $html_element ne 'P');
1643 $html_element = $what;
1647 $html_element = pop(@html_stack);
1655 if ($elt eq $html_element) {
1656 $html_element = pop(@html_stack) if @html_stack;
1661 $html_element = pop(@html_stack) if @html_stack;
1666 local($what, $line) = @_;
1667 return("<!-- $line @html_stack, $html_element -->$what")
1668 if $debug & $DEBUG_HTML;
1672 # to debug the output...
1674 local($what, $line) = @_;
1675 return("<!-- $line -->$what")
1676 if $debug & $DEBUG_HTML;
1680 sub normalise_node {
1687 local($entry, $node, $descr) = @_;
1690 &normalise_node($node);
1691 $href = $node2href{$node};
1694 $descr = ": $descr" if $descr;
1695 push(@lines2, "<LI>" . &anchor('', $href, $entry) . "$descr\n");
1697 warn "$ERROR Undefined node ($node): $_";
# Return the first argument prefixed with a caret ("^").
1701 sub do_ctrl { "^$_[0]" }
1704 local($addr, $text) = split(/,\s*/, $_[0]);
1706 $text = $addr unless $text;
1707 &anchor('', "mailto:$addr", $text);
# Return the first argument converted to upper case (\U ... \E).
1710 sub do_sc { "\U$_[0]\E" }
1713 local($url, $text) = split(/,\s*/, $_[0]);
1715 $text = $url unless $text;
1716 &anchor('', $url, $text);
# Build a link through &anchor with an empty anchor name, using the first
# argument both as the HREF target and as the visible link text.
1719 sub do_url { &anchor('', $_[0], $_[0]) }
1722 local($texi_style, $text) = @_;
1725 $style = $style_map{$texi_style};
1726 if (defined($style)) { # known style
1727 if ($style =~ /^\"/) { # add quotes
1729 $text = "\`$text\'";
1731 if ($style =~ /^\&/) { # custom
1733 $text = &$style($text);
1734 } elsif ($style) { # good style
1735 $text = "<$style>$text</$style>";
1738 } else { # unknown style
1744 # remove Texinfo styles
1747 s/\@\w+{([^\{\}]+)}/$1/g;
1751 sub substitute_style {
1753 local($changed, $done, $style, $text);
1759 while (/\@(\w+){([^\{\}]+)}/) {
1760 $text = &apply_style($1, $2);
1775 local($name, $href, $text, $newline) = @_;
1779 $result .= " NAME=\"$name\"" if $name;
1780 $result .= " HREF=\"$href\"" if $href;
1781 $result .= ">$text</A>";
1782 $result .= "\n" if $newline;
1787 local(@MoY, $sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst);
# month names, indexed by the 0-based month number returned by localtime
# (fix: 'February' was misspelled 'Febuary' and appeared in generated dates)
1789 @MoY = ('January', 'February', 'March', 'April', 'May', 'June',
1790 'July', 'August', 'September', 'October', 'November', 'December');
1791 ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(time);
# localtime yields the year as an offset from 1900; values below 70 are
# mapped to 20xx instead
1792 $year += ($year < 70) ? 2000 : 1900;
# result looks like "20 September 1993"
1793 return("$mday $MoY[$mon] $year");
1799 return("${docu_name}_$num.html");
1803 $docu_doc = &doc_name(++$doc_num);
1807 local(*lines, $fh) = @_;
1812 if (/^$PROTECTTAG/o) {
1822 print FILE "<P><HR><P>\n";
1829 $_ = &remove_style($_[0]);
1832 if ($doctype eq 'html2') {
1833 print FILE $html2_doctype;
1834 } elsif ($doctype) {
1835 print FILE $doctype;
# Print the top-level page header to FILE: the common header (through
# &print_header), the full title, each line of the '_subtitle' value as an
# <H2> element and each line of the '_author' value as an <ADDRESS> element.
# Subtitle and author lines go through &substitute_style before printing.
1847 sub print_toplevel_header {
# called without parentheses, so &print_header sees this sub's own @_
1850 &print_header; # pass given arg...
1851 print FILE $full_title;
1852 if ($value{'_subtitle'}) {
# strip trailing newlines before splitting the value into lines
1853 $value{'_subtitle'} =~ s/\n+$//;
1854 foreach (split(/\n/, $value{'_subtitle'})) {
1855 $_ = &substitute_style($_);
1857 print FILE "<H2>$_</H2>\n";
1860 if ($value{'_author'}) {
# strip trailing newlines before splitting the value into lines
1861 $value{'_author'} =~ s/\n+$//;
1862 foreach (split(/\n/, $value{'_author'})) {
1863 $_ = &substitute_style($_);
# wrap anything shaped like an e-mail address in a mailto: link
1865 s/[\w.-]+\@[\w.-]+/<A HREF="mailto:$&">$&<\/A>/g;
1866 print FILE "<ADDRESS>$_</ADDRESS>\n";
1879 sub print_toplevel_footer {
1882 This document was generated on $TODAY using the
1883 <A HREF=\"$HOMEPAGE\">texi2html</A>
1884 translator version 1.52.</P>
1901 $what =~ s/\&/\&\#38;/g;
1902 $what =~ s/\</\&\#60;/g;
1903 $what =~ s/\>/\&\#62;/g;
1904 # but recognize some HTML things
1905 $what =~ s/\&\#60;\/A\&\#62;/<\/A>/g; # </A>
1906 $what =~ s/\&\#60;A ([^\&]+)\&\#62;/<A $1>/g; # <A [^&]+>
1907 $what =~ s/\&\#60;IMG ([^\&]+)\&\#62;/<IMG $1>/g; # <IMG [^&]+>
1911 sub unprotect_texi {
# Turn the numeric character references &#38;, &#60; and &#62; in $what back
# into the literal characters &, < and >.
# NOTE(review): $what is presumably taken from the sub's argument and
# returned afterwards -- confirm against the full definition.
1919 sub unprotect_html {
1921 $what =~ s/\&\#38;/\&/g;
1922 $what =~ s/\&\#60;/\</g;
1923 $what =~ s/\&\#62;/\>/g;
1928 $key2alpha{$a} cmp $key2alpha{$b};
1931 ##############################################################################
1933 # These next few lines are legal in both Perl and nroff.
1937 'di \" finish diversion--previous line must be blank
1938 .nr nl 0-1 \" fake up transition to first page again
1939 .nr % 0 \" start at page 1
1940 '; __END__ ############# From here on it's a standard manual page ############
1941 .TH TEXI2HTML 1 "01/05/98"
1944 texi2html \- a Texinfo to HTML converter
1946 .B texi2html [options] file
1948 .B texi2html -check [-verbose] files
1951 converts the given Texinfo file to a set of HTML files. It tries to handle
1952 most of the Texinfo commands. It creates hypertext links for cross-references,
1955 It also tries to add links from a reference to its corresponding entry in the
1956 bibliography (if any). It may also handle a glossary (see the
1961 creates several files depending on the contents of the Texinfo file and on
1962 the chosen options (see FILES).
1964 The HTML files created by
1966 are closer to TeX than to Info, that's why
1968 converts @iftex sections and not @ifinfo ones by default. You can reverse
1969 this with the \-expandinfo option.
1973 Check the given file and give the list of all things that may be Texinfo commands.
1974 This may be used to check the output of
1976 to find the Texinfo commands that have been left in the HTML file.
1979 Expand @ifinfo sections, not @iftex ones.
1982 Use the section named 'Glossary' to build a list of terms and put links in the HTML
1983 document from each term toward its definition.
1985 .B \-invisible \fIname\fP
1986 Use \fIname\fP to create invisible destination anchors for index links
1987 (you can for instance use the invisible.xbm file shipped with this program).
1988 This is a workaround for a known bug of many WWW browsers, including Netscape.
1991 Look also in \fIdir\fP to find included files.
1994 Show the Texinfo menus; by default they are ignored.
1997 Output only one file, including the table of contents and footnotes.
2000 Number the sections.
2003 Split the output into several HTML files (one per main section:
2004 chapter, appendix...).
2007 Split the output into several HTML files (one per node).
2010 Print usage instructions, listing the currently available command-line options.
2013 Give a verbose output. Can be used with the
2020 creates the following files (foo being the name of the Texinfo file):
2023 The table of contents.
2026 The document's contents.
2029 The footnotes (if any).
2033 option, it creates several files (one per chapter or node), named
2035 (n being the index of the chapter or node), instead of the single
2041 option, it creates only one file:
2045 predefines the following variables: \fBhtml\fP, \fBtexi2html\fP.
2046 .SH ADDITIONAL COMMANDS
2048 implements the following non-Texinfo commands (maybe they are in Texinfo now...):
2051 This indicates the start of an HTML section; this section will be passed through
2052 without any modification.
2055 This indicates the end of an HTML section.
2057 This is \fItexi2html\fP version 1.52, 01/05/98.
2059 The latest version of \fItexi2html\fP can be found on the WWW, cf. URL
2060 http://wwwinfo.cern.ch/dis/texi2html/
2062 The main author is Lionel Cons, CERN IT/DIS/OSE, Lionel.Cons@cern.ch.
2063 Many other people around the net contributed to this program.
2065 This program is the intellectual property of the European
2066 Laboratory for Particle Physics (known as CERN). No guarantee whatsoever is
2067 provided by CERN. No liability whatsoever is accepted for any loss or damage
2068 of any kind resulting from any defect or inaccuracy in this information or
2071 CERN, 1211 Geneva 23, Switzerland
2073 GNU Texinfo Documentation Format,
2074 HyperText Markup Language (HTML),
2075 World Wide Web (WWW).
2077 This program does not understand all Texinfo commands (yet).
2079 TeX specific commands (normally enclosed in @iftex) will be