From 9ae32b7ee83b1f36e7b8cfcff04381a47b248c80 Mon Sep 17 00:00:00 2001 From: "Graydon, Tracy" Date: Fri, 31 Aug 2012 12:29:30 -0700 Subject: [PATCH] TIVI-153: add as dependency for iputils --- BUGS | 7 + COPYING | 339 ++++++ ChangeLog | 119 ++ DOC/ChangeLog | 55 + DOC/Extras/README | 4 + DOC/Extras/docbook.dcl | 105 ++ DOC/Extras/docbook.dtd | 2023 ++++++++++++++++++++++++++++++++++ DOC/Extras/isonum.ent | 88 ++ DOC/Extras/isopub.ent | 96 ++ DOC/HTML/SGMLSpm/bugs.html | 21 + DOC/HTML/SGMLSpm/definition.html | 34 + DOC/HTML/SGMLSpm/events.html | 161 +++ DOC/HTML/SGMLSpm/example.html | 82 ++ DOC/HTML/SGMLSpm/extend.html | 59 + DOC/HTML/SGMLSpm/perl5.html | 26 + DOC/HTML/SGMLSpm/sample.pl | 1 + DOC/HTML/SGMLSpm/sgml.html | 23 + DOC/HTML/SGMLSpm/sgmls.html | 64 ++ DOC/HTML/SGMLSpm/sgmlsattribute.html | 121 ++ DOC/HTML/SGMLSpm/sgmlselement.html | 147 +++ DOC/HTML/SGMLSpm/sgmlsentity.html | 124 +++ DOC/HTML/SGMLSpm/sgmlsevent.html | 127 +++ DOC/HTML/SGMLSpm/sgmlsnotation.html | 69 ++ DOC/HTML/SGMLSpm/sgmlspm.html | 38 + DOC/HTML/SGMLSpm/sgmlspm.refs | 86 ++ DOC/HTML/SGMLSpm/terms.html | 32 + DOC/HTML/SGMLSpm/xtrainfo.html | 81 ++ DOC/HTML/sgmlspl/bugs.html | 20 + DOC/HTML/sgmlspl/definition.html | 42 + DOC/HTML/sgmlspl/dsssl.html | 34 + DOC/HTML/sgmlspl/forward.html | 130 +++ DOC/HTML/sgmlspl/generic.html | 185 ++++ DOC/HTML/sgmlspl/handlers.html | 77 ++ DOC/HTML/sgmlspl/installation.html | 29 + DOC/HTML/sgmlspl/output.html | 40 + DOC/HTML/sgmlspl/outputex.html | 37 + DOC/HTML/sgmlspl/popoutput.html | 27 + DOC/HTML/sgmlspl/pushoutput.html | 116 ++ DOC/HTML/sgmlspl/sgmlspl.html | 32 + DOC/HTML/sgmlspl/sgmlspl.refs | 78 ++ DOC/HTML/sgmlspl/skel.html | 29 + DOC/HTML/sgmlspl/specific.html | 118 ++ DOC/HTML/sgmlspl/specs.html | 41 + DOC/HTML/sgmlspl/terms.html | 32 + DOC/LaTeX/sample.pl | 1 + DOC/LaTeX/sgmlspl.refs | 24 + DOC/LaTeX/sgmlspl.tex | 575 ++++++++++ DOC/LaTeX/sgmlspm.refs | 29 + DOC/LaTeX/sgmlspm.tex | 583 ++++++++++ DOC/Makefile | 124 +++ DOC/README | 11 + 
DOC/TODO | 4 + DOC/sample.pl | 53 + DOC/sgmlspl.sgml | 780 +++++++++++++ DOC/sgmlspm.sgml | 883 +++++++++++++++ DOC/tohtml.pl | 569 ++++++++++ DOC/tolatex.pl | 508 +++++++++ Makefile | 98 ++ Output.pm | 225 ++++ README | 105 ++ Refs.pm | 216 ++++ SGMLS.pm | 893 +++++++++++++++ TODO | 6 + elisp/README | 22 + elisp/sgmls.el | 437 ++++++++ packaging/perl-SGMLSpm.changes | 75 ++ packaging/perl-SGMLSpm.spec | 46 + sgmlspl.pl | 317 ++++++ skel.pl | 178 +++ test-SGMLS.pl | 13 + 70 files changed, 11974 insertions(+) create mode 100644 BUGS create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 DOC/ChangeLog create mode 100644 DOC/Extras/README create mode 100644 DOC/Extras/docbook.dcl create mode 100644 DOC/Extras/docbook.dtd create mode 100644 DOC/Extras/isonum.ent create mode 100644 DOC/Extras/isopub.ent create mode 100644 DOC/HTML/SGMLSpm/bugs.html create mode 100644 DOC/HTML/SGMLSpm/definition.html create mode 100644 DOC/HTML/SGMLSpm/events.html create mode 100644 DOC/HTML/SGMLSpm/example.html create mode 100644 DOC/HTML/SGMLSpm/extend.html create mode 100644 DOC/HTML/SGMLSpm/perl5.html create mode 120000 DOC/HTML/SGMLSpm/sample.pl create mode 100644 DOC/HTML/SGMLSpm/sgml.html create mode 100644 DOC/HTML/SGMLSpm/sgmls.html create mode 100644 DOC/HTML/SGMLSpm/sgmlsattribute.html create mode 100644 DOC/HTML/SGMLSpm/sgmlselement.html create mode 100644 DOC/HTML/SGMLSpm/sgmlsentity.html create mode 100644 DOC/HTML/SGMLSpm/sgmlsevent.html create mode 100644 DOC/HTML/SGMLSpm/sgmlsnotation.html create mode 100644 DOC/HTML/SGMLSpm/sgmlspm.html create mode 100644 DOC/HTML/SGMLSpm/sgmlspm.refs create mode 100644 DOC/HTML/SGMLSpm/terms.html create mode 100644 DOC/HTML/SGMLSpm/xtrainfo.html create mode 100644 DOC/HTML/sgmlspl/bugs.html create mode 100644 DOC/HTML/sgmlspl/definition.html create mode 100644 DOC/HTML/sgmlspl/dsssl.html create mode 100644 DOC/HTML/sgmlspl/forward.html create mode 100644 DOC/HTML/sgmlspl/generic.html create mode 100644 
DOC/HTML/sgmlspl/handlers.html create mode 100644 DOC/HTML/sgmlspl/installation.html create mode 100644 DOC/HTML/sgmlspl/output.html create mode 100644 DOC/HTML/sgmlspl/outputex.html create mode 100644 DOC/HTML/sgmlspl/popoutput.html create mode 100644 DOC/HTML/sgmlspl/pushoutput.html create mode 100644 DOC/HTML/sgmlspl/sgmlspl.html create mode 100644 DOC/HTML/sgmlspl/sgmlspl.refs create mode 100644 DOC/HTML/sgmlspl/skel.html create mode 100644 DOC/HTML/sgmlspl/specific.html create mode 100644 DOC/HTML/sgmlspl/specs.html create mode 100644 DOC/HTML/sgmlspl/terms.html create mode 120000 DOC/LaTeX/sample.pl create mode 100644 DOC/LaTeX/sgmlspl.refs create mode 100644 DOC/LaTeX/sgmlspl.tex create mode 100644 DOC/LaTeX/sgmlspm.refs create mode 100644 DOC/LaTeX/sgmlspm.tex create mode 100644 DOC/Makefile create mode 100644 DOC/README create mode 100644 DOC/TODO create mode 100644 DOC/sample.pl create mode 100644 DOC/sgmlspl.sgml create mode 100644 DOC/sgmlspm.sgml create mode 100644 DOC/tohtml.pl create mode 100644 DOC/tolatex.pl create mode 100644 Makefile create mode 100644 Output.pm create mode 100644 README create mode 100644 Refs.pm create mode 100644 SGMLS.pm create mode 100644 TODO create mode 100644 elisp/README create mode 100644 elisp/sgmls.el create mode 100644 packaging/perl-SGMLSpm.changes create mode 100644 packaging/perl-SGMLSpm.spec create mode 100755 sgmlspl.pl create mode 100644 skel.pl create mode 100644 test-SGMLS.pl diff --git a/BUGS b/BUGS new file mode 100644 index 0000000..77c688c --- /dev/null +++ b/BUGS @@ -0,0 +1,7 @@ +SGMLS.pm: Known bugs. + +- currently, escaped sequences for SDATA, record-ends, and octal +characters are _not_ processed in attribute values. I will add this +capability in the future, but it is not obvious how I should do it. + +- link attributes are currently ignored. 
\ No newline at end of file diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..e77696a --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..ec9a51c --- /dev/null +++ b/ChangeLog @@ -0,0 +1,119 @@ +Tue Dec 5 07:18:09 1995 David Megginson + +* VERSION 1.03ii + + * elisp/sgmls.el: Added my Gnu Emacs 19 mode for running (n)sgmls + with a postprocessor. + + * Output.pm (push_output): For a 'handle' argument, force + unqualified filehandles into the caller's package (reported by + Rick Wong). + +* VERSION 1.03 + +Sun Dec 3 11:37:08 1995 David Megginson + + * sgmlspl.pl (main::sgml): Stopped using SGMLS_Event::key method + (why keep it around?). + + * SGMLS.pm: Added POD documentation to the module; changed the + SGMLS_Event class constructor so that it does not take an explicit + key argument (the key will be generated automatically if needed), + and propagated the change throughout the module. + +Sat Dec 2 21:24:05 1995 David Megginson + + * Refs.pm: Added POD documentation to the module. + + * Output.pm: Added POD documentation to the module. + +Thu Nov 16 06:52:46 1995 David Megginson + + * Refs.pm: Refs::warn now returns 1 if a warning was printed, or 0 + if it was not. + +Wed Nov 15 13:43:10 1995 David Megginson + + * Refs.pm: Added a new, simple package for managing references and + reporting any changes. + + * SGMLS.pm: SGMLS::new now forces its filehandle argument into the + caller's package, unless it is already qualified with a package + name; reported by Rick Wong. + + * Makefile: Install modules in ${MODULEDIR} instead of ${PERL5DIR} + -- that way, all add-ons can be qualified with SGMLS::. + + * Output.pm: Now uses Carp.pm and Exporter.pm to be a + better-behaved module. The symbols "output", "push_output", and + "pop_output" will be exported into the caller's package rather + than main::, and the caller will have some control over them. + + Changed the package from Output to SGMLS::Output.pm. + + * skel.pl: Use SGMLS::Output instead of Output. 
+ + * sgmlspl.pl: Altered to create the SGMLS object with main::STDIN + as the filehandle. Have Output.pm export its symbols into main:: + rather than into SGMLS_PL::. + + Use SGMLS::Output instead of Output. + +* VERSION 1.02 + +Thu Aug 24 10:00:52 1995 David Megginson + + * SGMLS.pm: Added SGMLS_Event::parse to retrieve the SGMLS parent + class, SGMLS_Event::entity to look up an arbitrary entity, and + SGMLS_Event::notation to look up an arbitrary notation. + + * Makefile (HTML_SOURCES): fixed faulty path, reported by Rick + Wong (rwong@jessica.stanford.edu). + + * skel.pl: fixed bug in commented out 're' handler, reported by + Rick Wong (rwong@jessica.stanford.edu). + + * SGMLS.pm: fixed bug in SGMLS_Attribute::set_value, reported by + Rick Wong (rwong@jessica.stanford.edu). + + Loosened up checking of types in main loop, in case new types are + added in the future. + + Added support for new output from NSGMLS with -h option: 'T' for + external text entities, new entity types 'PI', 'TEXT', and 'ID'. + + Added support (not yet tested) for data entities. + + +* VERSION 1.01 + +Sat Aug 12 09:34:40 1995 David Megginson + + * Makefile: changed the order of the operations for make all, so + that the necessary files are installed _before_ trying to make the + documentation. Also, introduced the target "dist" to make the + most current version of all the documentation. + + * skel.pl: fixed the header comment for 'subdoc' handlers, and + added default handlers for 'end_element', 'cdata', 're', + 'end_subdoc', and 'conforming' (reported by Rick Wong + ). + + * SGMLS.pm: fixed bugs in SGMLS_Attribute::set_value, cleaned up + the recognition and processing of octal escape sequences in the + (N)SGMLS output, and fixed a minor bug with generated-filenames in + the external entities (reported by Rick Wong + ). 
+ + * sgmls.pl: changed handler patterns to allow dashes and other + characters in element, subdoc, and entity names, and fixed error + in generic start_subdoc_handler (reported by Rick Wong + and Alan Karben + ). + + * Makefile: Corrected several typos in the "install_html" target + (reported by Rick Wong ). Changed the + perl binary location from /usr/bin/perl to /usr/local/bin/perl (I + am working on a slightly different system now). + +* VERSION 1.00 (initial release) \ No newline at end of file diff --git a/DOC/ChangeLog b/DOC/ChangeLog new file mode 100644 index 0000000..92511e7 --- /dev/null +++ b/DOC/ChangeLog @@ -0,0 +1,55 @@ +Tue Dec 5 07:20:09 1995 David Megginson + +* VERSION 1.03ii + + * Fixed typo in sgmlspl.sgml -- changed "references.hash" to + "references.refs" for consistency (reported by Rick Wong). + +* VERSION 1.03 + +Sun Dec 3 17:03:32 1995 David Megginson + + * tolatex.pl: Changed to use the SGMLS::Refs package. + + * tohtml.pl: Changed to use the SGMLS::Refs package. + + * sgmlspl.sgml: Added documentation for the SGMLS::Refs package, + and fixed up references to SGMLS::Output. List the version + explicitly in the title. + + * sgmlspm.sgml: List the version explicitly in the title. + +* VERSION 1.02 + +Thu Aug 24 10:14:09 1995 David Megginson + + * tolatex.pl and tohtml.pl: added support for wacky SYSID + modifications to NSGMLS. + + * Makefile (clean): added $(SGMLSPL) variable, so that users can + specify a location for sgmlspl not in their paths (suggested by + Rick Wong, rwong@jessica.stanford.edu). + +* VERSION 1.01 + +Sat Aug 12 09:58:17 1995 David Megginson + + * tohtml.pl: changed entity handler to deal with funky sysids + from NSGMLS (this needs more work). + + * Makefile (SGMLDECL): changed to + /usr/local/lib/sgml/sgmldecl/docbook.dcl to reflect changes in my + local system (your mileage may vary). + (clean): now _really_ cleans up. 
+ (html): if the refs.pl file does not exist in the relevant + directory, make the HTML files _twice_. + + * sgmlspl.sgml: fixed some typos with the function name sgmls() + instead of sgml() (reported by Rick Wong + ). + + * sgmlspm.sgml: fixed the captions on tables 1 and 2 (reported by + Rick Wong ). + + +* VERSION 1.00 (initial release) \ No newline at end of file diff --git a/DOC/Extras/README b/DOC/Extras/README new file mode 100644 index 0000000..3716381 --- /dev/null +++ b/DOC/Extras/README @@ -0,0 +1,4 @@ +This directory contains the Docbook 2.2.1 DTD, the ISO +character-entity files for "Publishing" and "Numeric and Special +Graphic", and the SGML declaration distributed with Docbook. You will +require these only if they are not already installed on your system. diff --git a/DOC/Extras/docbook.dcl b/DOC/Extras/docbook.dcl new file mode 100644 index 0000000..e78c1d7 --- /dev/null +++ b/DOC/Extras/docbook.dcl @@ -0,0 +1,105 @@ + + + diff --git a/DOC/Extras/docbook.dtd b/DOC/Extras/docbook.dtd new file mode 100644 index 0000000..72eeb43 --- /dev/null +++ b/DOC/Extras/docbook.dtd @@ -0,0 +1,2023 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/DOC/Extras/isonum.ent b/DOC/Extras/isonum.ent new file mode 100644 index 0000000..7ee1414 --- /dev/null +++ b/DOC/Extras/isonum.ent @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/DOC/Extras/isopub.ent b/DOC/Extras/isopub.ent new file mode 100644 index 0000000..33d1252 --- /dev/null +++ b/DOC/Extras/isopub.ent @@ -0,0 +1,96 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/DOC/HTML/SGMLSpm/bugs.html b/DOC/HTML/SGMLSpm/bugs.html new file mode 100644 index 0000000..0056f67 --- /dev/null +++ b/DOC/HTML/SGMLSpm/bugs.html @@ -0,0 +1,21 @@ + + +Are there any bugs? 
+ + + +

Links: Previous Up Top

+ +

Are there any bugs?

+ +

Of course! Right now, SGMLS.pm silently ignores link attributes +(nsgmls only) and data attributes, and there may be many other bugs +which I have not yet found.

+ + +

Links: Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/definition.html b/DOC/HTML/SGMLSpm/definition.html new file mode 100644 index 0000000..6570997 --- /dev/null +++ b/DOC/HTML/SGMLSpm/definition.html @@ -0,0 +1,34 @@ + + +What is SGMLS.pm? + + + +

Links: Next Previous Up Top

+ +

What is SGMLS.pm?

+ +

SGMLS.pm is an extensible perl5 +class library for parsing the output from James Clark's popular +sgmls and nsgmls parsers, available on the Internet at ftp://jclark.com. +This is not a complete system for translating +documents written in the Standard Generalised Markup +Language (SGML) into other formats, but it can easily +form the basis of such a system (for a simple example, see the sgmlspl +program included in this package).

+ +

The library recognises four basic types of SGML objects: the +element, the +attribute, +the notation, and the +entity; each +of these is a fully-developed class with methods for accessing +important information.

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/events.html b/DOC/HTML/SGMLSpm/events.html new file mode 100644 index 0000000..372ccee --- /dev/null +++ b/DOC/HTML/SGMLSpm/events.html @@ -0,0 +1,161 @@ + + +What are the different event types and data? + + + +

Links: Next Previous Up Top

+ +

What are the different event types and data?

+ +

Table 2 lists the ten +different event types returned by the next_event +method of an SGMLS +object and the different types of data associated with each of these +(note that these do not correspond to the +standard ESIS events).

+ + +

Table 2: The SGMLS_Event types

+ +
+
+
Event Type
+
'start_element'
+
Event Data
+
SGMLS_Element
+
Description
+
The beginning of an element.
+ +
+
+ + +
+
Event Type
+
'end_element'
+
Event Data
+
SGMLS_Element
+
Description
+
The end of an element.
+ +
+
+ + +
+
Event Type
+
'cdata'
+
Event Data
+
string
+
Description
+
Regular character data.
+ +
+
+ + +
+
Event Type
+
'sdata'
+
Event Data
+
string
+
Description
+
Special system data.
+ +
+
+ + +
+
Event Type
+
're'
+
Event Data
+
[none]
+
Description
+
A record-end (i.e., a newline).
+ +
+
+ + +
+
Event Type
+
'pi'
+
Event Data
+
string
+
Description
+
A processing instruction.
+ +
+
+ + +
+
Event Type
+
'entity'
+
Event Data
+
SGMLS_Entity
+
Description
+
A non-SGML external entity.
+ +
+
+ + +
+
Event Type
+
'start_subdoc'
+
Event Data
+
SGMLS_Entity
+
Description
+
The beginning of an SGML subdocument.
+ +
+
+ + +
+
Event Type
+
'end_subdoc'
+
Event Data
+
SGMLS_Entity
+
Description
+
The end of an SGML subdocument.
+ +
+
+ + +
+
Event Type
+
'conforming'
+
Event Data
+
[none]
+
Description
+
The document was valid.
+ +
+
+ + +

For example, if $event->type returns +'start_element', then +$event->data will return an object belonging to the +SGMLS_Element +class (which will contain a list of attributes, etc. -- see +below), $event->file and +$event->line will return the file and line-number +in which the element appeared (if you called sgmls or nsgmls with +the -l flag), and +$event->element will return the element currently +in force (in this case, the same as +$event->data).

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/example.html b/DOC/HTML/SGMLSpm/example.html new file mode 100644 index 0000000..a263f30 --- /dev/null +++ b/DOC/HTML/SGMLSpm/example.html @@ -0,0 +1,82 @@ + + +How about a simple example? + + + +

Links: Next Previous Up Top

+ +

How about a simple example?

+ +

OK. The following script simply reports its events:

+ +

+

#!/usr/bin/perl
+
+use SGMLS;
+
+$this_parse = new SGMLS(STDIN); # Read from standard input.
+
+while ($this_event = $this_parse->next_event) {
+    my $type = $this_event->type;
+    my $data = $this_event->data;
+  SWITCH: {
+      $type eq 'start_element' && do {
+          print "Beginning element: " . $data->name . "\n";
+          last SWITCH;
+      };
+      $type eq 'end_element' && do {
+          print "Ending element: " . $data->name . "\n";
+          last SWITCH;
+      };
+      $type eq 'cdata' && do {
+          print "Character data: " . $data . "\n";
+          last SWITCH;
+      };
+      $type eq 'sdata' && do {
+          print "Special data: " . $data . "\n";
+          last SWITCH;
+      };
+      $type eq 're' && do {
+          print "Record End\n";
+          last SWITCH;
+      };
+      $type eq 'pi' && do {
+          print "Processing Instruction: " . $data . "\n";
+          last SWITCH;
+      };
+      $type eq 'entity' && do {
+          print "External Data Entity: " . $data->name .
+              " with notation " . $data->notation->name . "\n";
+          last SWITCH;
+      };
+      $type eq 'start_subdoc' && do {
+          print "Beginning Subdocument Entity: " . $data->name . "\n";
+          last SWITCH;
+      };
+      $type eq 'end_subdoc' && do {
+          print "Ending Subdocument Entity: " . $data->name . "\n";
+          last SWITCH;
+      };
+      $type eq 'conforming' && do {
+          print "This is a conforming SGML document\n";
+          last SWITCH;
+      };
+  }
+}
+
+

+

To use it under Unix, try something like

+ +

+

sgmls document.sgml | perl sample.pl
+

+

and watch the output scroll down.

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/extend.html b/DOC/HTML/SGMLSpm/extend.html new file mode 100644 index 0000000..a40bd8c --- /dev/null +++ b/DOC/HTML/SGMLSpm/extend.html @@ -0,0 +1,59 @@ + + +How do I design my own classes? + + + +

Links: Next Previous Up Top

+ +

How do I design my own classes?

+ +

In addition to the methods listed above, all of the classes used +in SGMLS.pm have an ext method which returns a +reference to an initially-empty hash table. You are free to use this +hash table to store anything you want -- it +should be especially useful if you are building your own, derived +classes from the ones provided here. The following example builds a +derived class My_Element from the SGMLS_Element +class, adding methods to set and get the current font:

+ +

+

use SGMLS;
+
+package My_Element;
+@ISA = qw(SGMLS_Element);
+
+sub new {
+  my ($class,$element,$font) = @_;
+  $element->ext->{'font'} = $font;
+  return bless $element;
+}
+
+sub get_font {
+  my ($self) = @_;
+  return $self->ext->{'font'};
+}
+
+sub set_font {
+  my ($self,$font) = @_;
+  $self->ext->{'font'} = $font;
+}
+

+

Note that the derived class does not need to have any knowledge +about the underlying structure of the SGMLS_Element +class, and need only avoid shadowing any of the methods currently +existing there.

+ +

If you decide to create a derived class from the SGMLS class, please note that in +addition to the methods listed above, that class uses internal methods +named element, line, and +file, similar to the same methods in SGMLS_Event -- +it is essential that you not shadow these method names.

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/perl5.html b/DOC/HTML/SGMLSpm/perl5.html new file mode 100644 index 0000000..4a68f91 --- /dev/null +++ b/DOC/HTML/SGMLSpm/perl5.html @@ -0,0 +1,26 @@ + + +How do I program in perl5? + + + +

Links: Next Previous Up Top

+ +

How do I program in perl5?

+ +

If you have to ask this question, you probably should not be +trying to use this library right now, since it is intended only for +experienced perl5 programmers. That said, however, you can find the +perl5 documentation with the perl5 source distribution or on the +World-Wide Web at http://www.metronet.com/0/perlinfo/perl5/manual/perl.html.

+ +

Please do not write to me for help on using +perl5.

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/sample.pl b/DOC/HTML/SGMLSpm/sample.pl new file mode 120000 index 0000000..36d789d --- /dev/null +++ b/DOC/HTML/SGMLSpm/sample.pl @@ -0,0 +1 @@ +../../sample.pl \ No newline at end of file diff --git a/DOC/HTML/SGMLSpm/sgml.html b/DOC/HTML/SGMLSpm/sgml.html new file mode 100644 index 0000000..193f8a0 --- /dev/null +++ b/DOC/HTML/SGMLSpm/sgml.html @@ -0,0 +1,23 @@ + + +How do I produce SGML documents? + + + +

Links: Next Previous Up Top

+ +

How do I produce SGML documents?

+ +

I am presuming here that you are already experienced with SGML +and the sgmls or nsgmls parser. For help with the parsers see the +manual pages accompanying each one; for help with SGML see Robin +Cover's SGML Web Page at http://www.sil.org/sgml/sgml.html +on the Internet.

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/sgmls.html b/DOC/HTML/SGMLSpm/sgmls.html new file mode 100644 index 0000000..6d0bb29 --- /dev/null +++ b/DOC/HTML/SGMLSpm/sgmls.html @@ -0,0 +1,64 @@ + + +How do I use SGMLS.pm? + + + +

Links: Next Previous Up Top

+ +

How do I use SGMLS.pm?

+ +

First, you need to copy the file SGMLS.pm to a directory where +perl can find it (on a Unix system, it might be +/usr/lib/perl5 or +/usr/local/lib/perl5, or whatever the environment +variable PERL5LIB is set to) and make certain that it +is world-readable.

+ +

Next, near the top of your perl5 program, type the following +line:

+ +

+

use SGMLS;
+

+

You must then open up a file handle from which SGMLS.pm can read the +data from an sgmls or nsgmls process, unless you are reading from +a standard handle like STDIN -- for example, +if you are piping the output from sgmls to a perl5 script, using +something like

+ +

+

sgmls foo.sgml | perl myscript.pl
+

+

then the predefined filehandle STDIN will be +sufficient. In DOS, you might want to dump the sgmls output to a file +and use it as standard input (or open it explicitly in perl), and in +Unix, you might actually want to open a pipe or socket for the input. +SGMLS.pm doesn't need to seek, so any input stream should +work.

+ +

To parse the sgmls or nsgmls output from the handle, create +a new object instance of the SGMLS class with +the handle as an argument, i.e.

+ +

+

$parse = new SGMLS(STDIN);
+

+

(You may create more than one SGMLS +object at once, but each object must have a +unique handle pointing to a unique stream, or +chaos will result.) Now, you can retrieve and +process events using the next_event method:

+ +

+

while ($event = $parse->next_event) {
+    #do something with each event
+}
+

+ +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/sgmlsattribute.html b/DOC/HTML/SGMLSpm/sgmlsattribute.html new file mode 100644 index 0000000..c5a7857 --- /dev/null +++ b/DOC/HTML/SGMLSpm/sgmlsattribute.html @@ -0,0 +1,121 @@ + + +What do I do with an +SGMLS_Attribute? + + + +

Links: Next Previous Up Top

+ +

What do I do with an +SGMLS_Attribute?

+ +

Note that objects of the SGMLS_Attribute +class do not have events in their own right, and are available only +through the attributes or +attribute(aname) methods for +SGMLS_Element +objects. An object belonging to the +SGMLS_Attribute class will recognise the +methods listed in table 4.

+ + +

Table 4: The SGMLS_Attribute class

+ +
+
+
Method
+
name
+
Return Type
+
string
+
Description
+
The name of the attribute (in upper-case).
+ +
+
+ + +
+
Method
+
type
+
Return Type
+
string
+
Description
+
The type of the attribute: 'IMPLIED', +'CDATA', 'NOTATION', +'ENTITY', or 'TOKEN'.
+ +
+
+ + +
+
Method
+
value
+
Return Type
+
string, SGMLS_Entity, or +SGMLS_Notation.
+
Description
+
The value of the attribute. If the type is +'CDATA' or 'TOKEN', it will be a +simple string; if it is 'NOTATION' it will be an +object belonging to the SGMLS_Notation class, +and if it is 'ENTITY' it will be an object +belonging to the SGMLS_Entity class.
+ +
+
+ + +
+
Method
+
is_implied
+
Return Type
+
boolean
+
Description
+
Return true if the value of the attribute is implied, or false if +it has an explicit value.
+ +
+
+ + +
+
Method
+
set_type(type)
+
Return Type
+
[none]
+
Description
+
Provide a new type for the current attribute -- no sanity +checking will be performed, so be careful.
+ +
+
+ + +
+
Method
+
set_value(value)
+
Return Type
+
[none]
+
Description
+
Provide a new value for the current attribute -- no sanity +checking will be performed, so be careful.
+ +
+
+ + +

Note that the type 'TOKEN' includes both +individual tokens and lists of tokens (i.e., 'TOKENS', +'IDS', or 'IDREFS' in the +original SGML document), so you might need to use the perl function +'split' to break the value string into a list.

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/sgmlselement.html b/DOC/HTML/SGMLSpm/sgmlselement.html new file mode 100644 index 0000000..5a12466 --- /dev/null +++ b/DOC/HTML/SGMLSpm/sgmlselement.html @@ -0,0 +1,147 @@ + + +What do I do with an SGMLS_Element? + + + +

Links: Next Previous Up Top

+ +

What do I do with an SGMLS_Element?

+ +

Altogether, there are six classes in SGMLS.pm, each with its +own methods: in addition to SGMLS (for the parse) and +SGMLS_Event +(for a specific event), the classes are +SGMLS_Element, SGMLS_Attribute, +SGMLS_Entity, +and SGMLS_Notation. +Like all of these, SGMLS_Element has a number +of methods available for obtaining different types of information. +For example, if you were to use

+ +

+

my $element = $event->data
+

+

to retrieve the data for a 'start_element' or +'end_element' event, then you could use the methods +listed in table 3 to find more +information about the element.

+ + +

Table 3: The SGMLS_Element class

+ +
+
+
Method
+
name
+
Return Type
+
string
+
Description
+
The name (or GI), in upper-case.
+ +
+
+ + +
+
Method
+
parent
+
Return Type
+
SGMLS_Element
+
Description
+
The parent element, or '' if this is the top +element.
+ +
+
+ + +
+
Method
+
attributes
+
Return Type
+
HASH
+
Description
+
Return a reference to a hash table of +SGMLS_Attribute objects, keyed by the attribute +names (in upper-case).
+ +
+
+ + +
+
Method
+
attribute_names
+
Return Type
+
ARRAY
+
Description
+
A list of all attribute names for the current element (in +upper-case).
+ +
+
+ + +
+
Method
+
attribute(aname)
+
Return Type
+
SGMLS_Attribute
+
Description
+
Return the attribute named ANAME.
+ +
+
+ + +
+
Method
+
set_attribute(attribute)
+
Return Type
+
[none]
+
Description
+
The attribute argument should be an +object belonging to the SGMLS_Attribute +class. Add it to the element, replacing any previous attribute with +the same name.
+ +
+
+ + +
+
Method
+
in(name)
+
Return Type
+
SGMLS_Element
+
Description
+
If the current element's parent is named +name, return the parent; otherwise, return +''.
+ +
+
+ + +
+
Method
+
within(name)
+
Return Type
+
SGMLS_Element
+
Description
+
If any ancestor of the current element is named +name, return it; otherwise, return +''.
+ +
+
+ + + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/sgmlsentity.html b/DOC/HTML/SGMLSpm/sgmlsentity.html new file mode 100644 index 0000000..ce61c58 --- /dev/null +++ b/DOC/HTML/SGMLSpm/sgmlsentity.html @@ -0,0 +1,124 @@ + + +What do I do with an SGMLS_Entity? + + + +

Links: Next Previous Up Top

+ +

What do I do with an SGMLS_Entity?

+ +

An SGMLS_Entity object can come in an +'entity' event (in +which case it is always external), in a +'start_subdoc' or 'end_subdoc' +event (in which case it always has the type +'SUBDOC'), or as the value of an attribute (in +which case it may be internal or external). An object belonging to +the SGMLS_Entity class may use the methods +listed in table 5.

+ + +

Table 5: The SGMLS_Entity class

+ +
+
+
Method
+
name
+
Return Type
+
string
+
Description
+
The entity name.
+ +
+
+ + +
+
Method
+
type
+
Return Type
+
string
+
Description
+
The entity type: 'CDATA', +'SDATA', 'NDATA', or +'SUBDOC'.
+ +
+
+ + +
+
Method
+
value
+
Return Type
+
string
+
Description
+
The entity replacement text (internal entities +only).
+ +
+
+ + +
+
Method
+
sysid
+
Return Type
+
string
+
Description
+
The system identifier (external entities only).
+ +
+
+ + +
+
Method
+
pubid
+
Return Type
+
string
+
Description
+
The public identifier (external entities only).
+ +
+
+ + +
+
Method
+
filenames
+
Return Type
+
ARRAY
+
Description
+
A list of file names generated from the sysid and pubid +(external entities only).
+ +
+
+ + +
+
Method
+
notation
+
Return Type
+
SGMLS_Notation
+
Description
+
The associated notation (external data entities only).
+ +
+
+ + +

An entity of type 'SUBDOC' will have a sysid +and pubid, an external data entity will have a sysid, pubid, +filenames, and a notation, and an internal data entity will have a +value.

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/sgmlsevent.html b/DOC/HTML/SGMLSpm/sgmlsevent.html new file mode 100644 index 0000000..85a070d --- /dev/null +++ b/DOC/HTML/SGMLSpm/sgmlsevent.html @@ -0,0 +1,127 @@ + + +So what do I do with an event? + + + +

Links: Next Previous Up Top

+ +

So what do I do with an event?

+ +

The next_event method for the SGMLS class returns an +object belonging to the class SGMLS_Event. +This class has several methods available, as listed in table 1.

+ + +

Table 1: The SGMLS_Event class

+ +
+
+
Method
+
type
+
Return Type
+
string
+
Description
+
Return the type of the event.
+ +
+
+ + +
+
Method
+
data
+
Return Type
+
string, SGMLS_Element, or +SGMLS_Entity
+
Description
+
Return any data associated with the event.
+ +
+
+ + +
+
Method
+
file
+
Return Type
+
string
+
Description
+
Return the name of the SGML source file which generated the +event, if available.
+ +
+
+ + +
+
Method
+
line
+
Return Type
+
string
+
Description
+
Return the line number of the SGML source file which +generated the event, if available.
+ +
+
+ + +
+
Method
+
element
+
Return Type
+
SGMLS_Element
+
Description
+
Return the element in force when the event was +generated.
+ +
+
+ + +
+
Method
+
parse
+
Return Type
+
Return the SGMLS object for the current +parse.
+ +
+
+ + +
+
Method
+
entity(ename)
+
Return Type
+
Look up an entity from those currently known to the parse. An +alias for ->parse->entity($ename).
+ +
+
+ + +
+
Method
+
notation(nname)
+
Return Type
+
Look up the notation from those currently known to the parse: +an alias for ->parse->notation($nname).
+ +
+
+ + +

The file and line methods +will return useful information only if you called sgmls or nsgmls +with the -l flag to include file and +line-number information.

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/sgmlsnotation.html b/DOC/HTML/SGMLSpm/sgmlsnotation.html new file mode 100644 index 0000000..3271f01 --- /dev/null +++ b/DOC/HTML/SGMLSpm/sgmlsnotation.html @@ -0,0 +1,69 @@ + + +What do I do with an SGMLS_Notation? + + + +

Links: Next Previous Up Top

+ +

What do I do with an SGMLS_Notation?

+ +

The fourth data class is the notation, which is available only +as a return value from the notation method of an +SGMLS_Entity +or the value method of an SGMLS_Attribute +with type 'NOTATION'. You can use the notation to +decide how to treat non-SGML data (such as graphics). An object +belonging to the SGMLS_Notation class will have +access to the methods listed in table 6.

+ + +

Table 6: The SGMLS_Notation class

+ +
+
+
Method
+
name
+
Return Type
+
string
+
Description
+
The notation's name.
+ +
+
+ + +
+
Method
+
sysid
+
Return Type
+
string
+
Description
+
The notation's system identifier.
+ +
+
+ + +
+
Method
+
pubid
+
Return Type
+
string
+
Description
+
The notation's public identifier.
+ +
+
+ + +

What you do with this information is +entirely up to you.

+ + +

Links: Next Previous Up Top

+ + +
David Megginson <dmeggins@aix1.uottawa.ca>
+ + diff --git a/DOC/HTML/SGMLSpm/sgmlspm.html b/DOC/HTML/SGMLSpm/sgmlspm.html new file mode 100644 index 0000000..fd81bd2 --- /dev/null +++ b/DOC/HTML/SGMLSpm/sgmlspm.html @@ -0,0 +1,38 @@ + + +SGMLS.pm: a perl5 class library for handling output from the +SGMLS and NSGMLS parsers (version 1.03) + + +

SGMLS.pm: a perl5 class library for handling output from the +SGMLS and NSGMLS parsers (version 1.03)

+ +

Welcome to SGMLS.pm, an extensible perl5 class library for +processing the output from the sgmls and nsgmls parsers. +SGMLS.pm is free, copyrighted software available by anonymous ftp in +the directory ftp://aix1.uottawa.ca/pub/dmeggins/. +You might also want to look at the documentation for sgmlspl, +a simple sample script which uses SGMLS.pm to convert documents from +SGML to other formats.

+ +
  • Terms
  • +
  • What is SGMLS.pm?
  • +
  • How do I produce SGML documents?
  • +
  • How do I program in perl5?
  • +
  • How do I use SGMLS.pm?
  • +
  • So what do I do with an event?
  • +
  • What are the different event types and data?
  • +
  • What do I do with an SGMLS_Element?
  • +
  • What do I do with an +SGMLS_Attribute?
  • +
  • What do I do with an SGMLS_Entity?
  • +
  • What do I do with an SGMLS_Notation?
  • +
  • Is there any extra information available from the SGML +document?
  • +
  • How about a simple example?
  • +
  • How do I design my own classes?
  • +
  • Are there any bugs?
  • + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/SGMLSpm/sgmlspm.refs b/DOC/HTML/SGMLSpm/sgmlspm.refs new file mode 100644 index 0000000..16a9330 --- /dev/null +++ b/DOC/HTML/SGMLSpm/sgmlspm.refs @@ -0,0 +1,86 @@ +{ + '' => '', + 'title:sgmls' => 'How do I use SGMLS.pm?', + 'previous:sgmlsevent.html' => 'sgmls.html', + 'previous:extend.html' => 'example.html', + 'previous:definition.html' => 'terms.html', + 'title:definition' => 'What is SGMLS.pm?', + 'firstname:sgmlspm' => 'David', + 'xref:table.class.sgmls.element' => '3', + 'orgdiv:sgmlspm' => 'Department of English', + 'title:example' => 'How about a simple example?', + 'title:bugs' => 'Are there any bugs?', + 'title:sgmlsnotation' => 'What do I do with an SGMLS_Notation?', + 'next:sgml.html' => 'perl5.html', + 'xref:table.class.sgmls' => '1', + 'previous:events.html' => 'sgmlsevent.html', + 'title:events' => 'What are the different event types and data?', + 'up:sgmlselement.html' => 'sgmlspm.html', + 'up:sgmlsattribute.html' => 'sgmlspm.html', + 'previous:sgmlsentity.html' => 'sgmlsattribute.html', + 'xref:table.class.sgmls.event' => '2', + 'xref:table.class.sgmls.extra' => '7', + 'title:extend' => 'How do I design my own classes?', + 'title:sgmlsevent' => 'So what do I do with an event?', + 'email:sgmlspm' => 'dmeggins@aix1.uottawa.ca', + 'next:xtrainfo.html' => 'example.html', + 'title:table.class.sgmls.entity' => 'The SGMLS_Entity class', + 'title:terms' => 'Terms', + 'next:terms.html' => 'definition.html', + 'xref:table.class.sgmls.notation' => '6', + 'up:extend.html' => 'sgmlspm.html', + 'up:sgmlsevent.html' => 'sgmlspm.html', + 'up:definition.html' => 'sgmlspm.html', + 'up:terms.html' => 'sgmlspm.html', + 'previous:sgml.html' => 'definition.html', + 'title:sgml' => 'How do I produce SGML documents?', + 'title:table.class.sgmls.notation' => 'The SGMLS_Notation class', + 'up:events.html' => 'sgmlspm.html', + 'next:sgmlsentity.html' => 'sgmlsnotation.html', + 'previous:sgmls.html' => 'perl5.html', + 'previous:perl5.html' => 
'sgml.html', + 'title:perl5' => 'How do I program in perl5?', + 'up:sgmlsentity.html' => 'sgmlspm.html', + 'previous:xtrainfo.html' => 'sgmlsnotation.html', + 'previous:example.html' => 'xtrainfo.html', + 'title:xtrainfo' => 'Is there any extra information available from the SGML +document?', + 'previous:bugs.html' => 'extend.html', + 'title:table.class.sgmls.event' => 'The SGMLS_Event types', + 'title:sgmlselement' => 'What do I do with an SGMLS_Element?', + 'next:sgmlselement.html' => 'sgmlsattribute.html', + 'previous:sgmlsnotation.html' => 'sgmlsentity.html', + 'title:table.class.sgmls.extra' => 'Additional methods for the SGMLS +class', + 'xref:table.class.sgmls.attribute' => '4', + 'up:sgml.html' => 'sgmlspm.html', + 'title:sgmlsattribute' => 'What do I do with an +SGMLS_Attribute?', + 'next:sgmlsattribute.html' => 'sgmlsentity.html', + 'surname:sgmlspm' => 'Megginson', + 'xref:table.class.sgmls.entity' => '5', + 'title:sgmlspm' => 'SGMLS.pm: a perl5 class library for handling output from the +SGMLS and NSGMLS parsers (version 1.03)', + 'next:sgmls.html' => 'sgmlsevent.html', + 'next:extend.html' => 'bugs.html', + 'next:sgmlsevent.html' => 'events.html', + 'next:perl5.html' => 'sgmls.html', + 'next:definition.html' => 'sgml.html', + 'orgname:sgmlspm' => 'University of Ottawa', + 'up:sgmls.html' => 'sgmlspm.html', + 'next:example.html' => 'extend.html', + 'previous:sgmlselement.html' => 'events.html', + 'up:perl5.html' => 'sgmlspm.html', + 'up:xtrainfo.html' => 'sgmlspm.html', + 'next:sgmlsnotation.html' => 'xtrainfo.html', + 'up:example.html' => 'sgmlspm.html', + 'next:events.html' => 'sgmlselement.html', + 'up:bugs.html' => 'sgmlspm.html', + 'up:sgmlsnotation.html' => 'sgmlspm.html', + 'previous:sgmlsattribute.html' => 'sgmlselement.html', + 'title:table.class.sgmls.element' => 'The SGMLS_Element class', + 'title:table.class.sgmls' => 'The SGMLS_Event class', + 'title:table.class.sgmls.attribute' => 'The SGMLS_Attribute class', + 'title:sgmlsentity' => 'What 
do I do with an SGMLS_Entity?', + '' => '' +} diff --git a/DOC/HTML/SGMLSpm/terms.html b/DOC/HTML/SGMLSpm/terms.html new file mode 100644 index 0000000..36f816a --- /dev/null +++ b/DOC/HTML/SGMLSpm/terms.html @@ -0,0 +1,32 @@ + + +Terms + + + +

    Links: Next Up Top

    + +

    Terms

    + +

    This program, along with its documentation, is free software; +you can redistribute it and/or modify it under the terms of the GNU +General Public License as published by the Free Software Foundation; +either version 2 of the License, or (at your option) any later +version.

    + +

    This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details.

    + +

    You should have received a copy of the GNU General Public +License along with this program; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    + + +

    Links: Next Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/SGMLSpm/xtrainfo.html b/DOC/HTML/SGMLSpm/xtrainfo.html new file mode 100644 index 0000000..a2abd04 --- /dev/null +++ b/DOC/HTML/SGMLSpm/xtrainfo.html @@ -0,0 +1,81 @@ + + +Is there any extra information available from the SGML +document? + + + +

    Links: Next Previous Up Top

    + +

    Is there any extra information available from the SGML +document?

    + +

    The SGMLS +object which you created at the beginning of the parse has several +methods available in addition to next_event -- +you will find them all listed in table 7. There should normally be no need to +use the notation and entity +methods, since SGMLS.pm will look up entities and notations for you +automatically as needed.

    + + +

    Table 7: Additional methods for the SGMLS +class

    + +
    +
    +
    Method
    +
    next_event
    +
    Return Type
    +
    SGMLS_Event
    +
    Description
    +
    Return the next event.
    + +
    +
    + + +
    +
    Method
    +
    appinfo
    +
    Return Type
    +
    string
    +
    Description
    +
    Return the APPINFO parameter from the SGML declaration, if +any.
    + +
    +
    + + +
    +
    Method
    +
    notation(nname)
    +
    Return Type
    +
    SGMLS_Notation
    +
    Description
    +
    Look up a notation by name.
    + +
    +
    + + +
    +
    Method
    +
    entity(ename)
    +
    Return Type
    +
    SGMLS_Entity
    +
    Description
    +
    Look up an entity by name.
    + +
    +
    + + + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/bugs.html b/DOC/HTML/sgmlspl/bugs.html new file mode 100644 index 0000000..b4b62b4 --- /dev/null +++ b/DOC/HTML/sgmlspl/bugs.html @@ -0,0 +1,20 @@ + + +Are there any bugs? + + + +

    Links: Previous Up Top

    + +

    Are there any bugs?

    + +

    Any bugs in SGMLS.pm will be here too, since sgmlspl relies +heavily on that perl5 library.

    + + +

    Links: Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/definition.html b/DOC/HTML/sgmlspl/definition.html new file mode 100644 index 0000000..eaf5ac9 --- /dev/null +++ b/DOC/HTML/sgmlspl/definition.html @@ -0,0 +1,42 @@ + + +What is sgmlspl? + + + +

    Links: Next Previous Up Top

    + +

    What is sgmlspl?

    + +

    sgmlspl is a sample application distributed with the +SGMLS.pm perl5 class library -- you can use it to convert +SGML documents to other formats by providing a specification file +detailing exactly how you want to handle each element, external data +entity, subdocument entity, CDATA string, record end, SDATA string, +and processing instruction. sgmlspl also uses the SGMLS::Output +library (included in this distribution) to allow you to redirect or +capture output.

    + +

    To use sgmlspl, you simply prepare a specification file +containing regular perl5 code. If your SGML document were named +doc.sgml, your sgmlspl specification file +were named spec.pl, and the name of your new +file were doc.latex, then you could use the +following command in a Unix shell to convert your SGML document:

    + +

    +

    sgmls doc.sgml | sgmlspl spec.pl > doc.latex
    +

    +

    sgmlspl will pass any additional arguments on to the specification +file, which can process them in the regular perl5 fashion. The +specification files used to convert this manual -- +tolatex.pl and tohtml.pl +-- are available with the SGMLS.pm distribution.

    + + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/dsssl.html b/DOC/HTML/sgmlspl/dsssl.html new file mode 100644 index 0000000..2e92610 --- /dev/null +++ b/DOC/HTML/sgmlspl/dsssl.html @@ -0,0 +1,34 @@ + + +Is sgmlspl the best way to convert SGML documents? + + + +

    Links: Next Previous Up Top

    + +

    Is sgmlspl the best way to convert SGML documents?

    + +

    Not necessarily. While sgmlspl is fully functional, it is not +always particularly intuitive or pleasant to use. There is a new +proposed standard, Document Style Semantics and +Specification Language (DSSSL), based +on the Scheme programming language, and +implementations should soon be available. To read more about the +DSSSL standard, see http://www.jclark.com/dsssl/ +on the Internet.

    + +

    That said, DSSSL is a declarative, +side-effect-free programming language, while sgmlspl allows you to +use any programming constructions available in perl5, including +those with side-effects. This means that if you want to do more than +simply format the document or convert it from one Document +Type Definition (DTD) to another, +sgmlspl might be a good choice.

    + + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/forward.html b/DOC/HTML/sgmlspl/forward.html new file mode 100644 index 0000000..afb09b4 --- /dev/null +++ b/DOC/HTML/sgmlspl/forward.html @@ -0,0 +1,130 @@ + + +How should I handle forward references? + + + +

    Links: Next Previous Up Top

    + +

    How should I handle forward references?

    + +

    Because sgmlspl processes the document as a linear data +stream, from beginning to end, it is easy to refer +back to information, but relatively difficult to +refer forward, since you do not know what will be +coming later in the parse. Here are a few suggestions.

    + +

    First, you could use push_output and +pop_output to +save up output in a large string. When you have found the information +which you need, you can make any necessary modifications to the string +and print it then. This will work for relatively small chunks of a +document, but you would not want to try it for anything larger.

    + +

    Next, you could use the ext method to +add extra pointers, and build a parse tree of the whole document +before processing any of it. This method will work well for small +documents, but large documents will place some serious stress on your +system's memory and/or swapping.

    + +

    A more sophisticated solution, however, involves the +Refs.pm module, included in this +distribution. In your sgmlspl script, include the line

    + +

    +

    use SGMLS::Refs;
    +

    +

    to activate the library. The library will create a database +file to keep track of references between passes, and to tell you if +any references have changed. For example, you might want to try +something like this:

    + +

    +

    my $Refs;
    +sgml('start', sub {
    +  $Refs = new SGMLS::Refs('references.refs');
    +});
    +sgml('end', sub {
    +  $Refs->warn;
    +  undef $Refs;
    +});
    +

    +

    This code will create an object, $Refs, linked to a file of +references called references.refs. The +SGMLS::Refs class understands the methods +listed in table 4.

    + + +

    Table 4: The SGMLS::Refs class

    + +
    +
    +
    Method
    +
    new(filename,[logfile_handle])
    +
    Return Type
    +
    SGMLS::Refs
    +
    Description
    +
    Create a new SGMLS::Refs object. +Arguments are the name of the hashfile and (optionally) a writable +filehandle for logging changes.
    + +
    +
    + + +
    +
    Method
    +
    get(key)
    +
    Return Type
    +
    string
    +
    Description
    +
    Look up a reference key in the hash file and return its value.
    + +
    +
    + + +
    +
    Method
    +
    put(key,value)
    +
    Return Type
    +
    [none]
    +
    Description
    +
    Set a new value for the key in the hashfile.
    + +
    +
    + + +
    +
    Method
    +
    count
    +
    Return Type
    +
    number
    +
    Description
    +
    Return the number of references whose values have changed (thus +far).
    + +
    +
    + + +
    +
    Method
    +
    warn
    +
    Return Type
    +
    1 or 0
    +
    Description
    +
    Print a warning mentioning the number of references which have +changed, and return 1 if a warning was printed.
    + +
    +
    + + + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/generic.html b/DOC/HTML/sgmlspl/generic.html new file mode 100644 index 0000000..425ccdd --- /dev/null +++ b/DOC/HTML/sgmlspl/generic.html @@ -0,0 +1,185 @@ + + +What are the generic events? + + + +

    Links: Next Previous Up Top

    + +

    What are the generic events?

    + +

    sgmlspl recognises the twelve generic events listed in table +1. You may provide any one of these +as the first argument to sgml to declare a handler +(string or subroutine) for that event.

    + + +

    Table 1: sgmlspl generic events

    + +
    +
    +
    Event
    +
    'start'
    +
    Description
    +
    Execute handler (with no arguments) at +the beginning of the parse.
    + +
    +
    + + +
    +
    Event
    +
    'end'
    +
    Description
    +
    Execute handler (with no arguments) at +the end of the parse.
    + +
    +
    + + +
    +
    Event
    +
    'start_element'
    +
    Description
    +
    Execute handler at the beginning of every +element without a specific start handler.
    + +
    +
    + + +
    +
    Event
    +
    'end_element'
    +
    Description
    +
    Execute handler at the end of every +element without a specific end handler.
    + +
    +
    + + +
    +
    Event
    +
    'cdata'
    +
    Description
    +
    Execute handler for every character-data +string.
    + +
    +
    + + +
    +
    Event
    +
    'sdata'
    +
    Description
    +
    Execute handler for every special-data +string without a specific handler.
    + +
    +
    + + +
    +
    Event
    +
    're'
    +
    Description
    +
    Execute handler for every +record end.
    + +
    +
    + + +
    +
    Event
    +
    'pi'
    +
    Description
    +
    Execute handler for every processing +instruction.
    + +
    +
    + + +
    +
    Event
    +
    'entity'
    +
    Description
    +
    Execute handler for every external data +entity without a specific handler.
    + +
    +
    + + +
    +
    Event
    +
    'start_subdoc'
    +
    Description
    +
    Execute handler at the beginning of every +subdocument entity without a specific handler.
    + +
    +
    + + +
    +
    Event
    +
    'end_subdoc'
    +
    Description
    +
    Execute handler at the end of every +subdocument entity without a specific handler.
    + +
    +
    + + +
    +
    Event
    +
    'conforming'
    +
    Description
    +
    Execute handler once, at the end of the +document parse, if and only if the document was conforming.
    + +
    +
    + + +

    The handlers for all of these except the document events +'start' and 'end' will receive +two arguments whenever they are called: the first will be the data +associated with the event (if any), and the second will be the +SGMLS_Event object itself (see the documentation for +SGMLS.pm). Note the following example, which allows processing +instructions for including the date or the hostname in the document at +parse time:

    + +

    +

    sgml('pi', sub {
    +    my ($instruction) = @_;
    +    if ($instruction eq 'date') {
    +      output `date`;
    +    } elsif ($instruction eq 'hostname') {
    +      output `hostname`;
    +    } else {
    +      print STDERR "Warning: unknown processing instruction: $instruction\n";
    +    }
    +});
    +

    +

    With this handler, any occurrence +of <?date> in the original SGML document +would be replaced by the current date and time, and any occurrence of +<?hostname> would be replaced by the name of +the host.

    + + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/handlers.html b/DOC/HTML/sgmlspl/handlers.html new file mode 100644 index 0000000..1e5fa9f --- /dev/null +++ b/DOC/HTML/sgmlspl/handlers.html @@ -0,0 +1,77 @@ + + +What about the handler argument? + + + +

    Links: Next Previous Up Top

    + +

    What about the handler argument?

    + +

    The second argument to the sgml subroutine is +the actual code or data associated with each event. If it is a +string, it will be printed literally using the +output subroutine from the SGMLS::Output library; if +it is a reference to a perl5 subroutine, the subroutine will be +called whenever the event occurs. The following three +sgml commands will have identical results:

    + +

    +

    # Example 1
    +sgml('<DOC>', "\\begin{document}\n");
    +
    +# Example 2
    +sgml('<DOC>', sub {
    +  output "\\begin{document}\n";
    +});
    +
    +# Example 3
    +sub do_begin_document { output "\\begin{document}\n"; }
    +sgml('<DOC>', \&do_begin_document);
    +

    +

    For simply printing a string, of course, it does not make sense +to use a subroutine; however, the subroutines can be useful when you +need to check the value of an attribute, perform different actions in +different contexts, or perform other types of relatively more +complicated post-processing.

    + +

    If your handler is a subroutine, then it will receive two +arguments: the SGMLS.pm event's data, and the SGMLS.pm event +itself (see the SGMLS.pm documentation for a description +of event and data types). The following example will print +'\begin{enumerate}' if the value of the attribute +TYPE is 'ORDERED', and +'\begin{itemize}' if the value of the attribute +TYPE is 'UNORDERED':

    + +

    +

    sgml('<LIST>', sub {
    +  my ($element,$event) = @_;
    +  my $type = $element->attribute('TYPE')->value;
    +
    +  if ($type eq 'ORDERED') {
    +    output "\\begin{enumerate}\n";
    +  } elsif ($type eq 'UNORDERED') {
    +    output "\\begin{itemize}\n";
    +  } else {
    +    die "Bad TYPE '$type' for element LIST at line " .
    +      $event->line . " in " . $event->file . "\n";
    +  }
    +});
    +

    +

    You will not always need to use the event +argument, but it can be useful if you want to report line numbers or +file names for errors (presuming that you called sgmls or nsgmls +with the -l option). If you have a new version +of nsgmls which accepts the -h option, you +can also use the event argument to look up +arbitrary entities declared by the program. See the SGMLS_Event documentation for +more information.

    + + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/installation.html b/DOC/HTML/sgmlspl/installation.html new file mode 100644 index 0000000..3c4e46b --- /dev/null +++ b/DOC/HTML/sgmlspl/installation.html @@ -0,0 +1,29 @@ + + +How do I install sgmlspl on my system? + + + +

    Links: Next Previous Up Top

    + +

    How do I install sgmlspl on my system?

    + +

    To use sgmlspl, you need to install SGMLS.pm on your system, +by copying the SGMLS.pm file to a directory searched by perl5. +You also need to install SGMLS::Output in the same directory, and +sgmlspl (with execute permission) somewhere on your +PATH. The easiest way to do all of this on a Unix +system is to change to the root directory of this distribution +(SGMLSpm), edit the Makefile +appropriately, and type

    + +

    +

    make install
    +

    + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/output.html b/DOC/HTML/sgmlspl/output.html new file mode 100644 index 0000000..e96dc91 --- /dev/null +++ b/DOC/HTML/sgmlspl/output.html @@ -0,0 +1,40 @@ + + +Why does sgmlspl use output instead of +print? + + + +

    Links: Next Previous Up Top

    + +

    Why does sgmlspl use output instead of +print?

    + +

    sgmlspl uses a special perl5 library SGMLS::Output for +printing text. SGMLS::Output exports the subroutines +output(string...), +push_output(type[,data]), +and pop_output. The subroutine +output works much like the regular perl5 function +print, except that you are not able to specify a +file handle, and you may include multiple strings as arguments.

    + +

    When you want to write data to somewhere other than +STDOUT (the default), then you use the subroutines +push_output and +pop_output to set +a new destination or to restore an old one.

    + +

    You can use the SGMLS::Output package in other programs by adding +the following line:

    + +

    +

    use SGMLS::Output;
    +

    + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/outputex.html b/DOC/HTML/sgmlspl/outputex.html new file mode 100644 index 0000000..55f81c0 --- /dev/null +++ b/DOC/HTML/sgmlspl/outputex.html @@ -0,0 +1,37 @@ + + +How about an example for output? + + + +

    Links: Next Previous Up Top

    + +

    How about an example for output?

    + +

    Here is a simple example to demonstrate how output, push_output, and +pop_output work:

    + +

    +

    output "Hello, world!\n";               # (Written to STDOUT by default)
    +push_output('nul');                     # Push 'nul' ahead of STDOUT
    +output "Hello, again!\n";               # (Discarded)
    +push_output('file','foo.out');          # Push file 'foo.out' ahead of 'nul'
    +output "Hello, again!\n";               # (Written to the file 'foo.out')
    +pop_output;                             # Pop 'foo.out' and revert to 'nul'
    +output "Hello, again!\n";               # (Discarded)
    +push_output('string');                  # Push 'string' ahead of 'nul'
    +output "Hello, ";                       # (Written to the string)
    +output "again!\n";                      # (Also written to the string)
    +                                        # Pop the string "Hello, again!\n"
    +$foo = pop_output;                      # and revert to 'nul'
    +output "Hello, again!\n";               # (Discarded)
    +pop_output;                             # Pop 'nul' and revert to STDOUT
    +output "Hello, at last!\n";             # (Written to STDOUT)
    +

    + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/popoutput.html b/DOC/HTML/sgmlspl/popoutput.html new file mode 100644 index 0000000..8e05bd9 --- /dev/null +++ b/DOC/HTML/sgmlspl/popoutput.html @@ -0,0 +1,27 @@ + + +How do I use pop_output? + + + +

    Links: Next Previous Up Top

    + +

    How do I use pop_output?

    + +

    When you want to restore the previous output after using push_output, simply +call the subroutine pop_output. If the output type +was a string, pop_output will return the string +(containing all of the output); otherwise, the return value is not +useful.

    + +

    Usually, you will want to use push_output in +the start handler for an element or subdocument entity, and +pop_output in the end handler.

    + + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/pushoutput.html b/DOC/HTML/sgmlspl/pushoutput.html new file mode 100644 index 0000000..8a7aad8 --- /dev/null +++ b/DOC/HTML/sgmlspl/pushoutput.html @@ -0,0 +1,116 @@ + + +How do I use push_output? + + + +

    Links: Next Previous Up Top

    + +

    How do I use push_output?

    + +

    The subroutine +push_output(type[,data]) +takes two arguments: the type, which is always +required, and the data, which is needed for +certain types of output. Table 3 lists the different types which you +can push onto the output stack.

    + + +

    Table 3: Types for push_output

    + +
    +
    +
    Type
    +
    'handle'
    +
    Data
    +
    a filehandle
    +
    Description
    +
    Send all output to the supplied filehandle.
    + +
    +
    + + +
    +
    Type
    +
    'file'
    +
    Data
    +
    a filename
    +
    Description
    +
    Open the supplied file for writing, erasing its current +contents (if any), and send all output to it.
    + +
    +
    + + +
    +
    Type
    +
    'append'
    +
    Data
    +
    a filename
    +
    Description
    +
    Open the supplied file for writing and append all output to its +current contents.
    + +
    +
    + + +
    +
    Type
    +
    'pipe'
    +
    Data
    +
    a shell command
    +
    Description
    +
    Pipe all output to the supplied shell command.
    + +
    +
    + + +
    +
    Type
    +
    'string'
    +
    Data
    +
    a string [optional]
    +
    Description
    +
    Append all output to the supplied string, which will be +returned by pop_output.
    + +
    +
    + + +
    +
    Type
    +
    'nul'
    +
    Data
    +
    [none]
    +
    Description
    +
    Ignore all output.
    + +
    +
    + + +

    Because the output is stack-based, you do not lose the previous +output destination when you push a new one. This is especially +convenient for dealing with data in tree-structures, like SGML data +-- for example, you can capture the contents of sub-elements as +strings, ignore certain types of elements, and split the output from +one SGML parse into a series of sub-files. Here are some examples:

    + +

    +

    push_output('string');                  # append output to an empty string
    +push_output('file','/tmp/foo');         # send output to this file
    +push_output('pipe','mail webmaster');   # mail output to 'webmaster' (!!)
    +push_output('nul');                     # just ignore all output
    +

    + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/sgmlspl.html b/DOC/HTML/sgmlspl/sgmlspl.html new file mode 100644 index 0000000..ac8c3bf --- /dev/null +++ b/DOC/HTML/sgmlspl/sgmlspl.html @@ -0,0 +1,32 @@ + + +sgmlspl: a simple post-processor for SGMLS and NSGMLS (for use +with SGMLS.pm version 1.03) + + +

    sgmlspl: a simple post-processor for SGMLS and NSGMLS (for use +with SGMLS.pm version 1.03)

    + +

    Welcome to sgmlspl, a simple sample perl5 application which +uses the SGMLS.pm class library.

    + +
  • Terms
  • +
  • What is sgmlspl?
  • +
  • How do I install sgmlspl on my system?
  • +
  • Is sgmlspl the best way to convert SGML documents?
  • +
  • How does the specification file tell sgmlspl what to do?
  • +
  • What about the handler argument?
  • +
  • What are the generic events?
  • +
  • What are the specific events?
  • +
  • Why does sgmlspl use output instead of +print?
  • +
  • How do I use push_output?
  • +
  • How do I use pop_output?
  • +
  • How about an example for output?
  • +
  • Is there an easier way to make specification files?
  • +
  • How should I handle forward references?
  • +
  • Are there any bugs?
  • + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/sgmlspl.refs b/DOC/HTML/sgmlspl/sgmlspl.refs new file mode 100644 index 0000000..2f8f5b9 --- /dev/null +++ b/DOC/HTML/sgmlspl/sgmlspl.refs @@ -0,0 +1,78 @@ +{ + '' => '', + 'title:specific' => 'What are the specific events?', + 'previous:definition.html' => 'terms.html', + 'previous:dsssl.html' => 'installation.html', + 'firstname:sgmlspl' => 'David', + 'title:definition' => 'What is sgmlspl?', + 'xref:table.events.generic' => '1', + 'orgdiv:sgmlspl' => 'Department of English', + 'up:specific.html' => 'sgmlspl.html', + 'previous:handlers.html' => 'specs.html', + 'previous:pushoutput.html' => 'output.html', + 'next:skel.html' => 'forward.html', + 'next:output.html' => 'pushoutput.html', + 'title:pushoutput' => 'How do I use push_output?', + 'title:bugs' => 'Are there any bugs?', + 'next:installation.html' => 'dsssl.html', + 'previous:generic.html' => 'handlers.html', + 'next:specs.html' => 'handlers.html', + 'previous:popoutput.html' => 'pushoutput.html', + 'title:popoutput' => 'How do I use pop_output?', + 'up:specs.html' => 'sgmlspl.html', + 'xref:table.output.push.output' => '3', + 'email:sgmlspl' => 'dmeggins@aix1.uottawa.ca', + 'next:dsssl.html' => 'specs.html', + 'title:table.class.refs' => 'The SGMLS::Refs class', + 'title:terms' => 'Terms', + 'next:terms.html' => 'definition.html', + 'previous:skel.html' => 'outputex.html', + 'next:outputex.html' => 'skel.html', + 'title:skel' => 'Is there an easier way to make specification files?', + 'previous:output.html' => 'specific.html', + 'up:definition.html' => 'sgmlspl.html', + 'up:dsssl.html' => 'sgmlspl.html', + 'previous:installation.html' => 'definition.html', + 'up:terms.html' => 'sgmlspl.html', + 'title:output' => 'Why does sgmlspl use output instead of +print?', + 'up:handlers.html' => 'sgmlspl.html', + 'up:pushoutput.html' => 'sgmlspl.html', + 'title:generic' => 'What are the generic events?', + 'next:generic.html' => 'specific.html', + 'title:specs' => 'How does the 
specification file tell sgmlspl what to do?', + 'title:table.events.specific' => 'Specific event types', + 'next:popoutput.html' => 'outputex.html', + 'up:generic.html' => 'sgmlspl.html', + 'next:specific.html' => 'output.html', + 'up:popoutput.html' => 'sgmlspl.html', + 'previous:outputex.html' => 'popoutput.html', + 'previous:bugs.html' => 'forward.html', + 'title:handlers' => 'What about the handler argument?', + 'title:installation' => 'How do I install sgmlspl on my system?', + 'xref:table.events.specific' => '2', + 'previous:forward.html' => 'skel.html', + 'up:skel.html' => 'sgmlspl.html', + 'title:table.events.generic' => 'sgmlspl generic events', + 'up:output.html' => 'sgmlspl.html', + 'up:installation.html' => 'sgmlspl.html', + 'surname:sgmlspl' => 'Megginson', + 'xref:table.class.refs' => '4', + 'title:sgmlspl' => 'sgmlspl: a simple post-processor for SGMLS and NSGMLS (for use +with SGMLS.pm version 1.03)', + 'previous:specific.html' => 'generic.html', + 'title:dsssl' => 'Is sgmlspl the best way to convert SGML documents?', + 'next:definition.html' => 'installation.html', + 'orgname:sgmlspl' => 'University of Ottawa', + 'title:outputex' => 'How about an example for output?', + 'next:handlers.html' => 'generic.html', + 'next:pushoutput.html' => 'popoutput.html', + 'up:outputex.html' => 'sgmlspl.html', + 'title:table.output.push.output' => 'Types for push_output', + 'title:forward' => 'How should I handle forward references?', + 'next:forward.html' => 'bugs.html', + 'up:bugs.html' => 'sgmlspl.html', + 'up:forward.html' => 'sgmlspl.html', + 'previous:specs.html' => 'dsssl.html', + '' => '' +} diff --git a/DOC/HTML/sgmlspl/skel.html b/DOC/HTML/sgmlspl/skel.html new file mode 100644 index 0000000..5c2a798 --- /dev/null +++ b/DOC/HTML/sgmlspl/skel.html @@ -0,0 +1,29 @@ + + +Is there an easier way to make specification files? + + + +

    Links: Next Previous Up Top

    + +

    Is there an easier way to make specification files?

    + +

    Yes. The script skel.pl, included in this +package, is an sgmlspl specification which writes a specification +(!!!). To use it under Unix, try something like

    + +

    +

    sgmls foo.sgml | sgmlspl skel.pl > foo-spec.pl
    +

    +

    (presuming that there is a copy of skel.pl +in the current directory or in a directory searched by perl5) to +generate a new, blank template named +foo-spec.pl.

    + + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/specific.html b/DOC/HTML/sgmlspl/specific.html new file mode 100644 index 0000000..b62b412 --- /dev/null +++ b/DOC/HTML/sgmlspl/specific.html @@ -0,0 +1,118 @@ + + +What are the specific events? + + + +

    Links: Next Previous Up Top

    + +

    What are the specific events?

    + +

    In addition to the generic +events listed in the previous section, sgmlspl allows +special, specific handlers for the beginning and end of elements and +subdocument entities, for SDATA strings, and for external data +entities. Table 2 lists the +different specific event types available.

    + + +

    Table 2: Specific event types

    + +
    +
    +
    Event
    +
    '<GI>'
    +
    Description
    +
    Execute handler at the beginning of +every element named 'GI'.
    + +
    +
    + + +
    +
    Event
    +
    '</GI>'
    +
    Description
    +
    Execute handler at the end of every +element named 'GI'.
    + +
    +
    + + +
    +
    Event
    +
    '|SDATA|'
    +
    Description
    +
    Execute handler for every special-data +string 'SDATA'.
    + +
    +
    + + +
    +
    Event
    +
    '&ENTITY;'
    +
    Description
    +
    Execute handler for every external data +entity named 'ENTITY'.
    + +
    +
    + + +
    +
    Event
    +
    '{ENTITY}'
    +
    Description
    +
    Execute handler at the beginning of +every subdocument entity named 'ENTITY'.
    + +
    +
    + + +
    +
    Event
    +
    '{/ENTITY}'
    +
    Description
    +
    Execute handler at the end of every +subdocument entity named 'ENTITY'.
    + +
    +
    + + +

    Note that these override the generic-event handlers. For example, if you +were to type

    + +

    +

    sgml('&FOO;', sub {
    +    output "Found a \"foo\" entity!\n";
    +});
    +
    +sgml('entity', sub {
    +    output "Found an entity!\n";
    +});
    +

    +

    And the external data entity &FOO; +appeared in your SGML document, sgmlspl would call the first +handler rather than the second.

    + +

    Note also that start and end handlers are entirely separate +things: if an element has a specific start handler but no specific end +handler, the generic end handler will still be called at the end of +the element. To prevent this, declare a handler with an empty string:

    + +

    +

    sgml('</HACK>', '');
    +

    + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/specs.html b/DOC/HTML/sgmlspl/specs.html new file mode 100644 index 0000000..114ea15 --- /dev/null +++ b/DOC/HTML/sgmlspl/specs.html @@ -0,0 +1,41 @@ + + +How does the specification file tell sgmlspl what to do? + + + +

    Links: Next Previous Up Top

    + +

    How does the specification file tell sgmlspl what to do?

    + +

    sgmlspl uses an event model rather than +a procedural model -- instead of +saying "do A then B then C" you say "whenever X +happens, do A; whenever Y happens, do B; whenever Z happens, do +C". In other words, while you design the code, sgmlspl +decides when and how often to run it.

    + +

    The specification file, which contains your instructions, is +regular perl5 code, and you can define packages and subroutines, +display information, read files, create variables, etc. For +processing the SGML document, however, sgmlspl exports a single +subroutine, sgml(event, +handler), into the 'main' package +-- each time you call sgml, you declare a +handler for a specific type of sgmls event, and sgmlspl will then +execute that handler every time the event occurs. You may use +sgml to declare a handler for a generic event, like +'start_element', or a specific event, +like '<DOC>' -- a specific event will +always take precedence over a generic event, so when the +DOC element begins, sgmlspl will execute the +'<DOC>' handler rather than the +'start_element' handler.

    + + +

    Links: Next Previous Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/HTML/sgmlspl/terms.html b/DOC/HTML/sgmlspl/terms.html new file mode 100644 index 0000000..d6991bc --- /dev/null +++ b/DOC/HTML/sgmlspl/terms.html @@ -0,0 +1,32 @@ + + +Terms + + + +

    Links: Next Up Top

    + +

    Terms

    + +

    This program, along with its documentation, is free software; +you can redistribute it and/or modify it under the terms of the GNU +General Public License as published by the Free Software Foundation; +either version 2 of the License, or (at your option) any later +version.

    + +

    This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details.

    + +

    You should have received a copy of the GNU General Public +License along with this program; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    + + +

    Links: Next Up Top

    + + +
    David Megginson <dmeggins@aix1.uottawa.ca>
    + + diff --git a/DOC/LaTeX/sample.pl b/DOC/LaTeX/sample.pl new file mode 120000 index 0000000..9efacbd --- /dev/null +++ b/DOC/LaTeX/sample.pl @@ -0,0 +1 @@ +../sample.pl \ No newline at end of file diff --git a/DOC/LaTeX/sgmlspl.refs b/DOC/LaTeX/sgmlspl.refs new file mode 100644 index 0000000..cb42f50 --- /dev/null +++ b/DOC/LaTeX/sgmlspl.refs @@ -0,0 +1,24 @@ +{ + '' => '', + 'title:SPECIFIC' => 'What are the specific events?', + 'title:DEFINITION' => 'What is {\\sc sgmlspl}?', + 'title:DSSSL' => 'Is {\\sc sgmlspl} the best way to convert {\\sc SGML} documents?', + 'title:TABLE.CLASS.REFS' => 'The SGMLS::Refs class', + 'title:TERMS' => 'Terms', + 'title:OUTPUTEX' => 'How about an example for {\\tt output}?', + 'title:SKEL' => 'Is there an easier way to make specification files?', + 'title:BUGS' => 'Are there any bugs?', + 'title:PUSHOUTPUT' => 'How do I use {\\tt push\\_output}?', + 'title:OUTPUT' => 'Why does {\\sc sgmlspl} use {\\tt output} instead of +{\\tt print}?', + 'title:HANDLERS' => 'What about the {\\tt\\sl handler\\/} argument?', + 'title:INSTALLATION' => 'How do I install {\\sc sgmlspl} on my system?', + 'title:TABLE.OUTPUT.PUSH.OUTPUT' => 'Types for {\\tt push\\_output}', + 'title:FORWARD' => 'How should I handle forward references?', + 'title:TABLE.EVENTS.GENERIC' => '{\\sc sgmlspl} generic events', + 'title:GENERIC' => 'What are the generic events?', + 'title:SPECS' => 'How does the specification file tell {\\sc sgmlspl} what to do?', + 'title:TABLE.EVENTS.SPECIFIC' => 'Specific event types', + 'title:POPOUTPUT' => 'How do I use {\\tt pop\\_output}?', + '' => '' +} diff --git a/DOC/LaTeX/sgmlspl.tex b/DOC/LaTeX/sgmlspl.tex new file mode 100644 index 0000000..e2560f8 --- /dev/null +++ b/DOC/LaTeX/sgmlspl.tex @@ -0,0 +1,575 @@ +\documentstyle[11pt]{article} + +\setlength{\parskip}{3ex} +\raggedright + +\title{sgmlspl: a simple post-processor for SGMLS and NSGMLS (for use +with {\sc SGMLS.pm} version 1.03)} +\author{David Megginson \\ + Department 
of English, \\ + University of Ottawa, \\ + Email: {\tt dmeggins@aix1.uottawa.ca} \\ +} + + +\begin{document} +\maketitle + + +Welcome to {\sc sgmlspl}, a simple sample {\sc perl5} application which +uses the {\sc SGMLS.pm} class library. + + +{\em\section{Terms} +\label{TERMS} + + +This program, along with its documentation, is free software; +you can redistribute it and/or modify it under the terms of the GNU +General Public License as published by the Free Software Foundation; +either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public +License along with this program; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +} + + + +\section{What is {\sc sgmlspl}?} +\label{DEFINITION} + + +{\sc sgmlspl} is a sample application distributed with the +{\sc SGMLS.pm} {\sc perl5} class library {---} you can use it to convert +{\sc SGML} documents to other formats by providing a {\em specification file\/} +detailing exactly how you want to handle each element, external data +entity, subdocument entity, CDATA string, record end, SDATA string, +and processing instruction. {\sc sgmlspl} also uses the {\sc SGMLS::Output} +library (included in this distribution) to allow you to redirect or +capture output. + +To use {\sc sgmlspl}, you simply prepare a specification file +containing regular {\sc perl5} code. 
If your {\sc SGML} document were named +{\tt doc.sgml}, your {\sc sgmlspl} specification file +were named, {\tt spec.pl}, and the name of your new +file were {\tt doc.latex}, then you could use the +following command in a Unix shell to convert your {\sc SGML} document: + +{\footnotesize\begin{verbatim} +sgmls doc.sgml | sgmlspl spec.pl > doc.latex +\end{verbatim}} + +{\sc sgmlspl} will pass any additional arguments on to the specification +file, which can process them in the regular {\sc perl5} fashion. The +specification files used to convert this manual {---} +{\tt tolatex.pl} and {\tt tohtml.pl} +{---} are available with the {\sc SGMLS.pm} distribution. + + + + +\section{How do I install {\sc sgmlspl} on my system?} +\label{INSTALLATION} + + +To use {\sc sgmlspl}, you need to install {\sc SGMLS.pm} on your system, +by copying the {\sc SGMLS.pm} file to a directory searched by {\sc perl5}. +You also need to install {\sc SGMLS::Output} in the same directory, and +{\sc sgmlspl} (with execute permission) somewhere on your +{\tt PATH}. The easiest way to do all of this on a Unix +system is to change to the root directory of this distribution +({\tt SGMLSpm}), edit the {\tt Makefile} +appropriately, and type + +{\footnotesize\begin{verbatim} +make install +\end{verbatim}} + + + + +\section{Is {\sc sgmlspl} the best way to convert {\sc SGML} documents?} +\label{DSSSL} + + +Not necessarily. While {\sc sgmlspl} is fully functional, it is not +always particularly intuitive or pleasant to use. There is a new +proposed standard, {\em Document Style Semantics and +Specification Language\/} ({\sc DSSSL}), based +on the {\sc Scheme} programming language, and +implementations should soon be available. To read more about the +{\sc DSSSL} standard, see {\tt http://www.jclark.com/dsssl/} +on the Internet. 
+ +That said, {\sc DSSSL} is a declarative, +side-effect-free programming language, while {\sc sgmlspl} allows you to +use any programming constructions available in {\sc perl5}, including +those with side-effects. This means that if you want to do more than +simply format the document or convert it from one {\em Document +Type Definition\/} ({\sc DTD}) to another, +{\sc sgmlspl} might be a good choice. + + + + +\section{How does the specification file tell {\sc sgmlspl} what to do?} +\label{SPECS} + + +{\sc sgmlspl} uses an {\em event model\/} rather than +a {\em procedural model\/} {---} instead of +saying {``}do A then B then C{''} you say {``}whenever X +happens, do A; whenever Y happens, do B; whenever Z happens, do +C{''}. In other words, while you design the code, {\sc sgmlspl} +decides when and how often to run it. + +The specification file, which contains your instructions, is +regular {\sc perl5} code, and you can define packages and subroutines, +display information, read files, create variables, etc. For +processing the {\sc SGML} document, however, {\sc sgmlspl} exports a single +subroutine, {\tt sgml({\tt\sl event\/}, +{\tt\sl handler\/})}, into the 'main' package +{---} each time you call {\tt sgml}, you declare a +handler for a specific type of {\sc sgmls} event, and {\sc sgmlspl} will then +execute that handler every time the event occurs. You may use +{\tt sgml} to declare a handler for a {\em generic event\/}, like +{\tt 'start\_element'}, or a {\em specific event\/}, +like {\tt '$<$DOC$>$'} {---} a specific event will +always take precedence over a generic event, so when the +{\tt DOC} element begins, {\sc sgmlspl} will execute the +{\tt '$<$DOC$>$'} handler rather than the +{\tt 'start\_element'} handler. + + + + +\section{What about the {\tt\sl handler\/} argument?} +\label{HANDLERS} + + +The second argument to the {\tt sgml} subroutine is +the actual code or data associated with each event. 
If it is a +string, it will be printed literally using the +{\tt output} subroutine from the {\sc SGMLS::Output} library; if +it is a reference to a {\sc perl5} subroutine, the subroutine will be +called whenever the event occurs. The following three +{\tt sgml} commands will have identical results: + +{\footnotesize\begin{verbatim} +# Example 1 +sgml('<DOC>', "\\begin{document}\n"); + +# Example 2 +sgml('<DOC>', sub { + output "\\begin{document}\n"; +}); + +# Example 3 +sub do_begin_document { output "\\begin{document}\n"; } +sgml('<DOC>', \&do_begin_document); +\end{verbatim}} + +For simply printing a string, of course, it does not make sense +to use a subroutine; however, the subroutines can be useful when you +need to check the value of an attribute, perform different actions in +different contexts, or perform other types of relatively more +complicated post-processing. + +If your handler is a subroutine, then it will receive two +arguments: the {\sc SGMLS.pm} event's data, and the {\sc SGMLS.pm} event +itself (see the {\sc SGMLS.pm} documentation for a description +of event and data types). The following example will print +{\tt '\verb|\|begin\{enumerate\}'} if the value of the attribute +{\tt TYPE} is {\tt 'ORDERED'}, and +{\tt '\verb|\|begin\{itemize\}'} if the value of the attribute +{\tt TYPE} is {\tt 'UNORDERED'}: + +{\footnotesize\begin{verbatim} +sgml('<LIST>', sub { + my ($element,$event) = @_; + my $type = $element->attribute('TYPE')->value; + + if ($type eq 'ORDERED') { + output "\\begin{enumerate}\n"; + } elsif ($type eq 'UNORDERED') { + output "\\begin{itemize}\n"; + } else { + die "Bad TYPE '$type' for element LIST at line " . + $event->line . " in " . $event->file . "\n"; + } +}); +\end{verbatim}} + +You will not always need to use the {\tt\sl event\/} +argument, but it can be useful if you want to report line numbers or +file names for errors (presuming that you called {\sc sgmls} or {\sc nsgmls} +with the {\tt\sl -l\/} option).
If you have a new version +of {\sc nsgmls} which accepts the {\tt\sl -h\/} option, you +can also use the {\tt\sl event\/} argument to look up +arbitrary entities declared by the program. See the SGMLS\_Event documentation for +more information. + + + + +\section{What are the generic events?} +\label{GENERIC} + + +{\sc sgmlspl} recognises the twelve generic events listed in table +\ref{TABLE.EVENTS.GENERIC}. You may provide any one of these +as the first argument to {\tt sgml} to declare a handler +(string or subroutine) for that event. + +\begin{table}[htbp] +\footnotesize +\caption{{\sc sgmlspl} generic events} +\label{TABLE.EVENTS.GENERIC} +\vspace{2ex}\begin{tabular}{l|l} +\parbox[c]{2.225in}{\raggedright\vspace{4pt} Event\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'start'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} (with no arguments) at +the beginning of the parse.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'end'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} (with no arguments) at +the end of the parse.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'start\_element'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} at the beginning of every +element without a specific start handler.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'end\_element'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} at the end of every +element without a specific end handler.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'cdata'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} for every character-data +string.\vspace{4pt}} \\ \hline 
+\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'sdata'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} for every special-data +string without a specific handler.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 're'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} for every +record end.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'pi'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} for every processing +instruction.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'entity'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} for every external data +entity without a specific handler.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'start\_subdoc'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} at the beginning of every +subdocument entity without a specific handler.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'end\_subdoc'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} at the end of every +subdocument entity without a specific handler.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt 'conforming'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} once, at the end of the +document parse, if and only if the document was conforming.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + +The handlers for all of these except the document events +{\tt 'start'} and {\tt 'end'} will receive +two arguments whenever they are called: the first will be the data +associated with the event (if any), and the second will be the +{\tt SGMLS\_Event} object itself (see the document for +{\sc SGMLS.pm}). 
Note the following example, which allows processing +instructions for including the date or the hostname in the document at +parse time: + +{\footnotesize\begin{verbatim} +sgml('pi', sub { + my ($instruction) = @_; + if ($instruction eq 'date') { + output `date`; + } elsif ($instruction eq 'hostname') { + output `hostname`; + } else { + print STDERR "Warning: unknown processing instruction: $instruction\n"; + } +}); +\end{verbatim}} + +With this handler, any occurrence +of {\tt $<$?date$>$} in the original {\sc SGML} document +would be replaced by the current date and time, and any occurrence of +{\tt $<$?hostname$>$} would be replaced by the name of +the host. + + + + +\section{What are the specific events?} +\label{SPECIFIC} + + +In addition to the generic +events listed in the previous section, {\sc sgmlspl} allows +special, specific handlers for the beginning and end of elements and +subdocument entities, for SDATA strings, and for external data +entities. Table \ref{TABLE.EVENTS.SPECIFIC} lists the +different specific event types available.
+ +\begin{table}[htbp] +\footnotesize +\caption{Specific event types} +\label{TABLE.EVENTS.SPECIFIC} +\vspace{2ex}\begin{tabular}{l|l} +\parbox[c]{2.225in}{\raggedright\vspace{4pt} Event\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt '$<$GI$>$'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} at the beginning of +every element named {\tt 'GI'}.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt '$<$/GI$>$'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} at the end of every +element named {\tt 'GI'}.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt '|SDATA|'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} for every special-data +string {\tt 'SDATA'}.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt '\&ENTITY;'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} for every external data +entity named {\tt 'ENTITY'}.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt '\{ENTITY\}'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} at the beginning of +every subdocument entity named {\tt 'ENTITY'}.\vspace{4pt}} \\ \hline +\parbox[c]{2.225in}{\raggedright\vspace{4pt} {\tt '\{/ENTITY\}'}\vspace{4pt}} & \parbox[c]{2.225in}{\raggedright\vspace{4pt} Execute {\tt\sl handler\/} at the end of every +subdocument entity named {\tt 'ENTITY'}.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + +Note that these override the generic-event handlers. 
For example, if you +were to type + +{\footnotesize\begin{verbatim} +sgml('&FOO;', sub { + output "Found a \"foo\" entity!\n"; +}); + +sgml('entity', sub { + output "Found an entity!\n"; +}); +\end{verbatim}} + +and the external data entity {\tt \&FOO;} +appeared in your {\sc SGML} document, {\sc sgmlspl} would call the first +handler rather than the second. + +Note also that start and end handlers are entirely separate +things: if an element has a specific start handler but no specific end +handler, the generic end handler will still be called at the end of +the element. To prevent this, declare a specific end handler with an empty string: + +{\footnotesize\begin{verbatim} +sgml('</GI>', ''); +\end{verbatim}} + + + + +\section{Why does {\sc sgmlspl} use {\tt output} instead of +{\tt print}?} +\label{OUTPUT} + + +{\sc sgmlspl} uses a special {\sc perl5} library {\sc SGMLS::Output} for +printing text. {\sc SGMLS::Output} exports the subroutines +{\tt output({\tt\sl string\/}{\ldots})}, +{\tt push\_output({\tt\sl type\/}[,{\tt\sl data\/}])}, +and {\tt pop\_output}. The subroutine +{\tt output} works much like the regular {\sc perl5} function +{\tt print}, except that you are not able to specify a +file handle, and you may include multiple strings as arguments. + +When you want to write data to somewhere other than +{\tt STDOUT} (the default), then you use the subroutines +{\tt push\_output} and +{\tt pop\_output} to set +a new destination or to restore an old one. + +You can use the {\sc SGMLS::Output} package in other programs by adding +the following line: + +{\footnotesize\begin{verbatim} +use SGMLS::Output; +\end{verbatim}} + + + + +\section{How do I use {\tt push\_output}?} +\label{PUSHOUTPUT} + + +The subroutine +{\tt push\_output({\tt\sl type\/}[,{\tt\sl data\/}])} +takes two arguments: the {\tt\sl type\/}, which is always +required, and the {\tt\sl data\/}, which is needed for +certain types of output.
Table \ref{TABLE.OUTPUT.PUSH.OUTPUT} lists the different types which you +can push onto the output stack. + +\begin{table}[htbp] +\footnotesize +\caption{Types for {\tt push\_output}} +\label{TABLE.OUTPUT.PUSH.OUTPUT} +\vspace{2ex}\begin{tabular}{l|l|l} +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Type\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Data\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'handle'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} a filehandle\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Send all output to the supplied filehandle.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'file'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} a filename\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Open the supplied file for writing, erasing its current +contents (if any), and send all output to it.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'append'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} a filename\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Open the supplied file for writing and append all output to its +current contents.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'pipe'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} a shell command\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Pipe all output to the supplied shell command.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'string'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} a string [optional]\vspace{4pt}} & 
\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Append all output to the supplied string, which will be +returned by {\tt pop\_output}.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'nul'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} [none]\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Ignore all output.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + +Because the output is stack-based, you do not lose the previous +output destination when you push a new one. This is especially +convenient for dealing with data in tree-structures, like {\sc SGML} data +{---} for example, you can capture the contents of sub-elements as +strings, ignore certain types of elements, and split the output from +one {\sc SGML} parse into a series of sub-files. Here are some examples: + +{\footnotesize\begin{verbatim} +push_output('string'); # append output to an empty string +push_output('file','/tmp/foo'); # send output to this file +push_output('pipe','mail webmaster'); # mail output to 'webmaster' (!!) +push_output('nul'); # just ignore all output +\end{verbatim}} + + + + +\section{How do I use {\tt pop\_output}?} +\label{POPOUTPUT} + + +When you want to restore the previous output after using {\tt push\_output}, simply +call the subroutine {\tt pop\_output}. If the output type +was a string, {\tt pop\_output} will return the string +(containing all of the output); otherwise, the return value is not +useful. + +Usually, you will want to use {\tt push\_output} in +the start handler for an element or subdocument entity, and +{\tt pop\_output} in the end handler. 
+ + + + +\section{How about an example for {\tt output}?} +\label{OUTPUTEX} + + +Here is a simple example to demonstrate how {\tt output}, {\tt push\_output}, and +{\tt pop\_output} work: + +{\footnotesize\begin{verbatim} +output "Hello, world!\n"; # (Written to STDOUT by default) +push_output('nul'); # Push 'nul' ahead of STDOUT +output "Hello, again!\n"; # (Discarded) +push_output('file','foo.out'); # Push file 'foo.out' ahead of 'nul' +output "Hello, again!\n"; # (Written to the file 'foo.out') +pop_output; # Pop 'foo.out' and revert to 'nul' +output "Hello, again!\n"; # (Discarded) +push_output('string'); # Push 'string' ahead of 'nul' +output "Hello, "; # (Written to the string) +output "again!\n"; # (Also written to the string) + # Pop the string "Hello, again!\n" +$foo = pop_output; # and revert to 'nul' +output "Hello, again!\n"; # (Discarded) +pop_output; # Pop 'nul' and revert to STDOUT +output "Hello, at last!\n"; # (Written to STDOUT) +\end{verbatim}} + + + + +\section{Is there an easier way to make specification files?} +\label{SKEL} + + +Yes. The script {\tt skel.pl}, included in this +package, is an {\sc sgmlspl} specification which writes a specification +(!!!). To use it under Unix, try something like + +{\footnotesize\begin{verbatim} +sgmls foo.sgml | sgmlspl skel.pl > foo-spec.pl +\end{verbatim}} + +(presuming that there is a copy of {\tt skel.pl} +in the current directory or in a directory searched by {\sc perl5}) to +generate a new, blank template named +{\tt foo-spec.pl}. + + + + +\section{How should I handle forward references?} +\label{FORWARD} + + +Because {\sc sgmlspl} processes the document as a linear data +stream, from beginning to end, it is easy to refer +{\em back\/} to information, but relatively difficult to +refer {\em forward\/}, since you do not know what will be +coming later in the parse. Here are a few suggestions. + +First, you could use {\tt push\_output} and +{\tt pop\_output} to +save up output in a large string. 
When you have found the information +which you need, you can make any necessary modifications to the string +and print it then. This will work for relatively small chunks of a +document, but you would not want to try it for anything larger. + +Next, you could use the {\tt ext} method to +add extra pointers, and build a parse tree of the whole document +before processing any of it. This method will work well for small +documents, but large documents will place some serious stress on your +system's memory and/or swapping. + +A more sophisticated solution, however, involves the +{\sc Refs.pm} module, included in this +distribution. In your {\sc sgmlspl} script, include the line + +{\footnotesize\begin{verbatim} +use SGMLS::Refs; +\end{verbatim}} + +to activate the library. The library will create a database +file to keep track of references between passes, and to tell you if +any references have changed. For example, you might want to try +something like this: + +{\footnotesize\begin{verbatim} +sgml('start', sub { + $Refs = new SGMLS::Refs('references.refs'); +}); +sgml('end', sub { + $Refs->warn; + destroy $Refs; +}); +\end{verbatim}} + +This code will create an object, \$Refs, linked to a file of +references called {\tt references.refs}.
The +{\tt SGMLS::Refs} class understands the methods +listed in table \ref{TABLE.CLASS.REFS} + +\begin{table}[htbp] +\footnotesize +\caption{The SGMLS::Refs class} +\label{TABLE.CLASS.REFS} +\vspace{2ex}\begin{tabular}{l|l|l} +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Method\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return Type\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt new}({\tt\sl filename\/},[{\tt\sl logfile\_handle\/}])\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS::Refs}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Create a new {\tt SGMLS::Refs} object. +Arguments are the name of the hashfile and (optionally) a writable +filehandle for logging changes.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt get}({\tt\sl key\/})\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Look up a reference key in the hash file and return its value.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt put}({\tt\sl key\/},{\tt\sl value\/})\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} [none]\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Set a new value for the key in the hashfile.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt count}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} number\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return the number of references whose values have changed (thus +far).\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt warn}\vspace{4pt}} & 
\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} 1 or 0\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Print a warning mentioning the number of references which have +changed, and return 1 if a warning was printed.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + + + + +\section{Are there any bugs?} +\label{BUGS} + + +Any bugs in {\sc SGMLS.pm} will be here too, since {\sc sgmlspl} relies +heavily on that {\sc perl5} library. + + +\end{document} diff --git a/DOC/LaTeX/sgmlspm.refs b/DOC/LaTeX/sgmlspm.refs new file mode 100644 index 0000000..6462b67 --- /dev/null +++ b/DOC/LaTeX/sgmlspm.refs @@ -0,0 +1,29 @@ +{ + '' => '', + 'title:SGMLS' => 'How do I use {\\sc SGMLS.pm}?', + 'title:PERL5' => 'How do I program in {\\sc perl5}?', + 'title:SGMLSEVENT' => 'So what do I do with an event?', + 'title:EXTEND' => 'How do I design my {\\em own\\/} classes?', + 'title:DEFINITION' => 'What is {\\sc SGMLS.pm}?', + 'title:TERMS' => 'Terms', + 'title:TABLE.CLASS.SGMLS.ENTITY' => 'The {\\tt SGMLS\\_Entity} class', + 'title:XTRAINFO' => 'Is there any extra information available from the {\\sc SGML} +document?', + 'title:EXAMPLE' => 'How about a simple example?', + 'title:TABLE.CLASS.SGMLS.EVENT' => 'The {\\tt SGMLS\\_Event} types', + 'title:SGMLSELEMENT' => 'What do I do with an {\\tt SGMLS\\_Element}?', + 'title:BUGS' => 'Are there any bugs?', + 'title:SGMLSNOTATION' => 'What do I do with an {\\tt SGMLS\\_Notation}?', + 'title:TABLE.CLASS.SGMLS.EXTRA' => 'Additional methods for the {\\tt SGMLS} +class', + 'title:SGML' => 'How do I produce {\\sc SGML} documents?', + 'title:EVENTS' => 'What are the different event types and data?', + 'title:TABLE.CLASS.SGMLS.ELEMENT' => 'The {\\tt SGMLS\\_Element} class', + 'title:TABLE.CLASS.SGMLS.NOTATION' => 'The {\\tt SGMLS\\_Notation class}', + 'title:SGMLSATTRIBUTE' => 'What do I do with an +{\\tt SGMLS\\_Attribute}?', + 'title:TABLE.CLASS.SGMLS' => 'The {\\tt SGMLS\\_Event} class', + 
'title:TABLE.CLASS.SGMLS.ATTRIBUTE' => 'The {\\tt SGMLS\\_Attribute} class', + 'title:SGMLSENTITY' => 'What do I do with an {\\tt SGMLS\\_Entity}?', + '' => '' +} diff --git a/DOC/LaTeX/sgmlspm.tex b/DOC/LaTeX/sgmlspm.tex new file mode 100644 index 0000000..11c42bb --- /dev/null +++ b/DOC/LaTeX/sgmlspm.tex @@ -0,0 +1,583 @@ +\documentstyle[11pt]{article} + +\setlength{\parskip}{3ex} +\raggedright + +\title{SGMLS.pm: a perl5 class library for handling output from the +SGMLS and NSGMLS parsers (version 1.03)} +\author{David Megginson \\ + Department of English, \\ + University of Ottawa, \\ + Email: {\tt dmeggins@aix1.uottawa.ca} \\ +} + + +\begin{document} +\maketitle + + +Welcome to {\sc SGMLS.pm}, an extensible {\sc perl5} class library for +processing the output from the {\sc sgmls} and {\sc nsgmls} parsers. +{\sc SGMLS.pm} is free, copyrighted software available by anonymous ftp in +the directory ftp://aix1.uottawa.ca/pub/dmeggins/. +You might also want to look at the documentation for {\sc sgmlspl}, +a simple sample script which uses {\sc SGMLS.pm} to convert documents from +{\sc SGML} to other formats. + + +{\em\section{Terms} +\label{TERMS} + + +This program, along with its documentation, is free software; +you can redistribute it and/or modify it under the terms of the GNU +General Public License as published by the Free Software Foundation; +either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public +License along with this program; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ + +} + + + +\section{What is {\sc SGMLS.pm}?} +\label{DEFINITION} + + +{\sc SGMLS.pm} is an extensible {\sc perl5} +class library for parsing the output from James Clark's popular +{\sc sgmls} and {\sc nsgmls} parsers, available on the Internet at {\tt ftp://jclark.com}. +This is {\em not\/} a complete system for translating +documents written in the {\em Standard Generalised Markup +Language\/} ({\sc SGML}) into other formats, but it can easily +form the basis of such a system (for a simple example, see the {\sc sgmlspl} +program included in this package). + +The library recognises four basic types of {\sc SGML} objects: the +{\em element\/}, the +{\em attribute\/}, +the {\em notation\/}, and the +{\em entity\/}; each +of these is a fully-developed class with methods for accessing +important information. + + + + +\section{How do I produce {\sc SGML} documents?} +\label{SGML} + + +I am presuming here that you are already experienced with {\sc SGML} +and the {\sc sgmls} or {\sc nsgmls} parser. For help with the parsers see the +manual pages accompanying each one; for help with {\sc SGML} see Robin +Cover's SGML Web Page at {\tt http://www.sil.org/sgml/sgml.html} +on the Internet. + + + + +\section{How do I program in {\sc perl5}?} +\label{PERL5} + + +If you have to ask this question, you probably should not be +trying to use this library right now, since it is intended only for +experienced {\sc perl5} programmers. That said, however, you can find the +{\sc perl5} documentation with the {\sc perl5} source distribution or on the +World-Wide Web at {\tt http://www.metronet.com/0/perlinfo/perl5/manual/perl.html}. + +{\em Please\/} do not write to me for help on using +{\sc perl5}.
+ + + + +\section{How do I use {\sc SGMLS.pm}?} +\label{SGMLS} + + +First, you need to copy the file {\sc SGMLS.pm} to a directory where +perl can find it (on a Unix system, it might be +{\tt /usr/lib/perl5} or +{\tt /usr/local/lib/perl5}, or whatever the environment +variable {\tt PERL5LIB} is set to) and make certain that it +is world-readable. + +Next, near the top of your {\sc perl5} program, type the following +line: + +{\footnotesize\begin{verbatim} +use SGMLS; +\end{verbatim}} + +You must then open up a file handle from which {\sc SGMLS.pm} can read the +data from an {\sc sgmls} or {\sc nsgmls} process, unless you are reading from +a standard handle like {\tt STDIN} {---} for example, +if you are piping the output from {\sc sgmls} to a {\sc perl5} script, using +something like + +{\footnotesize\begin{verbatim} +sgmls foo.sgml | perl myscript.pl +\end{verbatim}} + +then the predefined filehandle {\tt STDIN} will be +sufficient. In DOS, you might want to dump the sgmls output to a file +and use it as standard input (or open it explicitly in perl), and in +Unix, you might actually want to open a pipe or socket for the input. +{\sc SGMLS.pm} doesn't need to seek, so any input stream should +work. + +To parse the {\sc sgmls} or {\sc nsgmls} output from the handle, create +a new object instance of the {\tt SGMLS} class with +the handle as an argument, i.e. + +{\footnotesize\begin{verbatim} +$parse = new SGMLS(STDIN); +\end{verbatim}} + +(You may create more than one {\tt SGMLS} +object at once, but each object {\em must\/} have a +unique handle pointing to a unique stream, or +{\em chaos\/} will result.) 
Now, you can retrieve and +process events using the {\tt next\_event} method: + +{\footnotesize\begin{verbatim} +while ($event = $parse->next_event) { + #do something with each event +} +\end{verbatim}} + + + + +\section{So what do I do with an event?} +\label{SGMLSEVENT} + + +The {\tt next\_event} method for the {\tt SGMLS} class returns an +object belonging to the class {\tt SGMLS\_Event}. +This class has several methods available, as listed in table \ref{TABLE.CLASS.SGMLS}. + +\begin{table}[htbp] +\footnotesize +\caption{The {\tt SGMLS\_Event} class} +\label{TABLE.CLASS.SGMLS} +\vspace{2ex}\begin{tabular}{l|l|l} +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Method\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return Type\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt type}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return the type of the event.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt data}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string, {\tt SGMLS\_Element}, or +{\tt SGMLS\_Entity}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return any data associated with the event.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt file}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return the name of the {\sc SGML} source file which generated the +event, if available.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt line}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & 
\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return the line number of the {\sc SGML} source file which +generated the event, if available.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt element}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Element}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return the element in force when the event was +generated.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt parse}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return the {\tt SGMLS} object for the current +parse.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt entity({\tt\sl ename\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Entity}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Look up an entity from those currently known to the parse. An +alias for {\tt ->parse->entity(\$ename)}.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt notation({\tt\sl nname\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Notation}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Look up the notation from those currently known to the parse: +an alias for {\tt ->parse->notation(\$nname)}.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + +The {\tt file} and {\tt line} methods +will return useful information only if you called {\sc sgmls} or {\sc nsgmls} +with the {\tt\sl -l\/} flag to include file and +line-number information. + + + + +\section{What are the different event types and data?} +\label{EVENTS} + + +Table \ref{TABLE.CLASS.SGMLS.EVENT} lists the ten +different event types returned by the {\tt next\_event} +method of an {\tt SGMLS} +object and the different types of data associated with each of these +(note that these do {\em not\/} correspond to the +standard {\sc ESIS} events).
+ +\begin{table}[htbp] +\footnotesize +\caption{The {\tt SGMLS\_Event} types} +\label{TABLE.CLASS.SGMLS.EVENT} +\vspace{2ex}\begin{tabular}{l|l|l} +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Event Type\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Event Data\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'start\_element'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Element}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The beginning of an element.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'end\_element'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Element}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The end of an element.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'cdata'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Regular character data.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'sdata'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Special system data.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 're'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} [none]\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} A record-end (i.e., a newline).\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'pi'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & 
\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} A processing instruction\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'entity'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Entity}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} A non-SGML external entity.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'start\_subdoc'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Entity}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The beginning of an SGML subdocument.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'end\_subdoc'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Entity}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The end of an SGML subdocument.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt 'conforming'}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} [none]\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The document was valid.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + +For example, if {\tt \$event->type} returns +{\tt 'start\_element'}, then +{\tt \$event->data} will return an object belonging to the +{\tt SGMLS\_Element} +class (which will contain a list of attributes, etc. {---} see +below), {\tt \$event->file} and +{\tt \$event->line} will return the file and line-number +in which the element appeared (if you called {\sc sgmls} or {\sc nsgmls} with +the {\tt\sl -l\/} flag), and +{\tt \$event->element} will return the element currently +in force (in this case, the same as +{\tt \$event->data}). 
+ + + + +\section{What do I do with an {\tt SGMLS\_Element}?} +\label{SGMLSELEMENT} + + +Altogether, there are six classes in {\sc SGMLS.pm}, each with its +own methods: in addition to {\tt SGMLS} (for the parse) and +{\tt SGMLS\_Event} +(for a specific event), the classes are +{\tt SGMLS\_Element}, {\tt SGMLS\_Attribute}, +{\tt SGMLS\_Entity}, +and {\tt SGMLS\_Notation}. +Like all of these, {\tt SGMLS\_Element} has a number +of methods available for obtaining different types of information. +For example, if you were to use + +{\footnotesize\begin{verbatim} +my $element = $event->data +\end{verbatim}} + +to retrieve the data for a {\tt 'start\_element'} or +{\tt 'end\_element'} event, then you could use the methods +listed in table \ref{TABLE.CLASS.SGMLS.ELEMENT} to find more +information about the element. + +\begin{table}[htbp] +\footnotesize +\caption{The {\tt SGMLS\_Element} class} +\label{TABLE.CLASS.SGMLS.ELEMENT} +\vspace{2ex}\begin{tabular}{l|l|l} +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Method\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return Type\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt name}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The name (or GI), in upper-case.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt parent}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Element}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The parent element, or {\tt ''} if this is the top +element.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt attributes}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} HASH\vspace{4pt}} & 
\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return a reference to a hash table of +{\tt SGMLS\_Attribute} objects, keyed by the attribute +names (in upper-case).\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt attribute\_names}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} ARRAY\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} A list of all attribute names for the current element (in +upper-case).\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt attribute({\tt\sl aname\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Attribute}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return the attribute named ANAME.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt set\_attribute({\tt\sl attribute\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} [none]\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The {\tt\sl attribute\/} argument should be an +object belonging to the {\tt SGMLS\_Attribute} +class. 
Add it to the element, replacing any previous attribute with +the same name.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt in({\tt\sl name\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Element}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} If the current element's parent is named +{\tt\sl name\/}, return the parent; otherwise, return +{\tt ''}.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt within({\tt\sl name\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Element}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} If any ancestor of the current element is named +{\tt\sl name\/}, return it; otherwise, return +{\tt ''}.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + + + + +\section{What do I do with an +{\tt SGMLS\_Attribute}?} +\label{SGMLSATTRIBUTE} + + +Note that objects of the {\tt SGMLS\_Attribute} +class do not have events in their own right, and are available only +through the {\tt attributes} or +{\tt attribute({\tt\sl aname\/})} methods for +{\tt SGMLS\_Element} +objects. An object belonging to the +{\tt SGMLS\_Attribute} class will recognise the +methods listed in table \ref{TABLE.CLASS.SGMLS.ATTRIBUTE}. 
+ +\begin{table}[htbp] +\footnotesize +\caption{The {\tt SGMLS\_Attribute} class} +\label{TABLE.CLASS.SGMLS.ATTRIBUTE} +\vspace{2ex}\begin{tabular}{l|l|l} +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Method\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return Type\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt name}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The name of the attribute (in upper-case).\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt type}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The type of the attribute: {\tt 'IMPLIED'}, +{\tt 'CDATA'}, {\tt 'NOTATION'}, +{\tt 'ENTITY'}, or {\tt 'TOKEN'}.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt value}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string, {\tt SGMLS\_Entity}, or +{\tt SGMLS\_Notation}.\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The value of the attribute. 
If the type is +{\tt 'CDATA'} or {\tt 'TOKEN'}, it will be a +simple string; if it is {\tt 'NOTATION'} it will be an +object belonging to the {\tt SGMLS\_Notation} class, +and if it is {\tt 'ENTITY'} it will be an object +belonging to the {\tt SGMLS\_Entity} class.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt is\_implied}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} boolean\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return true if the value of the attribute is implied, or false if +it has an explicit value.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt set\_type({\tt\sl type\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} [none]\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Provide a new type for the current attribute -- no sanity +checking will be performed, so be careful.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt set\_value({\tt\sl value\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} [none]\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Provide a new value for the current attribute -- no sanity +checking will be performed, so be careful.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + +Note that the type {\tt 'TOKEN'} includes both +individual tokens and lists of tokens (i.e., {\tt 'TOKENS'}, +{\tt 'IDS'}, or {\tt 'IDREFS'} in the +original {\sc SGML} document), so you might need to use the perl function +'split' to break the value string into a list.
+ + + + +\section{What do I do with an {\tt SGMLS\_Entity}?} +\label{SGMLSENTITY} + + +An {\tt SGMLS\_Entity} object can come in an +{\tt 'entity'} event (in +which case it is always external), in a +{\tt 'start\_subdoc'} or {\tt 'end\_subdoc'} +event (in which case it always has the type +{\tt 'SUBDOC'}), or as the value of an attribute (in +which case it may be internal or external). An object belonging to +the {\tt SGMLS\_Entity} class may use the methods +listed in table \ref{TABLE.CLASS.SGMLS.ENTITY}. + +\begin{table}[htbp] +\footnotesize +\caption{The {\tt SGMLS\_Entity} class} +\label{TABLE.CLASS.SGMLS.ENTITY} +\vspace{2ex}\begin{tabular}{l|l|l} +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Method\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return Type\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt name}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The entity name.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt type}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The entity type: {\tt 'CDATA'}, +{\tt 'SDATA'}, {\tt 'NDATA'}, or +{\tt 'SUBDOC'}.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt value}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The entity replacement text (internal entities +only).\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt sysid}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & 
\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The system identifier (external entities only).\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt pubid}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The public identifier (external entities only).\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt filenames}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} ARRAY\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} A list of file names generated from the sysid and pubid +(external entities only).\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt notation}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Notation}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The associated notation (external data entities only).\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + +An entity of type {\tt 'SUBDOC'} will have a sysid +and pubid, an external data entity will have a sysid, pubid, +filenames, and a notation, and an internal data entity will have a +value. + + + + +\section{What do I do with an {\tt SGMLS\_Notation}?} +\label{SGMLSNOTATION} + + +The fourth data class is the notation, which is available only +as a return value from the {\tt notation} method of an +{\tt SGMLS\_Entity} +or the {\tt value} method of an {\tt SGMLS\_Attribute} +with type {\tt 'NOTATION'}. You can use the notation to +decide how to treat non-SGML data (such as graphics). An object +belonging to the {\tt SGMLS\_Notation} class will have +access to the methods listed in table \ref{TABLE.CLASS.SGMLS.NOTATION}.
+ +\begin{table}[htbp] +\footnotesize +\caption{The {\tt SGMLS\_Notation} class} +\label{TABLE.CLASS.SGMLS.NOTATION} +\vspace{2ex}\begin{tabular}{l|l|l} +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Method\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return Type\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt name}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The notation's name.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt sysid}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The notation's system identifier.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt pubid}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} The notation's public identifier.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + +What you do with this information is +{\em entirely\/} up to you. + + + + +\section{Is there any extra information available from the {\sc SGML} +document?} +\label{XTRAINFO} + + +The {\tt SGMLS} +object which you created at the beginning of the parse has several +methods available in addition to {\tt next\_event} {---} +you will find them all listed in table \ref{TABLE.CLASS.SGMLS.EXTRA}. There should normally be no need to +use the {\tt notation} and {\tt entity} +methods, since {\sc SGMLS.pm} will look up entities and notations for you +automatically as needed.
+ +\begin{table}[htbp] +\footnotesize +\caption{Additional methods for the {\tt SGMLS} +class} +\label{TABLE.CLASS.SGMLS.EXTRA} +\vspace{2ex}\begin{tabular}{l|l|l} +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Method\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return Type\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Description\vspace{4pt}} \\ \hline\hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt next\_event}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Event}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return the next event.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt appinfo}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} string\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Return the APPINFO parameter from the {\sc SGML} declaration, if +any.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt notation({\tt\sl nname\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Notation}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Look up a notation by name.\vspace{4pt}} \\ \hline +\parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt entity({\tt\sl ename\/})}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} {\tt SGMLS\_Entity}\vspace{4pt}} & \parbox[c]{1.48333333333333in}{\raggedright\vspace{4pt} Look up an entity by name.\vspace{4pt}} \\ \hline +\end{tabular}\end{table} + + + + +\section{How about a simple example?} +\label{EXAMPLE} + + +OK. The following script simply reports its events: + +{\footnotesize\begin{verbatim} +#!/usr/bin/perl + +use SGMLS; + +$this_parse = new SGMLS(STDIN); # Read from standard input. 
+ +while ($this_event = $this_parse->next_event) { + my $type = $this_event->type; + my $data = $this_event->data; + SWITCH: { + $type eq 'start_element' && do { + print "Beginning element: " . $data->name . "\n"; + last SWITCH; + }; + $type eq 'end_element' && do { + print "Ending element: " . $data->name . "\n"; + last SWITCH; + }; + $type eq 'cdata' && do { + print "Character data: " . $data . "\n"; + last SWITCH; + }; + $type eq 'sdata' && do { + print "Special data: " . $data . "\n"; + last SWITCH; + }; + $type eq 're' && do { + print "Record End\n"; + last SWITCH; + }; + $type eq 'pi' && do { + print "Processing Instruction: " . $data . "\n"; + last SWITCH; + }; + $type eq 'entity' && do { + print "External Data Entity: " . $data->name . + " with notation " . $data->notation->name . "\n"; + last SWITCH; + }; + $type eq 'start_subdoc' && do { + print "Beginning Subdocument Entity: " . $data->name . "\n"; + last SWITCH; + }; + $type eq 'end_subdoc' && do { + print "Ending Subdocument Entity: " . $data->name . "\n"; + last SWITCH; + }; + $type eq 'conforming' && do { + print "This is a conforming SGML document\n"; + last SWITCH; + }; + } +} + +\end{verbatim}} + +To use it under Unix, try something like + +{\footnotesize\begin{verbatim} +sgmls document.sgml | perl sample.pl +\end{verbatim}} + +and watch the output scroll down. + + + + +\section{How do I design my {\em own\/} classes?} +\label{EXTEND} + + +In addition to the methods listed above, all of the classes used +in {\sc SGMLS.pm} have an {\tt ext} method which returns a +reference to an initially-empty hash table. You are free to use this +hash table to store {\em anything\/} you want {---} it +should be especially useful if you are building your own, derived +classes from the ones provided here. 
The following example builds a +derived class {\tt My\_Element} from the {\tt SGMLS\_Element} +class, adding methods to set and get the current font: + +{\footnotesize\begin{verbatim} +use SGMLS; + +package My_Element; +@ISA = qw(SGMLS_Element); + +sub new { + my ($class,$element,$font) = @_; + $element->ext->{'font'} = $font; + return bless $element; +} + +sub get_font { + my ($self) = @_; + return $self->ext->{'font'}; +} + +sub set_font { + my ($self,$font) = @_; + $self->ext->{'font'} = $font; +} +\end{verbatim}} + +Note that the derived class does not need to have any knowledge +about the underlying structure of the {\tt SGMLS\_Element} +class, and need only avoid shadowing any of the methods currently +existing there. + +If you decide to create a derived class from the {\tt SGMLS}, please note that in +addition to the methods listed above, that class uses internal methods +named {\tt element}, {\tt line}, and +{\tt file}, similar to the same methods in {\tt SGMLS\_Event} {---} +it is essential that you not shadow these method names. + + + + +\section{Are there any bugs?} +\label{BUGS} + + +Of course! Right now, {\sc SGMLS.pm} silently ignores link attributes +({\sc nsgmls} only) and data attributes, and there may be many other bugs +which I have not yet found. + + +\end{document} diff --git a/DOC/Makefile b/DOC/Makefile new file mode 100644 index 0000000..96c0831 --- /dev/null +++ b/DOC/Makefile @@ -0,0 +1,124 @@ +######################################################################## +# Makefile for producing HTML and LaTeX (and thence, DVI and +# Postscript) versions of the SGML documentation. +# +# version 1.03 +# +# *** NOTE *** +# +# This will _not_ work unless you have the Docbook 2.2.1 DTD properly +# installed on your system, and sgmls or nsgmls can find it using the +# public identifier "-//HaL and O'Reilly//DTD DocBook//EN". 
You will +# also need the character-entity files with the public identifiers +# +# "ISO 8879:1986//ENTITIES Publishing//EN" +# and +# "ISO 8879:1986//ENTITIES Numeric and Special Graphic//EN" +# +# installed. Copies of the source files for these are included in +# the Extras/ subdirectory. +######################################################################## + +# +# Beginning of user configuration options. +# + +# Which program do you use: sgmls or nsgmls? +SGMLS = nsgmls + +# What is the command for running LaTeX on a file? +LATEX = latex + +# Do you want to supply a full path for sgmlspl? +SGMLSPL = sgmlspl + +# If you want Postscript, what is the command for converting DVI to Postscript? +DVIPS = dvips + +# What SGML declaration do you use with the Docbook 2.2.1 DTD? +SGMLDECL = /usr/local/lib/sgml/sgmldecl/docbook.dcl + +# +# End of user configuration options. +# + +SHELL = /bin/sh +LATEXSPEC = tolatex.pl +HTMLSPEC = tohtml.pl + +all: latex html + +latex: LaTeX/sgmlspm.tex LaTeX/sgmlspl.tex +dvi: LaTeX/sgmlspm.dvi LaTeX/sgmlspl.dvi +ps: PS/sgmlspm.ps PS/sgmlspl.ps +html: HTML/SGMLSpm/sgmlspm.html HTML/sgmlspl/sgmlspl.html + + +LaTeX/sgmlspm.tex: sgmlspm.sgml ${LATEXSPEC} + @echo Converting sgmlspm.sgml to LaTeX...; \ + cd LaTeX; ln -sf ../sample.pl .; \ + if [ \! 
-r sgmlspm.refs -o -r .redo_sgmlspm ]; then \ + echo \(Preliminary LaTeX pass for sgmlspm.sgml...\); \ + ${SGMLS} ${SGMLDECL} ../sgmlspm.sgml \ + | $(SGMLSPL) ../${LATEXSPEC} sgmlspm > sgmlspm.tex; \ + fi; \ + echo \(Main LaTeX pass for sgmlspm.sgml...\); \ + ${SGMLS} ${SGMLDECL} ../sgmlspm.sgml \ + | $(SGMLSPL) ../${LATEXSPEC} sgmlspm >sgmlspm.tex + +LaTeX/sgmlspm.dvi: LaTeX/sgmlspm.tex + @echo Converting sgmlspm.tex to DVI...; \ + cd LaTeX; ${LATEX} sgmlspm.tex; ${LATEX} sgmlspm.tex + +PS/sgmlspm.ps: LaTeX/sgmlspm.dvi + @echo Converting sgmlspm.dvi to Postscript...; \ + ${DVIPS} -o PS/sgmlspm.ps LaTeX/sgmlspm.dvi + +HTML/SGMLSpm/sgmlspm.html: sgmlspm.sgml ${HTMLSPEC} + @cd HTML/SGMLSpm; ln -sf ../../sample.pl .; \ + echo Converting sgmlspm.sgml to HTML...; \ + if [ \! -r sgmlspm.refs -o -r .redo_sgmlspm ]; then \ + echo \(Preliminary HTML pass for sgmlspm.sgml...\); \ + ${SGMLS} ${SGMLDECL} ../../sgmlspm.sgml \ + | $(SGMLSPL) ../../${HTMLSPEC} sgmlspm; \ + fi; \ + echo \(Main HTML pass for sgmlspm.sgml...\); \ + ${SGMLS} ${SGMLDECL} ../../sgmlspm.sgml \ + | $(SGMLSPL) ../../${HTMLSPEC} sgmlspm; + +LaTeX/sgmlspl.tex: sgmlspl.sgml ${LATEXSPEC} + @echo Converting sgmlspl.sgml to LaTeX...; \ + cd LaTeX; \ + if [ \! -r sgmlspl.refs -o -r .redo_sgmlspl ]; then \ + echo \(Preliminary LaTeX pass for sgmlspl.sgml...\); \ + ${SGMLS} ${SGMLDECL} ../sgmlspl.sgml \ + | $(SGMLSPL) ../${LATEXSPEC} sgmlspl > sgmlspl.tex; \ + fi; \ + echo \(Main LaTeX pass for sgmlspl.sgml...\); \ + ${SGMLS} ${SGMLDECL} ../sgmlspl.sgml \ + | $(SGMLSPL) ../${LATEXSPEC} sgmlspl > sgmlspl.tex + +LaTeX/sgmlspl.dvi: LaTeX/sgmlspl.tex + @echo Converting sgmlspl.tex to DVI...; \ + cd LaTeX; \ + ${LATEX} sgmlspl.tex; ${LATEX} sgmlspl.tex + +PS/sgmlspl.ps: LaTeX/sgmlspl.dvi + @echo Converting sgmlspl.dvi to Postscript...; \ + ${DVIPS} -o PS/sgmlspl.ps LaTeX/sgmlspl.dvi + +HTML/sgmlspl/sgmlspl.html: sgmlspl.sgml ${HTMLSPEC} + @cd HTML/sgmlspl; \ + echo Converting sgmlspl.sgml to HTML...; \ + if [ ! 
-r sgmlspl.refs -o -r .redo_sgmlspl ]; then \ + echo \(Preliminary HTML pass for sgmlspl.sgml...\); \ + ${SGMLS} ${SGMLDECL} ../../sgmlspl.sgml \ + | $(SGMLSPL) ../../${HTMLSPEC} sgmlspl; \ + fi; \ + echo \(Main HTML pass for sgmlspl.sgml...\); \ + ${SGMLS} ${SGMLDECL} ../../sgmlspl.sgml \ + | $(SGMLSPL) ../../${HTMLSPEC} sgmlspl; + +clean: + rm -f *~ core LaTeX/* HTML/SGMLSpm/* HTML/sgmlspl/* PS/* + diff --git a/DOC/README b/DOC/README new file mode 100644 index 0000000..a9df29a --- /dev/null +++ b/DOC/README @@ -0,0 +1,11 @@ +This directory contains the SGML source files documenting SGMLS.pm and +sgmlspl. If you are working in Unix or a Unix-like environment, make +any necessary changes in the Makefile then type one of the following: + + make latex Generate LaTeX source files of the documentation. + make dvi Generate a printable DVI version of the documentation. + make ps Generate a printable Postscript version of the docs. + make html Generate an HTML version of the documentation. + make all Generate _all_ of the above. + make clean Remove all generated files. + diff --git a/DOC/TODO b/DOC/TODO new file mode 100644 index 0000000..78ccbc6 --- /dev/null +++ b/DOC/TODO @@ -0,0 +1,4 @@ +SGMLS.pm documentation: things to do + +- move to the new HTML table standard instead of breaking the tables +up into lists. \ No newline at end of file diff --git a/DOC/sample.pl b/DOC/sample.pl new file mode 100644 index 0000000..27bf221 --- /dev/null +++ b/DOC/sample.pl @@ -0,0 +1,53 @@ +#!/usr/bin/perl + +use SGMLS; + +$this_parse = new SGMLS(STDIN); # Read from standard input. + +while ($this_event = $this_parse->next_event) { + my $type = $this_event->type; + my $data = $this_event->data; + SWITCH: { + $type eq 'start_element' && do { + print "Beginning element: " . $data->name . "\n"; + last SWITCH; + }; + $type eq 'end_element' && do { + print "Ending element: " . $data->name . "\n"; + last SWITCH; + }; + $type eq 'cdata' && do { + print "Character data: " . $data .
"\n"; + last SWITCH; + }; + $type eq 'sdata' && do { + print "Special data: " . $data . "\n"; + last SWITCH; + }; + $type eq 're' && do { + print "Record End\n"; + last SWITCH; + }; + $type eq 'pi' && do { + print "Processing Instruction: " . $data . "\n"; + last SWITCH; + }; + $type eq 'entity' && do { + print "External Data Entity: " . $data->name . + " with notation " . $data->notation->name . "\n"; + last SWITCH; + }; + $type eq 'start_subdoc' && do { + print "Beginning Subdocument Entity: " . $data->name . "\n"; + last SWITCH; + }; + $type eq 'end_subdoc' && do { + print "Ending Subdocument Entity: " . $data->name . "\n"; + last SWITCH; + }; + $type eq 'conforming' && do { + print "This is a conforming SGML document\n"; + last SWITCH; + }; + } +} diff --git a/DOC/sgmlspl.sgml b/DOC/sgmlspl.sgml new file mode 100644 index 0000000..8d7ce88 --- /dev/null +++ b/DOC/sgmlspl.sgml @@ -0,0 +1,780 @@ + + %ISOpub; + + %ISOnum; + + SGML"> + ESIS"> + SGMLS.pm"> + SGMLS::Output"> + sgmlspl"> + perl5"> + sgmls"> + nsgmls"> + +]> + +
    + + + +sgmlspl: a simple post-processor for SGMLS and NSGMLS (for use +with &sgmls.pm; version 1.03) + + + +David +Megginson + +University of Ottawa +Department of English +
    dmeggins@aix1.uottawa.ca
    +
    +
    +
    + +[unpublished] + +
    + +Welcome to &sgmlspl;, a simple sample &perl5; application which +uses the &sgmls.pm; class library. + + +Terms + +This program, along with its documentation, is free software; +you can redistribute it and/or modify it under the terms of the GNU +General Public License as published by the Free Software Foundation; +either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public +License along with this program; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + + + +What is &sgmlspl;? + +&sgmlspl; is a sample application distributed with the +&sgmls.pm; &perl5; class library — you can use it to convert +&sgml; documents to other formats by providing a specification file +detailing exactly how you want to handle each element, external data +entity, subdocument entity, CDATA string, record end, SDATA string, +and processing instruction. &sgmlspl; also uses the &output.pm; +library (included in this distribution) to allow you to redirect or +capture output. + +To use &sgmlspl;, you simply prepare a specification file +containing regular &perl5; code. If your &sgml; document were named +doc.sgml, your &sgmlspl; specification file +were named spec.pl, and the name of your new +file were doc.latex, then you could use the +following command in a Unix shell to convert your &sgml; document: + + +sgmls doc.sgml | sgmlspl spec.pl > doc.latex + + +&sgmlspl; will pass any additional arguments on to the specification +file, which can process them in the regular &perl5; fashion. The +specification files used to convert this manual — +tolatex.pl and tohtml.pl +— are available with the &sgmls.pm; distribution.
+ + + + + +How do I install &sgmlspl; on my system? + +To use &sgmlspl;, you need to install &sgmls.pm; on your system, +by copying the &sgmls.pm; file to a directory searched by &perl5;. +You also need to install &output.pm; in the same directory, and +&sgmlspl; (with execute permission) somewhere on your +PATH. The easiest way to do all of this on a Unix +system is to change to the root directory of this distribution +(SGMLSpm), edit the Makefile +appropriately, and type + + +make install + + + + + + +Is &sgmlspl; the best way to convert &sgml; documents? + +Not necessarily. While &sgmlspl; is fully functional, it is not +always particularly intuitive or pleasant to use. There is a new +proposed standard, Document Style Semantics and +Specification Language (DSSSL), based +on the Scheme programming language, and +implementations should soon be available. To read more about the +DSSSL standard, see http://www.jclark.com/dsssl/ +on the Internet. + +That said, DSSSL is a declarative, +side-effect-free programming language, while &sgmlspl; allows you to +use any programming constructions available in &perl5;, including +those with side-effects. This means that if you want to do more than +simply format the document or convert it from one Document +Type Definition (DTD) to another, +&sgmlspl; might be a good choice. + + + + + +How does the specification file tell &sgmlspl; what to do? + +&sgmlspl; uses an event model rather than +a procedural model — instead of +saying do A then B then C you say whenever X +happens, do A; whenever Y happens, do B; whenever Z happens, do +C. In other words, while you design the code, &sgmlspl; +decides when and how often to run it. + +The specification file, which contains your instructions, is +regular &perl5; code, and you can define packages and subroutines, +display information, read files, create variables, etc. 
For +processing the &sgml; document, however, &sgmlspl; exports a single +subroutine, sgml(event, +handler), into the 'main' package +— each time you call sgml, you declare a +handler for a specific type of &sgmls; event, and &sgmlspl; will then +execute that handler every time the event occurs. You may use +sgml to declare a handler for a generic event, like +'start_element', or a specific event, +like '<DOC>' — a specific event will +always take precedence over a generic event, so when the +DOC element begins, &sgmlspl; will execute the +'<DOC>' handler rather than the +'start_element' handler. + + + + + +What about the <parameter>handler</parameter> argument? + +The second argument to the sgml subroutine is +the actual code or data associated with each event. If it is a +string, it will be printed literally using the +output subroutine from the &output.pm; library; if +it is a reference to a &perl5; subroutine, the subroutine will be +called whenever the event occurs. The following three +sgml commands will have identical results: + + +# Example 1 +sgml('<DOC>', "\\begin{document}\n"); + +# Example 2 +sgml('<DOC>', sub { + output "\\begin{document}\n"; +}); + +# Example 3 +sub do_begin_document { output "\\begin{document}\n"; } +sgml('<DOC>', \&do_begin_document); + + +For simply printing a string, of course, it does not make sense +to use a subroutine; however, the subroutines can be useful when you +need to check the value of an attribute, perform different actions in +different contexts, or perform other types of relatively more +complicated post-processing. + +If your handler is a subroutine, then it will receive two +arguments: the &sgmls.pm; event's data, and the &sgmls.pm; event +itself (see the &sgmls.pm; documentation for a description +of event and data types). 
The following example will print +'\begin{enumerate}' if the value of the attribute +TYPE is 'ORDERED', and +'\begin{itemize}' if the value of the attribute +TYPE is 'UNORDERED': + + +sgml('<LIST>', sub { + my ($element,$event) = @_; + my $type = $element->attribute('TYPE')->value; + + if ($type eq 'ORDERED') { + output "\\begin{enumerate}\n"; + } elsif ($type eq 'UNORDERED') { + output "\\begin{itemize}\n"; + } else { + die "Bad TYPE '$type' for element LIST at line " . + $event->line . " in " . $event->file . "\n"; + } +}); + + +You will not always need to use the event +argument, but it can be useful if you want to report line numbers or +file names for errors (presuming that you called &sgmls; or &nsgmls; +with the -l option). If you have a new version +of &nsgmls; which accepts the -h option, you +can also use the event argument to look up +arbitrary entities declared by the program. See the SGMLS_Event documentation for +more information. + + + + + +What are the generic events? + +&sgmlspl; recognises the twelve generic events listed in table +. You may provide any one of these +as the first argument to sgml to declare a handler +(string or subroutine) for that event. + + +&sgmlspl; generic events + + + + + + +Event +Description + + + + + + + +'start' +Execute handler (with no arguments) at +the beginning of the parse. + + + +'end' +Execute handler (with no arguments) at +the end of the parse. + + + +'start_element' +Execute handler at the beginning of every +element without a specific start handler. + + + +'end_element' +Execute handler at the end of every +element without a specific end handler. + + + +'cdata' +Execute handler for every character-data +string. + + + +'sdata' +Execute handler for every special-data +string without a specific handler. + + + +'re' +Execute handler for every +record end. + + + +'pi' +Execute handler for every processing +instruction. + + + +'entity' +Execute handler for every external data +entity without a specific handler. 
+ + + +'start_subdoc' +Execute handler at the beginning of every +subdocument entity without a specific handler. + + + +'end_subdoc' +Execute handler at the end of every +subdocument entity without a specific handler. + + + +'conforming' +Execute handler once, at the end of the +document parse, if and only if the document was conforming. + + + + +
    + +The handlers for all of these except the document events +'start' and 'end' will receive +two arguments whenever they are called: the first will be the data +associated with the event (if any), and the second will be the +SGMLS_Event object itself (see the document for +&sgmls.pm;). Note the following example, which allows processing +instructions for including the date or the hostname in the document at +parse time: + + +sgml('pi', sub { + my ($instruction) = @_; + if ($instruction eq 'date') { + output `date`; + } elsif ($instruction eq 'hostname') { + output `hostname`; + } else { + print STDERR "Warning: unknown processing instruction: $instruction\n"; + } +}); + + +With this handler, any occurance +of <?date> in the original &sgml; document +would be replaced by the current date and time, and any occurance of +<?hostname> would be replaced by the name of +the host. + +
    + + + +What are the specific events? + +In addition to the generic +events listed in the previous section, &sgmlspl; allows +special, specific handlers for the beginning and end of elements and +subdocument entities, for SDATA strings, and for external data +entities. Table lists the +different specific event types available. + + +Specific event types + + + + + + +Event +Description + + + + + + + +'<GI>' +Execute handler at the beginning of +every element named 'GI'. + + + +'</GI>' +Execute handler at the end of every +element named 'GI'. + + + +'|SDATA|' +Execute handler for every special-data +string 'SDATA'. + + + +'&ENTITY;' +Execute handler for every external data +entity named 'ENTITY'. + + + +'{ENTITY}' +Execute handler at the beginning of +every subdocument entity named 'ENTITY'. + + + +'{/ENTITY}' +Execute handler at the end of every +subdocument entity named 'ENTITY'. + + + + + +
    + +Note that these override the generic-event handlers. For example, if you +were to type + + +sgml('&FOO;', sub { + output "Found a \"foo\" entity!\n"; +}); + +sgml('entity', sub { + output "Found an entity!\n"; +}); + + +And the external data entity &FOO; +appeared in your &sgml; document, &sgmlspl; would call the first +handler rather than the second. + +Note also that start and end handlers are entirely separate +things: if an element has a specific start handler but no specific end +handler, the generic end handler will still be called at the end of +the element. To prevent this, declare a handler with an empty string: + + +sgml('</HACK>', ''); + + +
    + + + +Why does &sgmlspl; use <command>output</command> instead of +<command>print</command>? + +&sgmlspl; uses a special &perl5; library &output.pm; for +printing text. &output.pm; exports the subroutines +output(string…), +push_output(type[,data]), +and pop_output. The subroutine +output works much like the regular &perl5; function +print, except that you are not able to specify a +file handle, and you may include multiple strings as arguments. + +When you want to write data to somewhere other than +STDOUT (the default), then you use the subroutines +push_output and +pop_output to set +a new destination or to restore an old one. + +You can use the &output.pm; package in other programs by adding +the following line: + + +use SGMLS::Output; + + + + + + +How do I use <command>push_output</command>? + +The subroutine +push_output(type[,data]) +takes two arguments: the type, which is always +required, and the data, which is needed for +certain types of output. Table lists the different types which you +can push onto the output stack. + + +Types for <command>push_output</command> + + + + + + +Type +Data +Description + + + + + + + +'handle' +a filehandle +Send all output to the supplied filehandle. + + + +'file' +a filename +Open the supplied file for writing, erasing its current +contents (if any), and send all output to it. + + + +'append' +a filename +Open the supplied file for writing and append all output to its +current contents. + + + +'pipe' +a shell command +Pipe all output to the supplied shell command. + + + +'string' +a string [optional] +Append all output to the supplied string, which will be +returned by pop_output. + + + +'nul' +[none] +Ignore all output. + + + + +
    + +Because the output is stack-based, you do not lose the previous +output destination when you push a new one. This is especially +convenient for dealing with data in tree-structures, like &sgml; data +— for example, you can capture the contents of sub-elements as +strings, ignore certain types of elements, and split the output from +one &sgml; parse into a series of sub-files. Here are some examples: + + +push_output('string'); # append output to an empty string +push_output('file','/tmp/foo'); # send output to this file +push_output('pipe','mail webmaster'); # mail output to 'webmaster' (!!) +push_output('nul'); # just ignore all output + + +
    + + + +How do I use <command>pop_output</command>? + +When you want to restore the previous output after using push_output, simply +call the subroutine pop_output. If the output type +was a string, pop_output will return the string +(containing all of the output); otherwise, the return value is not +useful. + +Usually, you will want to use push_output in +the start handler for an element or subdocument entity, and +pop_output in the end handler. + + + + + +How about an example for <command>output</command>? + +Here is a simple example to demonstrate how output, push_output, and +pop_output work: + + +output "Hello, world!\n"; # (Written to STDOUT by default) +push_output('nul'); # Push 'nul' ahead of STDOUT +output "Hello, again!\n"; # (Discarded) +push_output('file','foo.out'); # Push file 'foo.out' ahead of 'nul' +output "Hello, again!\n"; # (Written to the file 'foo.out') +pop_output; # Pop 'foo.out' and revert to 'nul' +output "Hello, again!\n"; # (Discarded) +push_output('string'); # Push 'string' ahead of 'nul' +output "Hello, "; # (Written to the string) +output "again!\n"; # (Also written to the string) + # Pop the string "Hello, again!\n" +$foo = pop_output; # and revert to 'nul' +output "Hello, again!\n"; # (Discarded) +pop_output; # Pop 'nul' and revert to STDOUT +output "Hello, at last!\n"; # (Written to STDOUT) + + + + + + +Is there an easier way to make specification files? + +Yes. The script skel.pl, included in this +package, is an &sgmlspl; specification which writes a specification +(!!!). To use it under Unix, try something like + + +sgmls foo.sgml | sgmlspl skel.pl > foo-spec.pl + + +(presuming that there is a copy of skel.pl +in the current directory or in a directory searched by &perl5;) to +generate a new, blank template named +foo-spec.pl. + + + + + +How should I handle forward references? 
+ +Because &sgmlspl; processes the document as a linear data +stream, from beginning to end, it is easy to refer +back to information, but relatively difficult to +refer forward, since you do not know what will be +coming later in the parse. Here are a few suggestions. + +First, you could use push_output and +pop_output to +save up output in a large string. When you have found the information +which you need, you can make any necessary modifications to the string +and print it then. This will work for relatively small chunks of a +document, but you would not want to try it for anything larger. + +Next, you could use the ext method to +add extra pointers, and build a parse tree of the whole document +before processing any of it. This method will work well for small +documents, but large documents will place some serious stress on your +system's memory and/or swapping. + +A more sophisticated solution, however, involves the +Refs.pm module, included in this +distribution. In your &sgmlspl; script, include the line + + +use SGMLS::Refs.pm; + + +to activate the library. The library will create a database +file to keep track of references between passes, and to tell you if +any references have changed. For example, you might want to try +something like this: + + +sgml('start', sub { + my $Refs = new SGMLS::Refs('references.refs'); +}); +sgml('end', sub { + $Refs->warn; + destroy $Refs; +}); + + +This code will create an object, $Refs, linked to a file of +references called references.refs. The +SGMLS::Refs class understands the methods +listed in table + + +The SGMLS::Refs class + + + + + + +Method +Return Type +Description + + + + + + + +new(filename,[logfile_handle]) +SGMLS::Refs +Create a new SGMLS::Refs object. +Arguments are the name of the hashfile and (optionally) a writable +filehandle for logging changes. + + + +get(key) +string +Look up a reference key in the hash file and return its value. 
+ + + +put(key,value) +[none] +Set a new value for the key in the hashfile. + + + +count +number +Return the number of references whose values have changed (thus +far). + + + +warn +1 or 0 +Print a warning mentioning the number of references which have +changed, and return 1 if a warning was printed. + + + + +
    + +
    + + + +Are there any bugs? + +Any bugs in &sgmls.pm; will be here too, since &sgmlspl; relies +heavily on that &perl5; library. + + + +
    + + diff --git a/DOC/sgmlspm.sgml b/DOC/sgmlspm.sgml new file mode 100644 index 0000000..cc61533 --- /dev/null +++ b/DOC/sgmlspm.sgml @@ -0,0 +1,883 @@ + + %ISOpub; + + %ISOnum; + + + + SGML"> + ESIS"> + SGMLS.pm"> + perl5"> + perl5"> + sgmls"> + nsgmls"> +]> + +
    + + + +SGMLS.pm: a perl5 class library for handling output from the +SGMLS and NSGMLS parsers (version 1.03) + + + +David +Megginson + +University of Ottawa +Department of English +
    dmeggins@aix1.uottawa.ca
    +
    +
    +
    + +[unpublished] + +
    + +Welcome to &sgmls.pm;, an extensible &perl5; class library for +processing the output from the &sgmls; and &nsgmls; parsers. +&sgmls.pm; is free, copyrighted software available by anonymous ftp in +the directory ftp://aix1.uottawa.ca/pub/dmeggins/. +You might also want to look at the documentation for sgmlspl, +a simple sample script which uses &sgmls.pm; to convert documents from +&sgml; to other formats. + + +Terms + +This program, along with its documentation, is free software; +you can redistribute it and/or modify it under the terms of the GNU +General Public License as published by the Free Software Foundation; +either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public +License along with this program; if not, write to the Free Software +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + + + + +What is &sgmls.pm;? + +&sgmls.pm; is an extensible &perl5; +class library for parsing the output from James Clark's popular +&sgmls; and &nsgmls; parsers, available on the Internet at ftp://jclark.com. +This is not a complete system for translating +documents written the the Standard Generalised Markup +Language (&sgml;) into other formats, but it can easily +form the basis of such a system (for a simple example, see the sgmlspl +program included in this package). + +The library recognises four basic types of &sgml; objects: the +element, the +attribute, +the notation, and the +entity; each +of these is a fully-developed class with methods for accessing +important information. + + + + + +How do I produce &sgml; documents? + +I am presuming here that you are already experienced with &sgml; +and the &sgmls; or &nsgmls; parser. 
For help with the parsers see the +manual pages accompanying each one; for help with &sgml; see Robin +Cover's SGML Web Page at http://www.sil.org/sgml/sgml.html +on the Internet. + + + + + +How do I program in &perl5;? + +If you have to ask this question, you probably should not be +trying to use this library right now, since it is intended only for +experienced &perl5; programmers. That said, however, you can find the +&perl5; documentation with the &perl5; source distribution or on the +World-Wide Web at http://www.metronet.com/0/perlinfo/perl5/manual/perl.html. + +Please do not write to me for help on using +&perl5;. + + + + + +How do I use &sgmls.pm;? + +First, you need to copy the file &sgmls.pm; to a directory where +perl can find it (on a Unix system, it might be +/usr/lib/perl5 or +/usr/local/lib/perl5, or whatever the environment +variable PERL5LIB is set to) and make certain that it +is world-readable. + +Next, near the top of your &perl5; program, type the following +line: + + +use SGMLS; + + +You must then open up a file handle from which &sgmls.pm; can read the +data from an &sgmls; or &nsgmls; process, unless you are reading from +a standard handle like STDIN — for example, +if you are piping the output from &sgmls; to a &perl5; script, using +something like + + +sgmls foo.sgml | perl myscript.pl + + +then the predefined filehandle STDIN will be +sufficient. In DOS, you might want to dump the sgmls output to a file +and use it as standard input (or open it explicitly in perl), and in +Unix, you might actually want to open a pipe or socket for the input. +&sgmls.pm; doesn't need to seek, so any input stream should +work. + +To parse the &sgmls; or &nsgmls; output from the handle, create +a new object instance of the SGMLS class with +the handle as an argument, i.e. + + +$parse = new SGMLS(STDIN); + + +(You may create more than one SGMLS +object at once, but each object must have a +unique handle pointing to a unique stream, or +chaos will result.) 
Now, you can retrieve and +process events using the next_event method: + + +while ($event = $parse->next_event) { + #do something with each event +} + + + + + + +So what do I do with an event? + +The next_event method for the SGMLS class returns an +object belonging to the class SGMLS_Event. +This class has several methods available, as listed in table . + + +The <classname>SGMLS_Event</classname> class + + + + + + +Method +Return Type +Description + + + + + + + +type +string +Return the type of the event. + + + +data +string, SGMLS_Element, or +SGMLS_Entity +Return any data associated with the event. + + + +file +string +Return the name of the &sgml; source file which generated the +event, if available. + + + +line +string +Return the line number of the &sgml; source file which +generated the event, if available. + + + +element +SGMLS_Element +Return the element in force when the event was +generated. + + + +parse +Return the SGMLS object for the current +parse. + + + +entity(ename) +Look up an entity from those currently known to the parse. An +alias for ->parse->entity($ename) + + + +notation(nname) +Look up the notation from those currently known to the parse: +an alias for ->parse->notation($nname). + + + + +
    + +The file and line methods +will return useful information only if you called &sgmls; or &nsgmls; +with the -l flag to include file and +line-number information. + +
    + + + +What are the different event types and data? + +Table lists the ten +different event types returned by the next_event +method of an SGMLS +object and the different types of data associated with each of these +(note that these do not correspond to the +standard &esis; events). + + + +The <classname>SGMLS_Event</classname> types + + + + + + +Event Type +Event Data +Description + + + + + + + + +'start_element' +SGMLS_Element +The beginning of an element. + + + +'end_element' +SGMLS_Element +The end of an element. + + + +'cdata' +string +Regular character data. + + + +'sdata' +string +Special system data. + + + +'re' +[none] +A record-end (i.e., a newline). + + + +'pi' +string +A processing instruction + + + +'entity' +SGMLS_Entity +A non-SGML external entity. + + + +'start_subdoc' +SGMLS_Entity +The beginning of an SGML subdocument. + + + +'end_subdoc' +SGMLS_Entity +The end of an SGML subdocument. + + + +'conforming' +[none] +The document was valid. + + + + +
    + +For example, if $event->type returns +'start_element', then +$event->data will return an object belonging to the +SGMLS_Element +class (which will contain a list of attributes, etc. — see +below), $event->file and +$event->line will return the file and line-number +in which the element appeared (if you called &sgmls; or &nsgmls; with +the -l flag), and +$event->element will return the element currently +in force (in this case, the same as +$event->data). + +
    + + + +What do I do with an <classname>SGMLS_Element</classname>? + +Altogether, there are six classes in &sgmls.pm;, each with its +own methods: in addition to SGMLS (for the parse) and +SGMLS_Event +(for a specific event), the classes are +SGMLS_Element, SGMLS_Attribute, +SGMLS_Entity, +and SGMLS_Notation. +Like all of these, SGMLS_Element has a number +of methods available for obtaining different types of information. +For example, if you were to use + + +my $element = $event->data + + +to retrieve the data for a 'start_element' or +'end_element' event, then you could use the methods +listed in table to find more +information about the element. + + +The <classname>SGMLS_Element</classname> class + + + + + + +Method +Return Type +Description + + + + + + + +name +string +The name (or GI), in upper-case. + + + +parent +SGMLS_Element +The parent element, or '' if this is the top +element. + + + +attributes +HASH +Return a reference to a hash table of +SGMLS_Attribute objects, keyed by the attribute +names (in upper-case). + + + +attribute_names +ARRAY +A list of all attribute names for the current element (in +upper-case). + + + +attribute(aname) +SGMLS_Attribute +Return the attribute named ANAME. + + + +set_attribute(attribute) +[none] +The attribute argument should be an +object belonging to the SGMLS_Attribute +class. Add it to the element, replacing any previous attribute with +the same name. + + + +in(name) +SGMLS_Element +If the current element's parent is named +name, return the parent; otherwise, return +''. + + + +within(name) +SGMLS_Element +If any ancestor of the current element is named +name, return it; otherwise, return +''. + + + + +
    + +
    + + + +What do I do with an +<classname>SGMLS_Attribute</classname>? + +Note that objects of the SGMLS_Attribute +class do not have events in their own right, and are available only +through the attributes or +attribute(aname) methods for +SGMLS_Element +objects. An object belonging to the +SGMLS_Attribute class will recognise the +methods listed in table . + + +The <classname>SGMLS_Attribute</classname> class + + + + + + +Method +Return Type +Description + + + + + + + +name +string +The name of the attribute (in upper-case). + + + +type +string +The type of the attribute: 'IMPLIED', +'CDATA', 'NOTATION', +'ENTITY', or 'TOKEN'. + + + +value +string, SGMLS_Entity, or +SGMLS_Notation. +The value of the attribute. If the type is +'CDATA' or 'TOKEN', it will be a +simple string; if it is 'NOTATION' it will be an +object belonging to the SGMLS_Notation class, +and if it is 'Entity' it will be an object +belonging to the SGMLS_Entity class. + + + +is_implied +boolean +Return true if the value of the attribute is implied, or false if +it has an explicit value. + + + +set_type(type) +[none] +Provide a new type for the current attribute -- no sanity +checking will be performed, so be careful. + + + +set_value(value) +[none] +Provide a new value for the current attribute -- no sanity +checking will be performed, so be careful. + + + + +
Note that the type 'TOKEN' includes both +individual tokens and lists of tokens (i.e. 'TOKENS', +'IDS', or 'IDREFS' in the +original &sgml; document), so you might need to use the perl function +'split' to break the value string into a list.
    + + + +What do I do with an <classname>SGMLS_Entity</classname>? + +An SGMLS_Entity object can come in an +'entity' event (in +which case it is always external), in a +'start_subdoc' or 'end_subdoc' +event (in which case it always has the type +'SUBDOC'), or as the value of an attribute (in +which case it may be internal or external). An object belonging to +the SGMLS_Entity class may use the methods +listed in table . + + +The <classname>SGMLS_Entity</classname> class + + + + + + +Method +Return Type +Description + + + + + + + +name +string +The entity name. + + + +type +string +The entity type: 'CDATA', +'SDATA', 'NDATA', or +'SUBDOC'. + + + +value +string +The entity replacement text (internal entities +only). + + + +sysid +string +The system identifier (external entities only). + + + +pubid +string +The public identifier (external entities only). + + + +filenames +ARRAY +A list of file names generated from the sysid and pubid +(external entities only). + + + +notation +SGMLS_Notation +The associated notation (external data entities only). + + + + +
An entity of type 'SUBDOC' will have a sysid +and pubid, an external data entity will have a sysid, pubid, +filenames, and a notation, and an internal data entity will have a +value.
    + + + +What do I do with an <classname>SGMLS_Notation</classname>? + +The fourth data class is the notation, which is available only +as a return value from the notation method of an +SGMLS_Entity +or the value method of an SGMLS_Attribute +with type 'NOTATION'. You can use the notation to +decide how to treat non-SGML data (such as graphics). An object +belonging to the SGMLS_Notation class will have +access to the methods listed in table . + + +The <classname>SGMLS_Notation class</classname> + + + + + + +Method +Return Type +Description + + + + + + + +name +string +The notation's name. + + + +sysid +string +The notation's system identifier. + + + +pubid +string +The notation's public identifier. + + + + +
    + +What you do with this information is +entirely up to you. + +
    + + + +Is there any extra information available from the &sgml; +document? + +The SGMLS +object which you created at the beginning of the parse has several +methods available in addition to next_event — +you will find them all listed in table . There should normally be no need to +use the notation and entity +methods, since &sgmls.pm; will look up entities and notations for you +automatically as needed. + + +Additional methods for the <classname>SGMLS</classname> +class + + + + + +Method +Return Type +Description + + + + + + + +next_event +SGMLS_Event +Return the next event. + + + +appinfo +string +Return the APPINFO parameter from the &sgml; declaration, if +any. + + + +notation(nname) +SGMLS_Notation +Look up a notation by name. + + + +entity(ename) +SGMLS_Entity +Look up an entity by name. + + + + +
    + +
    + + + +How about a simple example? + +OK. The following script simply reports its events: + + +&sample.program; + + +To use it under Unix, try something like + + +sgmls document.sgml | perl sample.pl + + +and watch the output scroll down. + + + + + +How do I design my <emphasis>own</emphasis> classes? + +In addition to the methods listed above, all of the classes used +in &sgmls.pm; have an ext method which returns a +reference to an initially-empty hash table. You are free to use this +hash table to store anything you want — it +should be especially useful if you are building your own, derived +classes from the ones provided here. The following example builds a +derived class My_Element from the SGMLS_Element +class, adding methods to set and get the current font: + + +use SGMLS; + +package My_Element; +@ISA = qw(SGMLS_Element); + +sub new { + my ($class,$element,$font) = @_; + $element->ext->{'font'} = $font; + return bless $element; +} + +sub get_font { + my ($self) = @_; + return $self->ext->{'font'}; +} + +sub set_font { + my ($self,$font) = @_; + $self->ext->{'font'} = $font; +} + + +Note that the derived class does not need to have any knowledge +about the underlying structure of the SGMLS_Element +class, and need only avoid shadowing any of the methods currently +existing there. + +If you decide to create a derived class from the SGMLS, please note that in +addition to the methods listed above, that class uses internal methods +named element, line, and +file, similar to the same methods in SGMLS_Event — +it is essential that you not shadow these method names. + + + + + +Are there any bugs? + +Of course! Right now, &sgmls.pm; silently ignores link attributes +(&nsgmls; only) and data attributes, and there may be many other bugs +which I have not yet found. + + + +
    + + diff --git a/DOC/tohtml.pl b/DOC/tohtml.pl new file mode 100644 index 0000000..399167b --- /dev/null +++ b/DOC/tohtml.pl @@ -0,0 +1,569 @@ +######################################################################## +# SGMLSPL script to convert from the DocBook DTD to HTML pages. +# +# by David Megginson +# +# This is a slightly more complicated script than tolatex.pl, since it +# uses forward references and an external reference file. Note that +# this script is customised for the SGMLS.pm and sgmlspl documentation +# in this directory, and is not meant as a general-purpose +# DocBook->HTML translator (though it could form the basis of one). +# Because each parse uses information saved from the last parse, +# you might need two passes to make certain that all references are +# up to date. +# +# $Log: tohtml.pl,v $ +# Revision 1.4 1995/12/03 22:07:21 david +# Changed to use SGMLS::Output instead of Output, and to take advantage +# of the SGMLS::Refs package for forward references. +# +# Revision 1.3 1995/08/12 16:25:53 david +# Oops! Fixed comment leader in RCS file. +# +# Revision 1.2 1995/08/12 16:21:06 david +# Changes for release 1.01: fixed handling of prefixed sysid's from +# NSGMLS. +# +######################################################################## + +use SGMLS; # Use the SGMLS package. +use SGMLS::Output; # Use stack-based output. +use SGMLS::Refs; + +$version = '$Id: tohtml.pl,v 1.4 1995/12/03 22:07:21 david Exp $'; + +$basename = shift; # Extra argument to script is basename. + +# +# This conversion script keeps the idea of a current ID and a current +# file. Since the SGML document will be broken up into a series of +# smaller HTML documents, it is necessary to keep track of the current +# file name. The current ID is the ID (explicit or implied) of the +# most recent element which wants to capture titles, etc. +# +$current_id = ''; # The ID of the current container element. 
+@current_id_stack = (); # The IDs of any parent container elements. +$current_file = ''; # The name of the current output file. +@current_file_stack = (); # The names of the parent output files. + +$top_id = ''; # The ID of the top element. +$top_file = ''; # The ID of the top file. + +$previous_file = ''; # The previous file on the same level. + +$table_counter = 0; # The number of the current table. + + + + +######################################################################## +# Handler declarations for sgmlspl. +######################################################################## + +# +# Use the 'start' and 'end' handlers of the document to begin and +# terminate reference handling. +# +sgml('start', sub { + system("touch .redo_$basename"); + # Start up the reference manager. + $Refs = new SGMLS::Refs("$basename.refs"); +}); +sgml('end', sub { + unlink(".redo_$basename") unless $Refs->warn; +}); + + +# +# The
    is the top-level element. +# +sgml('
    ', sub { + start_html(shift); + $top_id = $current_id; + $top_file = $current_file; +}); +sgml('
    ', sub { end_html(); }); + +# +# Ignore all of the header except for the bits which we actually want, +# by pushing output to 'nul'. +# +sgml('', sub { push_output 'nul'; }); +sgml('', sub { pop_output(); }); + +# +# Save the title of something for future reference. +# +sgml('', sub { push_output 'string'; }); +sgml('', sub { $Refs->put("title:$current_id",pop_output()); }); + +# +# These are just containers in the . +# +sgml('', ""); +sgml('', ""); +sgml('', ""); +sgml('', ""); +sgml('', ""); +sgml('', ""); +sgml('
    ', ""); +sgml('
    ', ""); +sgml('', ""); +sgml('', ""); + +# +# Save the author's first name for future reference. +# +sgml('', sub { push_output 'string'; }); +sgml('', sub { $Refs->put("firstname:$current_id",pop_output()); }); + +# +# Save the author's surname for future reference. +# +sgml('', sub { push_output 'string'; }); +sgml('', sub { $Refs->put("surname:$current_id",pop_output()); }); + +# +# Save the organisation name for future reference. +# +sgml('', sub { push_output 'string'; }); +sgml('', sub { $Refs->put("orgname:$current_id",pop_output()); }); + +# +# Save the organisation division for future reference. +# +sgml('', sub { push_output 'string'; }); +sgml('', sub { $Refs->put("orgdiv:$current_id",pop_output()); }); + +# +# Save the email address for future reference. +# +sgml('', sub { push_output('string'); }); +sgml('', sub { $Refs->put("email:$current_id",pop_output()); }); + + + +# +# Sectioning elements -- all of these simply call the &start_html +# and &end_html subroutines, which do all of the real work. +# +sgml('', sub { start_html(shift); }); +sgml('', sub { end_html(); }); +sgml('', sub { start_html(shift); }); +sgml('', sub { end_html(); }); +sgml('', sub { start_html(shift); }); +sgml('', sub { end_html(); }); +sgml('', sub { start_html(shift); }); +sgml('', sub { end_html(); }); +sgml('', sub { start_html(shift); }); +sgml('', sub { end_html(); }); +sgml('', sub { start_html(shift); }); +sgml('', sub { end_html(); }); + + +# +# Paragraphs must be marked explicitly in HTML -- use the HTML 3 +# practice (actually just _real_ SGML, for a change) of marking both +# the beginning and the end. +# +sgml('', "

    "); +sgml('', "

    \n\n"); + + + + +# +# Cross-references. +# + +# +# This is an internal cross reference -- get the URL by +# simply adding ".html" to the IDREF (note that this would not work +# for tables!!!). +# +sgml('', sub { + my $element = shift; + output ""; +}); +sgml('', ""); + +# +# This is an external cross-reference, with a supplied URL. +# +sgml('', sub { + my $element = shift; + output "attribute(URL)->value; + output "\">"; +}); +sgml('', ""); + + +# +# This is a pointer to something (in this case, always a table). +# +sgml('', sub { + my $element = shift; + output $Refs->get('xref:' . lc($element->attribute(LINKEND)->value)); +}); +sgml('', ""); + + + +# +# Inline elements. +# + +# +# Print application names in typewriter. +# +sgml('', ""); +sgml('', ""); + +# +# Print acronyms in bold. +# +sgml('', ""); +sgml('', ""); + +# +# Print terms in italics. +# +sgml('', ""); +sgml('', ""); + +# +# Print file names in typewriter. +# +sgml('', ""); +sgml('', ""); + +# +# Print symbols in typewriter. +# +sgml('', ""); +sgml('', ""); + +# +# Print return values in typewriter. +# +sgml('', ""); +sgml('', ""); + +# +# Print quotations in quotation marks. +# +sgml('', '"'); +sgml('', '"'); + +# +# Print commands in typewriter. +# +sgml('', ""); +sgml('', ""); + +# +# Print parameters in typewriter. +# +sgml('', ""); +sgml('', ""); + +# +# Print literal elements in typewriter. +# +sgml('', ""); +sgml('', ""); + +# +# Print class names in typewriter. +# +sgml('', ""); +sgml('', ""); + +# +# Emphasise emphasis. +# +sgml('', ""); +sgml('', ""); + + + +# +# Block elements. +# + +# +# Program listings are preformatted. +# +sgml('', "

    \n

    ");
    +sgml('', "
    \n

    \n"); + +# +# Keep a counter for table numbers, note the ID, and look up the +# title (caption) for the table. +# +sgml('', sub { + my $element = shift; + push @current_id_stack, $current_id; + $current_id = lc($element->attribute(ID)->value || gen_id()); + $table_counter++; + $Refs->put("xref:$current_id",$table_counter); + output "\n

    Table $table_counter: " + . $Refs->get("title:$current_id") . "

    \n\n"; +}); +sgml('
    ', sub { + output "\n"; + $current_id = pop @current_id_stack; +}); + +# +# Nothing needs to be done here -- we don't care how many cells there are. +# +sgml('', ""); +sgml('', ""); + +# +# We will keep track of all of the entries in the head, for later use. +# +sgml('', sub { @cell_headings = (); push_output('nul'); }); +sgml('', sub { pop_output(); }); + +# +# Print a single horizontal rule before the beginning of the body. +# +sgml('', "
    "); +sgml('', ""); + +# +# Make each row into a labelled list (!!) -- HTML 3 does have tables, +# but they might not be able to handle the paragraph-length entries +# which I used in my documentation (these will not print if we are +# in the , since output will be 'nul'). +# +sgml('', sub { + output "\n
    \n"; + $cell_counter = 0; +}); +sgml('', "\n
    \n
    \n\n"); + +# +# If an entry is in the , save it for later use; otherwise, +# print the entry as a list item with its corresponding entry +# as a label. +# +sgml('', sub { + my $element = shift; + if ($element->within(THEAD)) { + push_output 'string'; + } else { + output "
    "; + output $cell_headings[$cell_counter]; + output "
    \n
    "; + } +}); +sgml('', sub { + my $element = shift; + if ($element->within(THEAD)) { + $cell_headings[$cell_counter] = pop_output(); + } else { + output "
    \n"; + } + $cell_counter++; +}); + + + +######################################################################## +# SDATA Handlers -- use HTML entities wherever possible. +######################################################################## + +sgml('|[lt ]|', "<"); +sgml('|[gt ]|', ">"); +sgml('|[mdash ]|', "--"); +sgml('|[LaTeX]|', "LaTeX"); +sgml('|[hellip]|', "..."); +sgml('|[amp ]|', "&"); + + + +######################################################################## +# The generic external data entity handler. Handle only entities +# with type CDATA, and simply dump their files into the current +# document with minimal conversion. +######################################################################## + +sgml('entity',sub { + my $entity = shift; + # Use the first generated filename + # or the system identifier. + my $filename = $entity->filenames->[0] || $entity->sysid; + # A strange, NSGMLS-thing. + if ($filename =~ /^FILE:/ || $filename =~ /^\]+\>/) { + $filename = $'; + } + + # Handle only CDATA. + if ($entity->type eq 'CDATA') { + + if (-r $filename) { + unless (open INPUT, "<$filename") { + die "Cannot open external file $filename\n"; + } + # Convert special SGML characters. + while () { + s/\&/\&/g; + s/\/\>/g; + output $_; + } + close INPUT; + } else { + die "Cannot read file $filename\n"; + } + } else { + die "Cannot handle external entity with type " . $entity->type . "\n"; + } +}); + + + +######################################################################## +# Default handlers -- these will pick up any unrecognised elements, +# SDATA strings, processing instructions, or subdocument entities, +# and report an error to the user. +######################################################################### + + +sgml('start_element',sub { die "Unknown element: " . $_[0]->name; }); +sgml('sdata',sub { die "Unknown SDATA: " . $_[0]; }); +sgml('pi',sub { die "Unknown processing instruction: " . 
$_[0]; }); +sgml('start_subdoc',sub { die "Unknown subdoc entity: " . $_[0]->name; }); + + +# +# End of sgmlspl handler declarations. +# + + +######################################################################## +# Utility procedures. +######################################################################## + + +# +# Given an element, start a new HTML document for it. +# +sub start_html { + my $element = shift; + my $old_file = $current_file; + + # Save the old values on the stack. + push @current_id_stack, $current_id; + push @current_file_stack, $current_file; + + # Get the new ID and file. + $current_id = lc($element->attribute(ID)->value || gen_id()); + $current_file = $current_id . '.html'; + + # Note the previous child, if any. + if ($previous_file) { + $Refs->put("previous:$current_file",$previous_file); + $Refs->put("next:$previous_file",$current_file); + } + $previous_file = ''; + + # Put a reference up to the parent. + if ($old_file) { + $Refs->put("up:$current_file",$old_file); + } + + # Look up the title reference. + my $plaintitle = my $title = $Refs->get("title:$current_id"); + + # Strip the title. + $plaintitle =~ s/\<[^\>]+\>//g; + + # If this is not the top-level + # file, send out a link + # before beginning the new file. + if ($old_file) { + output "
  • $plaintitle
  • \n"; + } + + # Send output to the new file. + push_output('file',$current_file); + + # Print the front matter. + output "\n\n$plaintitle\n\n\n"; + + # Navigational aids, if this is not + # the top-level file. + if ($old_file) { + output "\n

    Links: "; + my $up = $Refs->get("up:$current_file"); + my $previous = $Refs->get("previous:$current_file"); + my $next = $Refs->get("next:$current_file"); + output "Next " if $next; + output "Previous " if $previous; + output "Up " if $up; + output "Top"; + output "

    \n\n"; + } + + + output "

    $title

    \n\n"; +} + +# +# End the HTML document. +# +sub end_html { + # Look up the name and email info. + my $firstname = $Refs->get("firstname:$current_id") || + $Refs->get("firstname:$top_id"); + my $surname = $Refs->get("surname:$current_id") || + $Refs->get("surname:$top_id"); + my $email = $Refs->get("email:$current_id") || + $Refs->get("email:$top_id"); + + # Restore the previous ID and file, + # and note this as the previous + # child. + $previous_file = $current_file; + $current_id = pop @current_id_stack; + $current_file = pop @current_file_stack; + + # If this is not the top-level file, + # add some navigational information. + if ($current_file) { + output "\n

    Links: "; + my $up = $Refs->get("up:$previous_file"); + my $previous = $Refs->get("previous:$previous_file"); + my $next = $Refs->get("next:$previous_file"); + output "Next " if $next; + output "Previous " if $previous; + output "Up " if $up; + output "Top"; + output "

    \n\n"; + } + + # Add an address, if available, + # including a MAILTO URL. + output "\n
    "; + output "$firstname $surname " if $firstname || $surname; + output "<$email>" if $email; + output "
    \n\n\n"; + pop_output(); +} + +# +# Generate a new ID for anything which does not already have one. +# +sub gen_id { + $id_counter++; + return "node$id_counter"; +} + +1; diff --git a/DOC/tolatex.pl b/DOC/tolatex.pl new file mode 100644 index 0000000..fb25443 --- /dev/null +++ b/DOC/tolatex.pl @@ -0,0 +1,508 @@ +######################################################################## +# SGMLSPL script to convert from the DocBook DTD to LaTeX. +# +# by David Megginson +# +# This is a simple translation sample script. Note that this +# script is customised for the SGMLS.pm and sgmlspl documentation in +# this directory, and is not meant as a general-purpose DocBook->LaTeX +# translator (though it could form the basis of one). +# +# $Log: tolatex.pl,v $ +# Revision 1.4 1995/12/03 22:08:03 david +# Changed to use SGMLS::Output instead of Output, and to take advantage +# of the SGMLS::Refs package for forward references. +# +# Revision 1.3 1995/08/12 16:25:07 david +# Oops! Fixed comment leader in RCS file. +# +# Revision 1.2 1995/08/12 16:22:17 david +# Revised for release 1.01: fixed handling of prefixed sysid's from +# NSGMLS. +# +######################################################################## + +use SGMLS; # Use the SGMLS package. +use SGMLS::Output; # Use stack-based output. +use SGMLS::Refs; # Use forward-reference manager. + +$version = '$Id: tolatex.pl,v 1.4 1995/12/03 22:08:03 david Exp $'; + +$basename = shift; # We need an explicit basename to + # produce different reference + # files for the two documents. + +######################################################################## +# Document handlers, called at the beginning and end of the entire parse. +######################################################################## + +sgml('start', sub { + system("touch .redo_$basename"); + # Start up the reference manager. 
+ $Refs = new SGMLS::Refs("$basename.refs"); +}); +sgml('end', sub { + unlink(".redo_$basename") unless $Refs->warn; +}); + + +######################################################################## +# Specific element handlers, called at the beginning and end of each +# corresponding element. +######################################################################## + +# +# The
    is the top-level argument: when it starts, print the +# beginning of the LaTeX preamble; when it ends, print the LaTeX +# close. +# +sgml('
    ', "\\documentstyle[11pt]{article}\n\n" . + "\\setlength{\\parskip}{3ex}\n" . + "\\raggedright\n\n"); +sgml('
    ', "\\end{document}\n"); + +# +# The contains bibliographical information about the +# article. Push output to nul so that nothing prints by default, +# grab any relevant information into variables using the sub-elements, +# then end the preamble and begin the actual document once the +# header is finished. +# +sgml('', sub { push_output('nul'); }); +sgml('', sub { + pop_output; + output "\\title{$title}\n"; + output "\\author{$firstname $surname \\\\\n"; + output " $orgdiv, \\\\\n" if $orgdiv; + output " $orgname, \\\\\n" if $orgname; + output " Email: {\\tt $email} \\\\\n" if $email; + output "}\n\n"; + output "\\date{$date}\n" if $date; + output "\n\\begin{document}\n\\maketitle\n\n"; +}); + +# +# Use push_output and pop_output to save the contents of the +# element. The element's treatment is context-sensitive: inside +# an <ARTHEADER>, save the title in the $title variable; inside <SECT1>, +# or <IMPORTANT>, begin the new section now; inside a <TABLE>, print +# the caption. +# +sgml('<TITLE>', sub { push_output 'string'; }); +sgml('', sub { + my $element = shift; + my $data = pop_output; + if ($element->in(ARTHEADER)) { + $title = $data; + } elsif ($element->in(SECT1) || $element->in(IMPORTANT)) { + $Refs->put("title:$current_section",$data); + } elsif ($element->in(TABLE)) { + $Refs->put("title:$current_table",$data); + } else { + die "Do not know what to do with TITLE in " + . $element->parent->name . "\n"; + } +}); + +# Element: AUTHORGROUP +sgml('', ""); +sgml('', ""); + +# Element: AUTHOR +sgml('', ""); +sgml('', ""); + +# +# Save the contents of into the variable $firstname, +# presuming that this appears only within the . +# +sgml('', sub { push_output('string'); }); +sgml('', sub { $firstname = pop_output; }); + +# +# Save the contents of into the variable $surname, +# presuming that this appears only within the . 
+# +sgml('', sub { push_output('string'); }); +sgml('', sub{ $surname = pop_output; }); + +# Element: AFFILIATION +sgml('', ""); +sgml('', ""); + +# +# Save the contents of into the variable $orgname, +# presuming that this appears only within the . +# +sgml('', sub { push_output('string'); }); +sgml('', sub { $orgname = pop_output; }); + +# +# Save the contents of into the variable $orgdiv, +# presuming that this appears only within the . +# +sgml('', sub { push_output('string'); }); +sgml('', sub { $orgdiv = pop_output; }); + +# Element: ADDRESS +sgml('
    ', ""); +sgml('
    ', ""); + +# +# Save the contents of into the variable $email, +# presuming that this appears only within the . +# +sgml('', sub { push_output('string'); }); +sgml('', sub { $email = pop_output; }); + +# Element: ARTPAGENUMS +sgml('', ""); +sgml('', ""); + +# +# Print an section in italics. +# +sgml('', sub { + my $element = shift; + + push @current_section_stack,$current_section; + $current_section = $element->attribute(ID)->value || generate_id(); + my $title = $Refs->get("title:$current_section") || '[Untitled]'; + output "\n\n\{\\em\\section{$title}\n\\label{$current_section}\n\n"; +}); +sgml('', "\n\n\}\n\n"); + +# +# Blank lines delimit paragraphs in LaTeX. +# +sgml('', "\n"); +sgml('', "\n"); + +# +# The actual section will begin with the element, but note +# the section's ID now. +# +sgml('<SECT1>', sub { + my $element = shift; + + push @current_section_stack,$current_section; + $current_section = $element->attribute(ID)->value || generate_id(); + my $title = $Refs->get("title:$current_section") || '[Untitled]'; + output "\n\n\\section{$title}\n\\label{$current_section}\n\n"; +}); +sgml('</SECT1>', sub { + $current_section = pop @current_section_stack; + output "\n\n"; +}); + +# Element: LINK (used only for HTML version) +sgml('<LINK>', ""); +sgml('</LINK>', ""); + +# Element: ULINK (used only for HTML version) +sgml('<ULINK>', ""); +sgml('</ULINK>', ""); + +# +# An XREF may print the section number (or table number, etc.) or the actual +# page number, depending upon the value of the ROLE= attribute. +# +sgml('<XREF>', sub { + my $element = shift; + my $idref = $element->attribute('LINKEND')->value; + my $type = $element->attribute('ROLE')->value; + if ($type eq 'page') { + output "\\pageref{$idref}"; + } else { + output "\\ref{$idref}"; + } +}); +sgml('</XREF>', ""); + +# +# The name of an application will appear in small caps. +# +sgml('<APPLICATION>', "{\\sc "); +sgml('</APPLICATION>', "}"); + +# +# Use LaTeX emphasis for emphasis. 
#
# The end handler emits the LaTeX italic correction (\/) before
# closing the group.
sgml('<EMPHASIS>', "{\\em ");
sgml('</EMPHASIS>', "\\/}");

#
# Technical terms are simply emphasised (same output as <EMPHASIS>).
#
sgml('<GLOSSTERM>', "{\\em ");
sgml('</GLOSSTERM>', "\\/}");

#
# Use proper quotation marks for quotes, with braces to get the ligaturing
# right.
#
sgml('<QUOTE>', "{``}");
sgml('</QUOTE>', "{''}");

#
# Acronyms appear in small caps.
#
sgml('<ACRONYM>', "{\\sc ");
sgml('</ACRONYM>', "}");

#
# Filenames appear in typewriter.
#
sgml('<FILENAME>', "{\\tt ");
sgml('</FILENAME>', "}");

#
# Symbols appear in typewriter.
#
sgml('<SYMBOL>', "{\\tt ");
sgml('</SYMBOL>', "}");

#
# Program listings appear in the verbatim environment, which
# preserves whitespace but also prints control characters as-is
# (see the CDATA and SDATA handlers below for the special
# treatment required).  The whole environment is wrapped in a
# \footnotesize group, closed by the extra brace in the end handler.
#
sgml('<PROGRAMLISTING>', "\n{\\footnotesize\\begin{verbatim}\n");
sgml('</PROGRAMLISTING>', "\n\\end{verbatim}}\n");

#
# Class names appear in typewriter.
#
sgml('<CLASSNAME>', "{\\tt ");
sgml('</CLASSNAME>', "}");

#
# Commands (i.e. methods, etc.) appear in typewriter.
#
sgml('<COMMAND>', "{\\tt ");
sgml('</COMMAND>', "}");

#
# Begin a formal table.  The actual tabular environment will come
# later, but for now, note the table's ID (so that it can follow
# the caption -- see <TITLE>, above), and begin a floating environment
# with the following placement preference: here, top, bottom, page.
# Print tables in a small font to save space.
#
sgml('<TABLE>', sub {
    my $element = shift;
    # Remember the enclosing table (if any); </TABLE> restores it.
    push @current_table_stack,$current_table;
    $current_table = $element->attribute('ID')->value || generate_id();
    # The title comes from the reference file written by an earlier
    # pass (see the </TITLE> handler), so it may be empty the first time.
    my $title = $Refs->get("title:$current_table");
    output "\n\\begin{table}[htbp]\n\\footnotesize\n";
    output "\\caption{$title}\n\\label{$current_table}\n";
});
sgml('</TABLE>', sub {
    output "\\end{table}\n";
    # Restore the ID saved by the <TABLE> handler, in the same way
    # that </SECT1> restores $current_section.
    $current_table = pop @current_table_stack;
});

#
# Here is where the tables get tricky: the <TGROUP> element specifies
# the table's width in columns: instead of letting LaTeX sort out the
# width of each column, presume that we want the table 4.45 inches wide
# (!?!) and divide that width by the number of columns.  This is
# a cheezy solution, but it allows the use of parboxes in the tables
# for continuous text.
#
sgml('<TGROUP>', sub {
    my $element = shift;
    $table_columns = $element->attribute('COLS')->value;
    # Fail with a clear message instead of "Illegal division by zero"
    # when COLS is missing or zero.
    die "TGROUP needs a positive COLS attribute\n"
        unless $table_columns > 0;
    $width = 4.45 / $table_columns;
});
sgml('</TGROUP>', "");

#
# Presume only one <THEAD> for each table, at its beginning.  Begin
# the tabular element here, presuming left justification: one "l"
# column, then "|l" for each remaining column.
#
sgml('<THEAD>', sub {
    output "\\vspace{2ex}\\begin{tabular}{l" .
        "|l" x ($table_columns - 1) . "}\n";
});
sgml('</THEAD>', "");

#
# End the tabular environment at the end of the table body.
#
sgml('<TBODY>', "");
sgml('</TBODY>', "\\end{tabular}");

#
# Print a single line under each row in <TBODY>, but a double line under
# the row in <THEAD>.  Reset the variable $row to 0 each time we begin
# a new row.
#
sgml('<ROW>', sub { $row = 0; });
sgml('</ROW>', sub {
    my $element = shift;
    if ($element->in('THEAD')) {
        output "\\\\ \\hline\\hline\n";
    } else {
        output "\\\\ \\hline\n";
    }
});

#
# Here is the tricky part: use the $row variable to determine whether this
# is the _first_ entry of the row (and thus, does not require a leading "&"),
# and use parboxes for the actual entries' contents, so that they can
# take up multiple lines.
# Also add 4 points to the top and bottom of each
# parbox, just to make it purtier.
#
# Everything goes through output() rather than print, so that the
# opening of each cell lands in the same destination as the closing
# string emitted for </ENTRY>, even when output has been redirected
# with push_output().
#
sgml('<ENTRY>', sub {
    if ($row == 0) {
        # First entry of the row: no separator, just note that the
        # row has started.
        $row = 1;
    } else {
        output " & ";
    }
    output "\\parbox[c]{" . $width . "in}{\\raggedright\\vspace{4pt} ";
});
sgml('</ENTRY>', "\\vspace{4pt}}\t");

#
# Parameters appear in slanted typewriter.
#
sgml('<PARAMETER>', "{\\tt\\sl ");
sgml('</PARAMETER>', "\\/}");

#
# Return values appear in typewriter.
#
sgml('<RETURNVALUE>', "{\\tt ");
sgml('</RETURNVALUE>', "}");

#
# Literal elements appear in typewriter.
#
sgml('<LITERAL>', "{\\tt ");
sgml('</LITERAL>', "}");


########################################################################
# Declare handlers for SDATA strings.
########################################################################

#
# These three will appear only in regular body text, so use simple
# replacement strings for their LaTeX equivalents.
#
sgml('|[mdash ]|', "{---}");
sgml('|[hellip]|', "{\\ldots}");
sgml('|[LaTeX]|', "{\\LaTeX}");

#
# <, >, and & could appear in the <PROGRAMLISTING> element,
# where they should appear as literal '<', '>', and '&'; otherwise,
# they need special treatment in LaTeX.  Note how these handlers use
# the second argument to the handler, $event, to get the element
# currently in force.
+# +sgml('|[lt ]|', sub { + my ($data,$event) = @_; + if ($event->element->name eq 'PROGRAMLISTING') { + output "\<"; # simple less-than in verbatim + } else { + output "\$\<\$"; # math less-than in regular text + } +}); +sgml('|[gt ]|', sub { + my ($data,$event) = @_; + if ($event->element->name eq 'PROGRAMLISTING') { + output "\>"; # simple greater-than in verbatim + } else { + output "\$\>\$"; # math greater-than in regular text + } +}); +sgml('|[amp ]|', sub { + my ($data,$event) = @_; + if ($event->element->name eq 'PROGRAMLISTING') { + output "\&"; # simple ampersand in verbatim + } else { + output "\\\&"; # escaped ampersand in regular text + } +}); + + +######################################################################## +# CDATA handler: escape characters which can appear as-is in SGML +# data but will cause problems in LaTeX. +######################################################################## + +sgml('cdata',sub { + my ($data,$event) = @_; + unless ($event->element->name eq 'PROGRAMLISTING') { + $data =~ s/\\/\\verb\|\\\|/g; # backslash + $data =~ s/\{/\\\{/g; # opening brace + $data =~ s/\}/\\\}/g; # closing brace + $data =~ s/\#/\\\#/g; # hash + $data =~ s/\$/\\\$/g; # dollar + $data =~ s/\%/\\\%/g; # percent + $data =~ s/\&/\\\&/g; # ampersand + $data =~ s/\~/\\\~/g; # tilde + $data =~ s/\_/\\\_/g; # underscore + $data =~ s/\^/\\\^/g; # caret + } + output $data; +}); + + +######################################################################## +# External data entity handler: deal with CDATA entities only, +# including them verbatim. For now, I use the SYSID for the +# entity's file name, though I should probably +######################################################################## + + + +sgml('entity',sub { + my $entity = shift; + my $filename = $entity->filenames->[0] || $entity->sysid; + # A strange NSGMLS-thing. 
+ if ($filename =~ /^FILE:/ || $filename =~ /^\<FILE[^>]+\>/) { + $filename = $'; + } + if ($entity->type eq 'CDATA') { + if (-r $filename) { + unless (open INPUT, "<$filename") { + die "Cannot open external file $filename\n"; + } + while (<INPUT>) { + output $_; + } + close INPUT; + } else { + die "Cannot read file $filename\n"; + } + } else { + die "Cannot handle external entity with type " . $entity->type . "\n"; + } +}); + + +######################################################################## +# The following default handlers will catch any new elements, SDATA, +# processing instructions, or subdocument entities which I might add +# to the document later, and will report an error if they are not +# handled properly above. +######################################################################## + +sgml('start_element',sub { die "Unknown element: " . $_[0]->name; }); +sgml('sdata',sub { die "Unknown SDATA: " . $_[0]; }); +sgml('pi',sub { die "Unknown processing instruction: " . $_[0]->name; }); +sgml('start_subdoc',sub { die "Unknown subdoc entity: " . $_[0]->name; }); + + +######################################################################## +# Utility functions. +######################################################################## + +$id_counter = 1; +sub generate_id { + return "ID" . $id_counter++; +} + +1; diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..db522e1 --- /dev/null +++ b/Makefile @@ -0,0 +1,98 @@ +######################################################################## +# Makefile for installing SGMLS.pm and associated files in Unix +# or Unix-like environments. You should check the values of the +# variables at the beginning and change them as appropriate. +# +# Version: 1.03ii +######################################################################## + +DIST = SGMLSpm-1.03ii.tar.gz + +# +# Beginning of user options. +# + +# Where is the binary for perl5 located on your system? 
+PERL = /usr/bin/perl + +# Where do you want the sgmlspl executable script to be installed? +BINDIR = /usr/local/bin + +# Where do you put local perl5 libraries? +PERL5DIR = /usr/local/lib/perl5 +MODULEDIR = ${PERL5DIR}/SGMLS + +# Where do you want to put sgmlspl specifications? +SPECDIR = ${PERL5DIR} + +# If you plan to install the HTML version of the documentation, where +# do you intend to put it? 'make html' will create two +# subdirectories, ${HTMLDIR}/SGMLSpm and ${HTMLDIR}/sgmlspl, and place +# its files there. +HTMLDIR = /usr/local/lib/www/docs + + +# +# End of user options. +# + +HTML_SOURCES = DOC/HTML/SGMLSpm/ DOC/HTML/sgmlspl/ + +FILES = ${BINDIR}/sgmlspl \ + ${PERL5DIR}/SGMLS.pm \ + ${MODULEDIR}/Output.pm \ + ${MODULEDIR}/Refs.pm \ + ${SPECDIR}/skel.pl + +all: install docs + +install: install_system # install_html + +install_system: ${MODULEDIR} ${FILES} + +dist: ${DIST} + +${BINDIR}/sgmlspl: sgmlspl.pl + sed -e 's!/usr/bin/perl!${PERL}!' sgmlspl.pl > ${BINDIR}/sgmlspl + chmod a+x,a+r ${BINDIR}/sgmlspl + +${PERL5DIR}/SGMLS.pm: SGMLS.pm + cp SGMLS.pm ${PERL5DIR}/SGMLS.pm + chmod a+r ${PERL5DIR}/SGMLS.pm + +${MODULEDIR}: + if [ !
-d ${MODULEDIR} ]; then\ + mkdir ${MODULEDIR}; \ + chmod a+x ${MODULEDIR}; \ + fi + +${MODULEDIR}/Output.pm: Output.pm + cp Output.pm ${MODULEDIR}/Output.pm + chmod a+r ${MODULEDIR}/Output.pm + +${MODULEDIR}/Refs.pm: Refs.pm + cp Refs.pm ${MODULEDIR}/Refs.pm + chmod a+r ${MODULEDIR}/Refs.pm + +${SPECDIR}/skel.pl: skel.pl + cp skel.pl ${SPECDIR}/skel.pl + chmod a+r ${SPECDIR}/skel.pl + +install_html: ${HTML_SOURCES} + cd DOC; make html + rm -rf ${HTMLDIR}/SGMLSpm ${HTMLDIR}/sgmlspl + cp -r ${HTML_SOURCES} ${HTMLDIR} + chmod a+x,a+r ${HTMLDIR}/SGMLSpm ${HTMLDIR}/sgmlspl + chmod a+r ${HTMLDIR}/SGMLSpm/* ${HTMLDIR}/sgmlspl/* + +docs: + cd DOC; make all + +${DIST}: clean docs + cd ..; \ + tar -c -v -z --exclude RCS -f /tmp/${DIST} SGMLSpm; \ + mv /tmp/${DIST} SGMLSpm + +clean: + cd DOC; make clean + rm -f *~ core *.tar *.tar.gz diff --git a/Output.pm b/Output.pm new file mode 100644 index 0000000..4758bfa --- /dev/null +++ b/Output.pm @@ -0,0 +1,225 @@ +package SGMLS::Output; +use Carp; + +use Exporter; +@ISA = Exporter; +@EXPORT = qw(output push_output pop_output); + +$version = '$Id: Output.pm,v 1.6 1995/12/05 12:21:51 david Exp $'; + +=head1 NAME + +SGMLS::Output - Stack-based Output Procedures + +=head1 SYNOPSIS + + use SGMLS::Output; + +To print a string to the current output destination: + + output($data); + +To push a new output level to the filehandle DATA: + + push_output('handle',DATA); + +To push a new output level to the file "foo.data" (which will be +opened and closed automatically): + + push_output('file','foo.data'); + +To push a new output level to a pipe to the shell command "sort": + + push_output('pipe','sort'); + +To push a new output level I<appending> to the file "foo.data": + + push_output('append','foo.data'); + +To push a new output level to an empty string: + + push_output('string'); + +To push a new output level appending to the string "David is ": + + push_output('string',"David is "); + +To push a new output level to The Great Beyond: + + 
push_output('nul'); + +To revert to the previous output level: + + pop_output(); + +To revert to the previous output level, returning the contents of an +output string: + + $data = pop_output(); + +=head1 DESCRIPTION + +This library allows redirectable, stack-based output to files, pipes, +handles, strings, or nul. It is especially useful for packages like +L<SGMLS>, since handlers for individual B<SGML> elements can +temporarily change and restore the default output destination. It is +also particularly useful for capturing the contents of an element (and +its sub-elements) in a string. + +Example: + + sgmls('<title>', sub{ push_output('string'); }); + sgmls('', sub{ $title = pop_output(); }); + +In between, anything sent to B (such as CDATA) will be +accumulated in the string returned from B. + +Example: + + sgmls('', sub { push_output('nul'); }); + sgmls('', sub { pop_output(); }); + +All output will be ignored until the header has finished. + + +=head1 AUTHOR AND COPYRIGHT + +Copyright 1994 and 1995 by David Megginson, +C. Distributed under the terms of the Gnu +General Public License (version 2, 1991) -- see the file C +which is included in the B distribution. + + +=head1 SEE ALSO: + +L. + +=cut + +# +# Anonymous subroutines for handling different types of references. +# +$output_handle_sub = sub { + print $current_output_data @_; +}; + +$output_file_sub = sub { + print $current_output_data @_; +}; + +$output_string_sub = sub { + $current_output_data .= shift; + foreach (@_) { + $current_output_data .= $, . $_; + } + $current_output_data .= $\; +}; + +$output_nul_sub = sub {}; + +# +# Status variables +# +$current_output_type = 'handle'; +$current_output_data = STDOUT; +$current_output_sub = $output_handle_sub; +@output_stack = qw(); + +# +# Externally-visible functions +# + + # Send data to the output. +sub output { + &{$current_output_sub}(@_); +} + + # Push a new output destination. 
sub push_output {
    # push_output(TYPE [, DATA]) -- make a new output destination
    # current, saving the old one for pop_output().
    #
    # TYPE is one of:
    #   'handle'  DATA names a filehandle; an unqualified name is
    #             placed in the caller's package.
    #   'file'    DATA is the name of a file to open for writing.
    #   'append'  DATA is the name of a file to append to.
    #   'pipe'    DATA is a command to pipe the output into.
    #   'string'  DATA is the optional initial value of the string.
    #   'nul'     all output is discarded.
    #
    # Croaks on an unknown TYPE or when the file or pipe cannot be
    # opened.  The old destination is pushed onto @output_stack only
    # once the new one is ready, so a failure leaves both the stack
    # and the current destination exactly as they were.
    my ($type,$data) = @_;

    # open() mode and error-message prefix for each destination that
    # is written through a freshly opened handle.
    my %open_mode  = (file => '>', pipe => '|-', append => '>>');
    my %open_error = (file   => 'Cannot create file',
                      pipe   => 'Cannot open pipe to',
                      append => 'Cannot append to file');

    my ($new_sub,$new_type,$new_data);
    if ($type eq 'handle') {
        # Force unqualified filehandles into caller's package
        my ($package) = caller;
        $data =~ s/^[^':]+$/$package\:\:$&/;
        ($new_sub,$new_type,$new_data) =
            ($output_handle_sub,'handle',$data);
    } elsif (exists $open_mode{$type}) {
        # Three-argument open, so that characters in DATA are never
        # read as part of the open mode.
        my $handle = new_handle();
        open($handle,$open_mode{$type},$data)
            || croak "$open_error{$type} $data.\n";
        # Pipes and appended files are recorded as 'file' as well,
        # which is the type pop_output() closes.
        ($new_sub,$new_type,$new_data) =
            ($output_file_sub,'file',$handle);
    } elsif ($type eq 'string') {
        ($new_sub,$new_type,$new_data) =
            ($output_string_sub,'string',$data);
    } elsif ($type eq 'nul') {
        ($new_sub,$new_type,$new_data) = ($output_nul_sub,'nul','');
    } else {
        croak "Unknown output type: $type.\n";
    }

    # Nothing can fail from here on: save the old destination and
    # switch to the new one.
    push @output_stack, [$current_output_type,$current_output_data,
                         $current_output_sub];
    ($current_output_sub,$current_output_type,$current_output_data) =
        ($new_sub,$new_type,$new_data);
    return;
}

                                # Pop the current output destination.
#
# pop_output()
#
# Make the most recently saved output destination current again and
# dispose of the one being left.  Returns the accumulated text for a
# 'string' destination, the handle for a 'handle' destination, and the
# empty string for 'file' (which is closed first) and 'nul'.
#
# Popping with an empty stack leaves the current type, data and
# subroutine undefined.
#
sub pop_output {
    my ($old_type, $old_data) = ($current_output_type, $current_output_data);

    # Restore the saved state without dereferencing an undefined value
    # when the stack is empty.
    my $saved = pop @output_stack;
    ($current_output_type, $current_output_data, $current_output_sub) =
        $saved ? @$saved : ();

    $old_type = '' unless defined $old_type;

    return $old_data if $old_type eq 'handle' || $old_type eq 'string';
    return ''        if $old_type eq 'nul';
    if ($old_type eq 'file') {
        close($old_data);
        return '';
    }

    # Report the type that was actually found.
    croak "Unknown output type: $old_type.\n";
}

#
# Local Utility functions.
#
$new_handle_counter = 1;

# Return a filehandle name which has not been handed out before.
sub new_handle {
    return "IOHandle" . $new_handle_counter++;
}

1;
diff --git a/README b/README new file mode 100644 index 0000000..7a9cec3 --- /dev/null +++ b/README @@ -0,0 +1,105 @@ + SGMLS.PM: A PERL5 CLASS LIBRARY FOR USE WITH THE + SGMLS AND NSGMLS PARSERS + + Version 1.03ii + + + David Megginson, + University of Ottawa + + +[This is the fourth public release. See "CHANGES", below, for a +summary of the most important changes since version 1.02. For +detailed changes, see ChangeLog and DOC/ChangeLog; for known bugs, see +BUGS.] + + +This distribution contains SGMLS.pm, a perl5 class library for parsing +the output from James Clark's SGMLS and NSGMLS parsers. This is a +free package, but it is _not_ Public-Domain software -- please see the +file COPYING in this directory for the terms of use. + +Full documentation is available in SGML format in the DOC directory, and +in Postscript, LaTeX, and HTML format in the various subdirectories of +the DOC directory. Documentation in POD format is also included +within each of the modules, and once they are installed, you can view +them with perldoc (SGMLS, SGMLS::Output, or SGMLS::Refs) -- there is +no POD documentation in sgmlspl yet. + +Take a look in the elisp/ subdirectory for a special package for Gnu +Emacs -- it will allow you to run (n)sgmls and sgmlspl from within an +Emacs buffer containing an SGML file, displaying the output (LaTeX, +for example) in another Emacs buffer.
+ +I have included a Unix Makefile for installing the package on Unix and +Unix-like systems. Make any necessary changes at the top of the +Makefile, then type + + make install + +to install the perl5 scripts on your system. If you want to test the +installation, change to the DOC subdirectory, read the comments at the +top of the make file there, then type + + make clean; make all + +to regenerate the LaTeX and HTML documentation using the scripts which +you have just installed (you will have to have the SGMLS or NSGMLS +parser, the Docbook 2.2.1 DTD and SGML declaration, and the ISOpub and +ISOnum entity sets available on your system -- copies are included in +the DOC/Extras subdirectory). You may also type + + make dvi + +or + + make ps + +to produce DVI or Postscript versions of the documentation for +printing. + + +** nb: Effective with v.1.02, SGMLS.pm supports the new output from +NSGMLS 0.4.13 with the -h option. + + +CHANGES + +There have been some major changes since version 1.02. File handles +passed as arguments to SGMLS::new should be qualified in the correct +package now. The Output.pm module has been moved, so you should now +type + + use SGMLS::Output; + +instead of + + use Output; + +in all of your scripts. There is also a new package, SGMLS::Refs, to +maintain a database of forward references from one processing pass to +another, like the LaTeX .aux files. Once you have installed SGMLS.pm, +you can type + + perldoc SGMLS::Refs + +at the shell prompt for more information, or you can read the new +section in the SGML documentation for sgmlspl. All of the +documentation for this package is available online through my home +page, listed below. + +As mentioned above, there is also a convenient package for Gnu Emacs +19 in the elisp/ directory. + + +Enjoy! 
+ +David Megginson + + +EMAIL: dmeggins@aix1.uottawa.ca +WWW: http://www.uottawa.ca/~dmeggins +Canada Post: Department of English, + University of Ottawa, + Ottawa, CANADA + diff --git a/Refs.pm b/Refs.pm new file mode 100644 index 0000000..3e3f563 --- /dev/null +++ b/Refs.pm @@ -0,0 +1,216 @@ +package SGMLS::Refs; + +use Carp; + +$version = '$Id: Refs.pm,v 1.5 1995/12/03 21:28:36 david Exp $'; + +=head1 NAME + +SGMLS::Refs + +=head1 SYNOPSIS + + use SGMLS::Refs; + +To create a new reference-manager object using the file "foo.refs": + + my $refs = new SGMLS::Refs("foo.refs"); + +To create a new reference-manager object using the file "foo.refs" and +logging changes to the file "foo.log": + + my $refs = new SGMLS::Refs("foo.refs","foo.log"); + +To record a reference: + + $refs->put("document title",$title); + +To retrieve a reference: + + $title = $refs->get("document title"); + +To return the number of references changed since the last run: + + $num = $refs->changed; + +To print a LaTeX-like warning if any references have changed: + + $refs->warn; + +=head1 DESCRIPTION + +This library can be used together with the B package to keep +track of forward references from one run to another, like the B +C<.aux> files. Each reference manager is an object which reads and +then rewrites a file of perl source, with the file name provided by +the caller. + +Example: + + # Start up the reference manager before the parse. + sgml('start', sub { $refs = new SGMLS::Refs("foo.refs"); }); + + # Warn about any changed references at the end. + sgml('end', sub { $refs->warn; }); + + # Look up the title from the last parse, if available. + sgml('
    ', sub {
      my $element = shift;
      my $id = $element->attribute(ID)->value;
      my $title = $refs->get("title:$id") || "[no title available]";

      $current_div_id = $id;

      output "\\section{$title}\n\n";
    });


    # Save the title for the next parse.
    sgml('', sub { push_output('string'); });
    sgml('', sub {
      my $title = pop_output();
      my $id = $current_div_id;

      $refs->put("title:$id",$title);
    });


=head1 AUTHOR AND COPYRIGHT

Copyright 1994 and 1995 by David Megginson,
C<dmeggins@aix1.uottawa.ca>.  Distributed under the terms of the Gnu
General Public License (version 2, 1991) -- see the file C<COPYING>
which is included in the B<SGMLS.pm> distribution.


=head1 SEE ALSO:

L<SGMLS>, L<SGMLS::Output>.

=cut

#
# Create a new instance of a reference manager.  The first argument is
# the filename for the database, and the second (if present) is a
# filename for logging changes.
#
# The database is read completely, then the file is truncated and kept
# open for writing; the object writes the references back when it is
# destroyed.  Croaks, giving the system error, if the database or the
# log file cannot be opened for writing.
#
sub new {
    my ($class, $filename, $logname) = @_;
    my $self = { 'refs' => {} };
    my $loghandle;

    # Read the current contents of the reference file (if any).
    if (open(my $in, '<', $filename)) {
        # Slurp the whole file.  The separator is localised so that it
        # is restored on every exit path, including a croak below.
        local $/;
        my $source = <$in>;
        close $in;

        # NOTE(review): the database is perl source and is evaluated as
        # such, so it must come from a trusted location.
        my $refs = defined $source ? eval $source : undef;
        $self->{'refs'} = $refs if ref($refs) eq 'HASH';
    }

    # Open the reference file.
    open(my $handle, '>', $filename)
        || croak "Cannot write reference file $filename: $!";

    # Open the log file, if any.
    if ($logname) {
        open($loghandle, '>', $logname)
            || croak "Cannot write log file $logname: $!";
    }

    # Note pertinent information.
    $self->{'change_count'} = 0;
    $self->{'handle'} = $handle;
    $self->{'loghandle'} = $loghandle;
    $self->{'filename'} = $filename;
    $self->{'logname'} = $logname;

    # Bless into the requested class so that subclasses work.
    return bless $self, ref($class) || $class;
}

#
# Set a reference's value.  If the value is unchanged, don't do anything;
# otherwise, note the change by counting it and (optionally) logging it
# to the file handle provided when the object was created.
#
# Returns the previous value (undefined if there was none).
#
sub put {
    my ($self, $key, $value) = @_;
    my $oldvalue = $self->{'refs'}->{$key};

    # Compare as strings, with a missing value counting as empty.
    my $was = defined $oldvalue ? $oldvalue : '';
    my $now = defined $value ? $value : '';

    if ($was ne $now) {
        $self->{'change_count'}++;
        if (my $loghandle = $self->{'loghandle'}) {
            print $loghandle "\"$key\" changed from " .
                "\"$was\" to \"$now\".\n";
        }
        $self->{'refs'}->{$key} = $value;
    }

    return $oldvalue;
}

#
# Grab the value of a reference.
#
sub get {
    my ($self, $key) = @_;

    return $self->{'refs'}->{$key};
}

#
# Return the number of changed references.
#
sub changed {
    my $self = shift;
    # The counter is kept under 'change_count' by new() and put().
    return $self->{'change_count'};
}

#
# Print a warning if any references have
# changed (a la LaTeX -- so that the user knows that another pass is
# necessary).  Return 1 if a warning has been printed, or 0 if it
# was unnecessary.
#
sub warn {
    my $self = shift;
    my $count = $self->{'change_count'};
    my $filename = $self->{'filename'};

    return 0 unless $count > 0;

    my $plural = $count == 1 ? "reference has" : "references have";
    # CORE:: makes it explicit that the builtin is meant, not this method.
    CORE::warn "SGMLS::Refs ($filename): $count $plural changed.\n";
    return 1;
}

#
# Write the references back to the database file as a perl hash
# constructor when the object goes away.
#
sub DESTROY {
    my $self = shift;
    my $handle = $self->{'handle'};
    my $filename = $self->{'filename'};

    close $self->{'loghandle'} if $self->{'loghandle'};
    return unless $handle;

    print $handle "{\n";
    # Sorted, so that the file does not change order between runs.
    foreach my $name (sort keys %{$self->{'refs'}}) {
        my $key = $name;
        my $value = $self->{'refs'}->{$name};
        $value = '' unless defined $value;
        # Escape for a single-quoted perl string.
        foreach my $text ($key, $value) {
            $text =~ s/\\/\\\\/g;
            $text =~ s/'/\\'/g;
        }
        print $handle " '$key' => '$value',\n";
    }
    print $handle " '' => ''\n}\n";

    # Buffered write errors only show up when the file is closed.
    close($handle)
        || CORE::warn "SGMLS::Refs ($filename): cannot save references: $!\n";
}

$handle_counter = 1;
# Return a filehandle name which has not been handed out before.
sub generate_handle {
    return "Handle" . $handle_counter++;
}

1;

diff --git a/SGMLS.pm b/SGMLS.pm new file mode 100644 index 0000000..b29395e --- /dev/null +++ b/SGMLS.pm @@ -0,0 +1,893 @@
package SGMLS;
use Carp;

$version = '$Revision: 1.14 $';

=head1 NAME

SGMLS - class for postprocessing the output from the B<sgmls> and
B<nsgmls> parsers.
+ +=head1 SYNOPSIS + + use SGMLS; + + my $parse = new SGMLS(STDIN); + + my $event = $parse->next_event; + while ($event) { + + SWITCH: { + + ($event->type eq 'start_element') && do { + my $element = $event->data; # An object of class SGMLS_Element + [[your code for the beginning of an element]] + last SWITCH; + }; + + ($event->type eq 'end_element') && do { + my $element = $event->data; # An object of class SGMLS_Element + [[your code for the end of an element]] + last SWITCH; + }; + + ($event->type eq 'cdata') && do { + my $cdata = $event->data; # A string + [[your code for character data]] + last SWITCH; + }; + + ($event->type eq 'sdata') && do { + my $sdata = $event->data; # A string + [[your code for system data]] + last SWITCH; + }; + + ($event->type eq 're') && do { + [[your code for a record end]] + last SWITCH; + }; + + ($event->type eq 'pi') && do { + my $pi = $event->data; # A string + [[your code for a processing instruction]] + last SWITCH; + }; + + ($event->type eq 'entity') && do { + my $entity = $event->data; # An object of class SGMLS_Entity + [[your code for an external entity]] + last SWITCH; + }; + + ($event->type eq 'start_subdoc') && do { + my $entity = $event->data; # An object of class SGMLS_Entity + [[your code for the beginning of a subdoc entity]] + last SWITCH; + }; + + ($event->type eq 'end_subdoc') && do { + my $entity = $event->data; # An object of class SGMLS_Entity + [[your code for the end of a subdoc entity]] + last SWITCH; + }; + + ($event->type eq 'conforming') && do { + [[your code for a conforming document]] + last SWITCH; + }; + + die "Internal error: unknown event type " . $event->type . "\n"; + } + + $event = $parse->next_event; + } + +=head1 DESCRIPTION + +The B package consists of several related classes: see +L<"SGMLS">, L<"SGMLS_Event">, L<"SGMLS_Element">, +L<"SGMLS_Attribute">, L<"SGMLS_Notation">, and L<"SGMLS_Entity">. 
 All +of these classes are available when you specify + + use SGMLS; + +Generally, the only object which you will create explicitly will +belong to the C<SGMLS> class; all of the others will then be created +automatically for you over the course of the parse. Much fuller +documentation is available in the C<.sgml> files in the C<DOC> +directory of the C<SGMLS.pm> distribution. + +=head2 The C<SGMLS> class + +This class holds a single parse. When you create an instance of it, +you specify a file handle as an argument (if you are reading the +output of B<sgmls> or B<nsgmls> from a pipe, the file handle will +ordinarily be C<STDIN>): + + my $parse = new SGMLS(STDIN); + +The most important method for this class is C<next_event>, which reads +and returns the next major event from the input stream. It is +important to note that the C<SGMLS> class deals with most B<ESIS> +events itself: attributes and entity definitions, for example, are +collected and stored automatically and invisibly to the user. The +following list contains all of the methods for the C<SGMLS> class: + +=item C<next_event>: Return an C<SGMLS_Event> object containing the +next major event from the SGML parse. + +=item C<element>: Return an C<SGMLS_Element> object containing the +current element in the document. + +=item C<file>: Return a string containing the name of the current +SGML source file (this will work only if the C<-l> option was given to +B<sgmls> or B<nsgmls>). + +=item C<line>: Return a string containing the current line number +from the source file (this will work only if the C<-l> option was +given to B<sgmls> or B<nsgmls>). + +=item C<appinfo>: Return a string containing the C<APPINFO> +parameter (if any) from the SGML declaration. + +=item C<notation(NNAME)>: Return an C<SGMLS_Notation> object +representing the notation named C<NNAME>. With newer versions of +B<nsgmls>, all notations are available; otherwise, only the notations +which are actually used will be available. + +=item C<entity(ENAME)>: Return an C<SGMLS_Entity> object representing +the entity named C<ENAME>. With newer versions of B<nsgmls>, all +entities are available; otherwise, only external data entities and +internal entities used as attribute values will be available.
 + +=item C<ext>: Return a reference to an associative array for +user-defined extensions. + + +=head2 The C<SGMLS_Event> class + +This class holds a single major event, as generated by the +C<next_event> method in the C<SGMLS> class. It uses the following +methods: + +=item C<type>: Return a string describing the type of event: +"start_element", "end_element", "cdata", "sdata", "re", "pi", +"entity", "start_subdoc", "end_subdoc", and "conforming". See +L<"SYNOPSIS">, above, for the values associated with each of these. + +=item C<data>: Return the data associated with the current event (if +any). For "start_element" and "end_element", returns an +C<SGMLS_Element> object; for "entity", "start_subdoc", and +"end_subdoc", returns an C<SGMLS_Entity> object; for "cdata", "sdata", +and "pi", returns a string; and for "re" and "conforming", returns the +empty string. See L<"SYNOPSIS">, above, for an example of this +method's use. + +=item C<key>: Return a string key to the event, such as an element +or entity name (otherwise, the same as C<data>). + +=item C<file>: Return the current file name, as in the C<SGMLS> +class. + +=item C<line>: Return the current line number, as in the C<SGMLS> +class. + +=item C<element>: Return the current element, as in the C<SGMLS> +class. + +=item C<parse>: Return the C<SGMLS> object which generated the +event. + +=item C<entity(ENAME)>: Look up an entity, as in the C<SGMLS> class. + +=item C<notation(NNAME)>: Look up a notation, as in the C<SGMLS> +class. + +=item C<ext>: Return a reference to an associative array for +user-defined extensions. + + +=head2 The C<SGMLS_Element> class + +This class is used for elements, and contains all associated +information (such as the element's attributes). It recognises the +following methods: + +=item C<name>: Return a string containing the name, or Generic +Identifier, of the element, in upper case. + +=item C<parent>: Return the C<SGMLS_Element> object for the +element's parent (if any). + +=item C<parse>: Return the C<SGMLS> object for the current parse. + +=item C<attributes>: Return a reference to an associative array of +attribute names and C<SGMLS_Attribute> structures. Attribute names +will be all in upper case.
 + +=item C<attribute_names>: Return an array of strings containing the +names of all attributes defined for the current element, in upper +case. + +=item C<attribute(ANAME)>: Return the C<SGMLS_Attribute> structure for +the attribute C<ANAME>. + +=item C<set_attribute(ATTRIBUTE)>: Add the C<SGMLS_Attribute> object +C<ATTRIBUTE> to the current element, replacing any other attribute +structure with the same name. + +=item C<in(NAME)>: Return C<true> (ie. 1) if the string C<NAME> is the +name of the current element's parent, or C<false> (ie. 0) if it is +not. + +=item C<within(NAME)>: Return C<true> (ie. 1) if the string C<NAME> is the +name of any of the ancestors of the current element, or C<false> +(ie. 0) if it is not. + +=item C<ext>: Return a reference to an associative array for +user-defined extensions. + + +=head2 The C<SGMLS_Attribute> class + +Each instance of an attribute for each C<SGMLS_Element> is an object +belonging to this class, which recognises the following methods: + +=item C<name>: Return a string containing the name of the current +attribute, all in upper case. + +=item C<type>: Return a string containing the type of the current +attribute, all in upper case. Available types are "IMPLIED", "CDATA", +"NOTATION", "ENTITY", and "TOKEN". + +=item C<value>: Return the value of the current attribute, if any. +This will be an empty string if the type is "IMPLIED", a string of +some sort if the type is "CDATA" or "TOKEN" (if it is "TOKEN", you may +want to split the string into a series of separate tokens), an +C<SGMLS_Notation> object if the type is "NOTATION", or an +C<SGMLS_Entity> object if the type is "ENTITY". Note that if the +value is "CDATA", it will I<not> have escape sequences for 8-bit +characters, record ends, or SDATA processed -- that will be your +responsibility. + +=item C<is_implied>: Return C<true> (ie. 1) if the value of the +attribute is implied, or C<false> (ie. 0) if it is specified in the +document. + +=item C<set_type(TYPE)>: Change the type of the attribute to the +string C<TYPE> (which should be all in upper case). Available types +are "IMPLIED", "CDATA", "NOTATION", "ENTITY", and "TOKEN". + +=item C<set_value(VALUE)>: Change the value of the attribute to +C<VALUE>, which may be a string, an C<SGMLS_Notation> object, or an +C<SGMLS_Entity> object, depending on the attribute's type.
 + +=item C<ext>: Return a reference to an associative array available +for user-defined extensions. + + +=head2 The C<SGMLS_Notation> class + +All declared notations appear as objects belonging to this class, +which recognises the following methods: + +=item C<name>: Return a string containing the name of the notation. + +=item C<sysid>: Return a string containing the system identifier of +the notation, if any. + +=item C<pubid>: Return a string containing the public identifier of +the notation, if any. + +=item C<ext>: Return a reference to an associative array available +for user-defined extensions. + + +=head2 The C<SGMLS_Entity> class + +All declared entities appear as objects belonging to this class, which +recognises the following methods: + +=item C<name>: Return a string containing the name of the entity, in +mixed case. + +=item C<type>: Return a string containing the type of the entity, in +upper case. Available types are "CDATA", "SDATA", "NDATA" (external +entities only), "SUBDOC", "PI" (newer versions of B<nsgmls> only), or +"TEXT" (newer versions of B<nsgmls> only). + +=item C<value>: Return a string containing the value of the entity, +if it is internal. + +=item C<sysid>: Return a string containing the system identifier of +the entity (if any), if it is external. + +=item C<pubid>: Return a string containing the public identifier of +the entity (if any), if it is external. + +=item C<filenames>: Return an array of strings containing any file +names generated from the identifiers, if the entity is external. + +=item C<notation>: Return the C<SGMLS_Notation> object associated +with the entity, if it is external. + +=item C<data_attributes>: Return a reference to an associative array +of data attribute names (in upper case) and the associated +C<SGMLS_Attribute> objects for the current entity. + +=item C<data_attribute_names>: Return an array of data attribute +names (in upper case) for the current entity. + +=item C<data_attribute(ANAME)>: Return the C<SGMLS_Attribute> object +for the data attribute named C<ANAME> for the current entity. + +=item C<set_data_attribute(ATTRIBUTE)>: Add the C<SGMLS_Attribute> object +C<ATTRIBUTE> to the current entity, replacing any other data attribute +with the same name.
=item C<ext>: Return a reference to an associative array for
user-defined extensions.


=head1 AUTHOR AND COPYRIGHT

Copyright 1994 and 1995 by David Megginson,
C<dmeggins@aix1.uottawa.ca>.  Distributed under the terms of the Gnu
General Public License (version 2, 1991) -- see the file C<COPYING>
which is included in the B<SGMLS.pm> distribution.


=head1 SEE ALSO:

L<SGMLS::Output> and L<SGMLS::Refs>.

=cut

#
# Data class for a single SGMLS ESIS output event.  The object will
# keep information about its own current element and, if available,
# the source file and line where the event appeared.
#
# Event types are as follow:
#   Event                       Data
#   -------------------------------------------------------
#   'start_element'             SGMLS_Element
#   'end_element'               SGMLS_Element
#   'cdata'                     string
#   'sdata'                     string
#   're'                        [none]
#   'pi'                        string
#   'entity'                    SGMLS_Entity
#   'start_subdoc'              SGMLS_Entity
#   'end_subdoc'                SGMLS_Entity
#   'conforming'                [none]
#
package SGMLS_Event;
use Carp;
                                # Constructor.  The file, line and
                                # element are copied from the parse
                                # at the moment of creation.
sub new {
    my ($class, $type, $data, $parse) = @_;
    my $self = [$type,
                $data,
                $parse->file,
                $parse->line,
                $parse->element,
                $parse,
                {}
               ];
                                # Two-argument bless, so that
                                # subclasses get their own class.
    return bless $self, ref($class) || $class;
}
                                # Accessors.
sub type { return $_[0]->[0]; }
sub data { return $_[0]->[1]; }
sub file { return $_[0]->[2]; }
sub line { return $_[0]->[3]; }
sub element { return $_[0]->[4]; }
sub parse { return $_[0]->[5]; }
sub ext { return $_[0]->[6]; }
                                # Generate a key for the event: the
                                # name of an element or entity, and
                                # otherwise the data itself.
sub key {
    my $self = shift;
    my $data = $self->data;
    if (ref($data)
        && (UNIVERSAL::isa($data, 'SGMLS_Element')
            || UNIVERSAL::isa($data, 'SGMLS_Entity'))) {
        return $data->name;
    }
    return $data;
}
                                # Look up an entity in the parse.
sub entity {
    my ($self, $ename) = @_;
    return $self->parse->entity($ename);
}
                                # Look up a notation in the parse.
sub notation {
    my ($self, $nname) = @_;
    return $self->parse->notation($nname);
}


#
# Data class for a single SGML attribute.  The object will know its
# type, and will keep a value unless the type is 'IMPLIED', in which
# case no meaningful value is available.
#
# Attribute types are as follow:
#   Type                        Value
#   ---------------------------------------
#   IMPLIED                     [none]
#   CDATA                       string
#   NOTATION                    SGMLS_Notation
#   ENTITY                      SGMLS_Entity
#   TOKEN                       string
#
package SGMLS_Attribute;
use Carp;
                                # Constructor.
sub new {
    my ($class, $name, $type, $value) = @_;
                                # Two-argument bless, so that
                                # subclasses get their own class.
    return bless [$name, $type, $value, {}], ref($class) || $class;
}
                                # Accessors.
sub name { return $_[0]->[0]; }
sub type { return $_[0]->[1]; }
sub value { return $_[0]->[2]; }
sub ext { return $_[0]->[3]; }
                                # Return 1 if the value is implied.
sub is_implied {
    my $self = shift;
    return ($self->type eq 'IMPLIED');
}
                                # Set the attribute's type.
sub set_type {
    my ($self, $type) = @_;
    $self->[1] = $type;
}

                                # Set the attribute's value.
sub set_value {
    my ($self, $value) = @_;
    $self->[2] = $value;
}


#
# Data class for a single element of an SGML document.  The object will not
# know about its children (data or other elements), but it keeps track of its
# parent and its attributes.
#
package SGMLS_Element;
use Carp;
                                # Constructor.  $attributes is the list
                                # of raw attribute lines collected by
                                # the parse.
sub new {
    my ($class, $name, $parent, $attributes, $parse) = @_;
    return bless [$name, $parent, $attributes, $parse, {}],
        ref($class) || $class;
}
                                # Accessors.
sub name { return $_[0]->[0]; }
sub parent { return $_[0]->[1]; }
sub parse { return $_[0]->[3]; }
sub ext { return $_[0]->[4]; }

                                # Return the associative array of
                                # attributes, parsing it the first
                                # time through.
sub attributes {
    my $self = shift;

    # Slot 2 holds the raw lines until the first call, and the parsed
    # hash afterwards.
    if (ref($self->[2]) eq 'ARRAY') {
        my %parsed;
        foreach my $raw (@{$self->[2]}) {
            $raw =~ /^(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/
                || croak "Bad attribute event data: $raw";
            my ($name, $type, $value) = ($1, $2, $4);

            # Notation and entity names are replaced by the objects
            # known to the parse.
            if ($type eq 'NOTATION') {
                $value = $self->parse->notation($value);
            } elsif ($type eq 'ENTITY') {
                $value = $self->parse->entity($value);
            }
            $parsed{$name} = SGMLS_Attribute->new($name, $type, $value);
        }
        # Stored only once every line has parsed.
        $self->[2] = \%parsed;
    }
    return $self->[2];
}
                                # Return a list of attribute names.
package SGMLS_Element;          # restated, so that the methods below
                                # land in the element class

sub attribute_names {
    my $self = shift;
    return keys(%{$self->attributes});
}
                                # Find an attribute by name.
sub attribute {
    my ($self, $aname) = @_;
    return $self->attributes->{$aname};
}
                                # Add a new attribute.
sub set_attribute {
    my ($self, $attribute) = @_;
    $self->attributes->{$attribute->name} = $attribute;
}
                                # Check parent by name: return the
                                # parent if it is called $name, or ''.
sub in {
    my ($self, $name) = @_;
    my $parent = $self->parent;
    return $parent if $parent && $parent->name eq $name;
    return '';
}
                                # Check ancestors by name: return the
                                # nearest ancestor called $name, or ''.
sub within {
    my ($self, $name) = @_;
    for (my $ancestor = $self->parent; $ancestor;
         $ancestor = $ancestor->parent) {
        return $ancestor if $ancestor->name eq $name;
    }
    return '';
}


#
# Data class for an SGML notation.  The only information available
# will be the name, the sysid, and the pubid -- the rest is up to the
# processing application.
#
package SGMLS_Notation;
use Carp;
                                # Constructor.
sub new {
    my ($class, $name, $sysid, $pubid) = @_;
                                # Two-argument bless, so that
                                # subclasses get their own class.
    return bless [$name, $sysid, $pubid, {}], ref($class) || $class;
}
                                # Accessors.
sub name { return $_[0]->[0]; }
sub sysid { return $_[0]->[1]; }
sub pubid { return $_[0]->[2]; }
sub ext { return $_[0]->[3]; }

#
# Data class for a single SGML entity.  All entities will have a name
# and a type.  Internal entities will be of type CDATA or SDATA only,
# and will have a value rather than a notation and sysid/pubid.  External
# CDATA, NDATA, and SDATA entities will always have notations attached,
# and SUBDOC entities are always external (and will be parsed by SGMLS).
#
# Entity types are as follow:
#   Type                        Internal        External
#   -----------------------------------------------------------
#   CDATA                       x               x
#   NDATA                                       x
#   SDATA                       x               x
#   SUBDOC                                      x
#   (newer versions of NSGMLS only:)
#   PI                          x
#   TEXT                        x               x
#
package SGMLS_Entity;
use Carp;
                                # Constructor.  Slot 3 holds the data
                                # attributes and starts out empty.
sub new {
    my ($class, $name, $type, $value, $sysid, $pubid, $filenames,
        $notation) = @_;
    return bless [$name, $type, $value, {}, $sysid, $pubid, $filenames,
                  $notation, {}],
        ref($class) || $class;
}
                                # Accessors.
package SGMLS_Entity;           # restated, so that the methods below
                                # land in the entity class

sub name { return $_[0]->[0]; }
sub type { return $_[0]->[1]; }
sub value { return $_[0]->[2]; }
sub data_attributes { return $_[0]->[3]; }
sub sysid { return $_[0]->[4]; }
sub pubid { return $_[0]->[5]; }
sub filenames { return $_[0]->[6]; }
sub notation { return $_[0]->[7]; }
sub ext { return $_[0]->[8]; }
                                # Return a list of data-attribute names.
sub data_attribute_names {
    my $self = shift;
    return keys(%{$self->data_attributes});
}
                                # Find a data attribute by name.
sub data_attribute {
    my ($self, $aname) = @_;
    return $self->data_attributes->{$aname};
}
                                # Add a new data attribute.
sub set_data_attribute {
    my ($self, $data_attribute) = @_;
    $self->data_attributes()->{$data_attribute->name} = $data_attribute;
}



#
# Data class for a single SGMLS parse.  The constructor takes a single
# argument, a file handle from which the SGMLS ESIS events will be read
# (it may be a pipe, a fifo, a file, a socket, etc.).  It is essential
# that no two SGMLS objects have the same handle.
#
# The handle may be given as a name or as a reference.
#
package SGMLS;
                                # Constructor.
sub new {
    my ($class, $handle) = @_;

    # Force unqualified filehandle names into the caller's package.
    # References are already unambiguous and must not be turned into
    # strings.
    if (defined $handle && !ref $handle && $handle =~ /^[^':]+$/) {
        my ($package) = caller;
        $handle = $package . '::' . $handle;
    }

    my $self = {
        'handle' => $handle,
        'event_stack' => [],
        'current_element' => '',
        'current_attributes' => [],
        'current_entities' => {},
        'entity_stack' => [],
        'current_notations' => {},
        'notation_stack' => [],
        'current_sysid' => '',
        'current_pubid' => '',
        'current_filenames' => [],
        'current_file' => '',
        'current_line' => '',
        'appinfo' => '',
        'ext' => {}
    };
                                # Two-argument bless, so that
                                # subclasses get their own class.
    return bless $self, ref($class) || $class;
}
                                # Accessors.
sub element { return $_[0]->{'current_element'}; }
sub file { return $_[0]->{'current_file'}; }
sub line { return $_[0]->{'current_line'}; }
sub appinfo { return $_[0]->{'appinfo'}; }
sub ext { return $_[0]->{'ext'}; }

                                # Given its name, look up a notation.
package SGMLS;                  # restated, so that the methods below
                                # land in the parse class

sub notation {
    my ($self, $nname) = @_;
    return $self->{'current_notations'}->{$nname};
}
                                # Given its name, look up an entity.
sub entity {
    my ($self, $ename) = @_;
    return $self->{'current_entities'}->{$ename};
}

#
# Return the next SGMLS_Event, or '' if the document has finished.
#
# Each input line consists of a one-character command followed by its
# data.  Declarations (attributes, notations, entities, identifiers,
# line numbers, APPINFO) are recorded in the parse and never returned;
# elements, data, processing instructions, entity references,
# subdocument boundaries and the conformance marker produce events.
# One data line may produce several events, which are queued and handed
# out on the following calls.
#
# Croaks on malformed declaration data and on references to entities
# which have not been declared.
#
sub next_event {
    my $self = shift;
    my $handle = $self->{'handle'};

                                # If there are any queued up events,
                                # grab them first (events are added
                                # with unshift and taken with pop, so
                                # they come out in order).
    if (@{$self->{'event_stack'}}) {
        return pop @{$self->{'event_stack'}};
    }

  dispatch: while (!eof($handle)) {

        my $c = getc($handle);
        my $data = <$handle>;
        # Remove a line terminator only if there is one, so that an
        # unterminated final line keeps its last character.
        $data = '' unless defined $data;
        chomp $data;

        if ($c eq '(') {                # start an element
            $self->{'current_element'} =
                SGMLS_Element->new($data,
                                   $self->{'current_element'},
                                   $self->{'current_attributes'},
                                   $self);
            $self->{'current_attributes'} = [];
            return SGMLS_Event->new('start_element',
                                    $self->{'current_element'},
                                    $self);
        }

        elsif ($c eq ')') {             # end an element
            my $old = $self->{'current_element'};
            $self->{'current_element'} = $self->{'current_element'}->parent;
            return SGMLS_Event->new('end_element', $old, $self);
        }

        elsif ($c eq '-') {             # some data
            my $sdata_flag = 0;
            my $out = '';

            # Queue the text gathered so far (if any) as one event.
            my $flush = sub {
                return if $out eq '';
                unshift(@{$self->{'event_stack'}},
                        SGMLS_Event->new($sdata_flag ? 'sdata' : 'cdata',
                                         $out,
                                         $self));
                $out = '';
            };

            # Split at the leftmost escape sequence: text before it,
            # the escape itself, and the remainder still to be scanned.
            while ($data =~ /^(.*?)\\(\\|n|\||[0-7]{1,3})(.*)$/s) {
                my ($before, $escape) = ($1, $2);
                $data = $3;
                $out .= $before;

                if ($escape eq '|') {           # beginning or end of SDATA
                    $flush->();
                    $sdata_flag = !$sdata_flag;
                } elsif ($escape eq 'n') {      # record end
                    $flush->();
                    unshift(@{$self->{'event_stack'}},
                            SGMLS_Event->new('re', '', $self));
                } elsif ($escape eq '\\') {     # literal backslash
                    $out .= '\\';
                } else {                        # octal character code
                    $out .= chr(oct($escape));
                }
            }
            $out .= $data;
            $flush->();
            return $self->next_event;
        }

        elsif ($c eq '&') {             # external entity reference
            return SGMLS_Event->new('entity',
                                    ($self->{'current_entities'}->{$data}
                                     || croak "Unknown external entity: $data\n"),
                                    $self);
        }

        elsif ($c eq '?') {             # processing instruction
            return SGMLS_Event->new('pi',
                                    $data,
                                    $self);
        }

        elsif ($c eq 'A') {             # attribute declaration
                                        # (will parse only on demand)
            push @{$self->{'current_attributes'}}, $data;
        }

        elsif ($c eq 'a') {             # link attribute declaration
                                        # NOT YET IMPLEMENTED!
        }

        elsif ($c eq 'D') {             # data attribute declaration
            $data =~ /^(\S+) (\S+) (\S+)( (.*))?$/
                || croak "Bad data-attribute event data: $data";
            my ($ename, $aname, $type, $value) = ($1, $2, $3, $5);
            my $entity = $self->{'current_entities'}->{$ename}
                || croak "Unknown entity in data-attribute event: $ename\n";
            my $attribute = SGMLS_Attribute->new($aname, $type, $value);
            $entity->set_data_attribute($attribute);
        }

        elsif ($c eq 'N') {             # notation declaration
            $self->{'current_notations'}->{$data} =
                SGMLS_Notation->new($data,
                                    $self->{'current_sysid'},
                                    $self->{'current_pubid'});
            $self->{'current_sysid'} = '';
            $self->{'current_pubid'} = '';
        }

        elsif ($c eq 'E') {             # external entity declaration
            $data =~ /^(\S+) (\S+) (\S+)$/
                || croak "Bad external entity event data: $data";
            my ($name, $type, $nname) = ($1, $2, $3);
            my $notation = $self->{'current_notations'}->{$nname};
            $self->{'current_entities'}->{$name} =
                SGMLS_Entity->new($name,
                                  $type,
                                  '',
                                  $self->{'current_sysid'},
                                  $self->{'current_pubid'},
                                  $self->{'current_filenames'},
                                  $notation);
            $self->{'current_sysid'} = '';
            $self->{'current_pubid'} = '';
            $self->{'current_filenames'} = [];
        }

        elsif ($c eq 'I') {             # internal entity declaration
            $data =~ /^(\S+) (\S+) (.*)$/
                || croak "Bad internal entity event data: $data";
            my ($name, $type, $value) = ($1, $2, $3);
            $self->{'current_entities'}->{$name} =
                SGMLS_Entity->new($name, $type, $value);
        }

        elsif ($c eq 'T') {             # external text entity declaration
            $self->{'current_entities'}->{$data} =
                SGMLS_Entity->new($data,
                                  'TEXT',
                                  '',
                                  $self->{'current_sysid'},
                                  $self->{'current_pubid'},
                                  $self->{'current_filenames'},
                                  '');
            $self->{'current_sysid'} = '';
            $self->{'current_pubid'} = '';
            $self->{'current_filenames'} = [];
        }

        elsif ($c eq 'S') {             # subdocument entity declaration
            $self->{'current_entities'}->{$data} =
                SGMLS_Entity->new($data,
                                  'SUBDOC',
                                  '',
                                  $self->{'current_sysid'},
                                  $self->{'current_pubid'},
                                  $self->{'current_filenames'},
                                  '');
            $self->{'current_sysid'} = '';
            $self->{'current_pubid'} = '';
            $self->{'current_filenames'} = [];
        }

        elsif ($c eq 's') {             # system id
            $self->{'current_sysid'} = $data;
        }

        elsif ($c eq 'p') {             # public id
            $self->{'current_pubid'} = $data;
        }

        elsif ($c eq 'f') {             # generated filename
            push @{$self->{'current_filenames'}}, $data;
        }

        elsif ($c eq '{') {             # begin subdocument entity
            my $subdoc = ($self->{'current_entities'}->{$data} ||
                          croak "Unknown SUBDOC entity $data\n");
            # The subdocument gets notation and entity tables of its
            # own; the outer ones are kept until it ends.
            push @{$self->{'notation_stack'}}, $self->{'current_notations'};
            push @{$self->{'entity_stack'}}, $self->{'current_entities'};
            $self->{'current_notations'} = {};
            $self->{'current_entities'} = {};
            return SGMLS_Event->new('start_subdoc',
                                    $subdoc,
                                    $self);
        }

        elsif ($c eq '}') {             # end subdocument entity
            $self->{'current_notations'} = pop @{$self->{'notation_stack'}};
            $self->{'current_entities'} = pop @{$self->{'entity_stack'}};
            return SGMLS_Event->new('end_subdoc',
                                    ($self->{'current_entities'}->{$data} ||
                                     croak "Unknown SUBDOC entity $data\n"),
                                    $self);
        }

        elsif ($c eq 'L') {             # line number (and file name)
            # Malformed data is ignored.
            if ($data =~ /^(\d+)( (.*))?$/) {
                $self->{'current_line'} = $1;
                $self->{'current_file'} = $3 if $3;
            }
        }

        elsif ($c eq '#') {             # APPINFO parameter
            $self->{'appinfo'} = $data;
        }

        elsif ($c eq 'C') {             # document is conforming
            return SGMLS_Event->new('conforming', '', $self);
        }

        # Any other command character is skipped.
    }
    return '';
}

1;

########################################################################
# Local Variables:
# mode: perl
# End:
########################################################################
diff --git a/TODO b/TODO new file mode 100644 index 0000000..6c31e66 --- /dev/null +++ b/TODO @@ -0,0 +1,6 @@ +SGMLS.pm: things to do + +- add support for SDATA, etc. in attribute values (see BUGS). +- add a switch to skel.pl to allow handlers to be sorted into +alphabetical order if desired. +- allow for more customisation in SGMLS.pm data classes. \ No newline at end of file diff --git a/elisp/README b/elisp/README new file mode 100644 index 0000000..99cbe16 --- /dev/null +++ b/elisp/README @@ -0,0 +1,22 @@ +This directory contains sgmls.el, a LISP front-end for the SGMLS +parser and any associated post-processors. By default, it is set up +to use James Clark's sgmls with the sgmlsasp post-processor, but you +can reconfigure it for use with sgmlspl by adding the following code +to your ~/.emacs file: + + (setq-default sgmls-postprocessor "sgmlspl") + +You can also choose a central directory for your sgmlspl scripts, as +in the following example: + + (setq-default sgmls-spec-directory "~/lib/perl5") + +There are many, many more options -- read the comments at the top of +the file for details. + + +David Megginson, +Acting Director, Writing Centre, +University of Ottawa + + \ No newline at end of file diff --git a/elisp/sgmls.el b/elisp/sgmls.el new file mode 100644 index 0000000..2f4d0d9 --- /dev/null +++ b/elisp/sgmls.el @@ -0,0 +1,437 @@ +;;;; sgmls.el --- LISP front end for SGMLS and a postprocessor.
+ +;;; Copyright (C) 1994 David Megginson + +;;; Author: David Megginson + +;;; Like Gnu Emacs, this program is free software; you can redistribute +;;; it and/or modify it under the terms of the GNU General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with Gnu Emacs, which is required to run it; if not, write to +;;; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA +;;; 02139, USA. + + +;;;; Commentary + +;;; A LISP front end for the SGMLS parser and any postprocessors. + +;;; This package provides a convenient front end for the free SGMLS +;;; parser and any postprocessors (such as perl scripts or the simple +;;; SGMLSASP program supplied with SGMLS). With a single command, +;;; emacs will cause an SGML source file to be parsed and processed in +;;; the background, placing the final output in a separate buffer. + + +;;;; USAGE + +;;; Place this file somewhere in your load path, byte-compile it if +;;; you wish (it is not speed-critical), and add one of the following +;;; sequences to your default.el or .emacs file: +;;; +;;; (autoload 'sgmls "sgmls" nil t) +;;; (autoload 'sgmls-edit-spec "sgmls" nil t) +;;; (autoload 'sgmls-start-process "sgmls") +;;; +;;; __OR__ +;;; +;;; (require 'sgmls) +;;; +;;; The first will load sgmls.el only upon demand, while the second +;;; will load it unconditionally whenever emacs starts. You may then +;;; run the package simply by typing "M-x sgmls" (you may, of course, +;;; prefer to bind it to a keystroke). 
+
+
+;;;; CONFIGURATION
+
+;;; This package is highly configurable, but its default setup should
+;;; work well for the average user.  All of the options are documented
+;;; in the next section under "User Options" -- the most important are
+;;; `sgmls-spec', which contains the name of the file containing a
+;;; specification for the postprocessor; `sgmls-spec-directory', which
+;;; contains a default directory for the specs; `sgmls-postprocessor',
+;;; which contains the name of the postprocessor program ("sgmlsasp"
+;;; by default); and `sgmls-declaration' (or `sgml-declaration' from
+;;; psgml.el), which contains the name of the file containing an SGML
+;;; declaration.  All of these options are buffer-local, and may be
+;;; set in the "Local Variables:" section of a file.
+;;;
+;;; If you call `sgmls' with a prefix argument, you will be given an
+;;; opportunity to review and override all of the default settings.
+
+
+;;;; KNOWN BUGS
+
+;;; Because of the way that Emacs and this package handle the
+;;; processes, errors are not handled cleanly.  The exit status
+;;; displayed when the process terminates applies only to the
+;;; postprocessor; the exit status of the SGMLS parser itself, which
+;;; is the first element in the pipeline, is not indicated unless the
+;;; postprocessor is smart enough to notice that something is wrong.
+;;; Furthermore, since Emacs always mixes the stdout and stderr of its
+;;; subprocesses together, any error messages will simply be embedded
+;;; in the middle of the output buffer.
+
+
+
+;;;; User Options.
+ +(defvar sgmls-parser-command nil + "*Command for running SGMLS.") +(make-variable-buffer-local 'sgmls-parser-command) + +(defvar sgmls-postprocessor-command nil + "*Command for running SGMLS postprocessor") +(make-variable-buffer-local 'sgmls-postprocessor-command) + +(defvar sgmls-parser "sgmls" + "*Name of SGMLS program on local system (buffer-local).") +(make-variable-buffer-local 'sgmls-parser) + +(defvar sgmls-parser-options "" + "*Options for SGMLS parser.") +(make-variable-buffer-local 'sgmls-parser-options) + +(defvar sgmls-declaration nil + "*Default SGML declaration (buffer-local). +If this variable is not set, the variable `sgml-declaration' (from +psgml.el) will be tried.") +(make-variable-buffer-local 'sgmls-declaration) + +(defvar sgmls-source-file nil + "*Default SGML source file (buffer-local). +By default, this will usually be the file associated with the current +buffer, but it can also be generated from the `sgml-parent-document' +variable (from psgml.el), or set manually.") +(make-variable-buffer-local 'sgmls-source-file) + +(defvar sgmls-postprocessor "sgmlsasp" + "*Name of SGMLS postprocessor on local system (buffer-local).") +(make-variable-buffer-local 'sgmls-postprocessor) + +(defvar sgmls-postprocessor-options "" + "*Options for SGMLS postprocessor.") +(make-variable-buffer-local 'sgmls-postprocessor-options) + +(defvar sgmls-spec nil + "*Default specification file or argument for postprocessor (buffer-local). +This may be specified relative to `sgmls-spec-directory'.") +(make-variable-buffer-local 'sgmls-spec) + +(defvar sgmls-spec-directory nil + "*Default directory for sgmls-spec.") +(make-variable-buffer-local 'sgmls-spec-directory) + +(defvar sgmls-output-file nil + "*Default file name for sgmls output file (buffer-local). 
+This may be specified relative to `sgmls-output-directory', and may be +generated automatically from the source file using `sgmls-output-file-ext'.") +(make-variable-buffer-local 'sgmls-output-file) + +(defvar sgmls-output-file-ext nil + "*Default extension for sgmls output file (buffer-local). +If `sgmls-output-file' is not set, it will be generated from the source +file using the extension provided here.") +(make-variable-buffer-local 'sgmls-output-file-ext) + +(defvar before-sgmls-hooks nil + "*Hooks to run on the output buffer before SGMLS starts (buffer-local).") +(make-variable-buffer-local 'before-sgmls-hooks) + +(defvar after-sgmls-hooks nil + "*Hooks to run on the output buffer after SGMLS finishes (buffer-local).") +(make-variable-buffer-local 'after-sgmls-hooks) + + +;;; Internal variables. + +(defvar sgmls::parser-error-file nil) +(make-variable-buffer-local 'sgmls::parser-error-file) + +(defvar sgmls::postprocessor-error-file) +(make-variable-buffer-local 'sgmls::postprocessor-error-file) + + + +;;;; User-level commands and functions. + +;; +;; Interactive function to set up command line and run sgmls. +;; +(defun sgmls (flag) + "Run sgmls and a postprocessor, putting the output into a file buffer. +With a prefix argument, allow the caller to review and override any +default values. The variables `sgmls-parser' and +`sgmls-postprocessor' contain the actual names of the programs which +will be run (in a shell command sent to `shell-file-name')." 
+ + (interactive "P") + + (if (or flag (not sgmls-parser-command)) + (setq sgmls-parser-command + (format "%s %s %s %s" + sgmls-parser + sgmls-parser-options + (sgmls::generate-declaration) + (sgmls::generate-source-file)))) + (setq sgmls-parser-command + (read-string "Parser command: " + (cons sgmls-parser-command 1))) + + (if (or flag (not sgmls-postprocessor-command)) + (setq sgmls-postprocessor-command + (format "%s %s %s" + sgmls-postprocessor + sgmls-postprocessor-options + (sgmls::generate-spec flag)))) + (setq sgmls-postprocessor-command + (read-string "Postprocessor command: " + (cons sgmls-postprocessor-command 1))) + + (sgmls-start-process + (format "%s 2>%s | %s 2>%s" + sgmls-parser-command + (sgmls::generate-parser-error-file) + sgmls-postprocessor-command + (sgmls::generate-postprocessor-error-file)) + (find-file-noselect (sgmls::generate-output-file flag)))) + + +;; +;; Edit the spec associated with a buffer. +;; +(defun sgmls-edit-spec () + "Edit the SGMLS spec associated with a buffer. +If the local variable `sgmls-spec' is set, the file will be loaded with +`find-file-other-window'; otherwise, an error will be signaled." + (interactive) + (if sgmls-spec + (find-file-other-window + (expand-file-name sgmls-spec sgmls-spec-directory)) + (error "No spec is currently assigned (see `sgmls-spec')."))) + + +;; +;; Actually run sgmls and the sgmls post-processor. +;; +(defun sgmls-start-process (command buffer) + "Run an SGMLS command, placing its output into the given buffer. +The command should be a string which will be passed to the shell." 
+ (save-some-buffers) + (let ((old-buffer (current-buffer)) + (parser-error-file sgmls::parser-error-file) + (postprocessor-error-file sgmls::postprocessor-error-file) + (before-hooks before-sgmls-hooks) + (after-hooks after-sgmls-hooks) + proc) + (set-buffer buffer) + (display-buffer buffer) + (setq sgmls::parser-error-file parser-error-file) + (setq sgmls::postprocessor-error-file postprocessor-error-file) + (setq before-sgmls-hooks before-hooks) + (setq after-sgmls-hooks after-hooks) + (if (and (> (point-max) (point-min)) + (yes-or-no-p + (format "Discard current contents of buffer %s? " + (buffer-name buffer)))) + (erase-buffer)) + (goto-char (point-max)) + (run-hooks 'before-sgmls-hooks) + (set-buffer old-buffer) + (setq proc (start-process "sgmls" + buffer + "nice" + shell-file-name + "-c" + command)) + (set-process-sentinel proc (function sgmls::process-sentinel)) + (message "Converting from %s to %s (process: %s)..." + (file-name-nondirectory sgmls-source-file) + (file-name-nondirectory sgmls-output-file) + (process-name proc)) + proc)) + + + +;;;; Internal functions. + +;; +;; Sentinel for end of program run. +;; +(defun sgmls::process-sentinel (proc message) + (let ((old-buffer (current-buffer)) + (stat (process-status proc)) + msg) + (unwind-protect + (progn + (set-buffer (process-buffer proc)) + (cond ((eq stat 'exit) + (setq msg + (format + "SGMLS: process \"%s\" finished with status %d." + (process-name proc) + (process-exit-status proc))) + (goto-char (point-min)) + (run-hooks 'after-sgmls-hooks) + (display-buffer (process-buffer proc))) + ((or (eq stat 'signal) (eq stat 'closed)) + (error "SGMLS: %S %s." proc message))) + (sgmls::check-error-files proc + sgmls::parser-error-file + sgmls::postprocessor-error-file)) + (delete-file sgmls::parser-error-file) + (delete-file sgmls::postprocessor-error-file) + (set-buffer old-buffer) + (if msg (message msg))))) + +;; +;; Check whether there is anything in the error files. 
+;;
+(defun sgmls::check-error-files (proc parser-file postprocessor-file)
+  (let ((old-buffer (current-buffer))
+        (parser-buffer
+         (get-buffer-create (concat "**"
+                                    (buffer-name
+                                     (process-buffer proc))
+                                    ": parser errors**")))
+        (postprocessor-buffer
+         (get-buffer-create (concat "**"
+                                    (buffer-name
+                                     (process-buffer proc))
+                                    ": postprocessor errors**")))
+        window)
+    (set-buffer parser-buffer)
+    (erase-buffer)
+    (insert-file-contents parser-file)
+    (if (> (point-max) (point-min))
+        (setq window (display-buffer parser-buffer))
+      (kill-buffer parser-buffer))
+    (if window
+        (set-window-dedicated-p window t))
+    (set-buffer postprocessor-buffer)
+    (erase-buffer)
+    (insert-file-contents postprocessor-file)
+    (if (> (point-max) (point-min))
+        (display-buffer postprocessor-buffer)
+      (kill-buffer postprocessor-buffer))
+    (set-buffer old-buffer)
+    (cond (window
+           (set-window-dedicated-p window nil)
+           (balance-windows)))))
+
+;;
+;; Generate a declaration to use for sgmls.  Return the value of
+;; `sgmls-declaration' if it is set, else a non-nil `sgml-declaration'
+;; (from psgml.el), else the empty string.  Takes no arguments.
+;;
+(defun sgmls::generate-declaration ()
+  (cond (sgmls-declaration)
+        ((and (boundp 'sgml-declaration) sgml-declaration) sgml-declaration)
+        (t "")))
+
+;;
+;; Return the name of a source file to use (also stored in
+;; `sgmls-source-file').  A non-nil `sgml-parent-document' (from psgml.el;
+;; its car if it is a list) wins, else the buffer's file name (no prompt).
+;;
+(defun sgmls::generate-source-file ()
+  (cond ((and (boundp 'sgml-parent-document) sgml-parent-document)
+         (if (consp sgml-parent-document)
+             (setq sgmls-source-file (car sgml-parent-document))
+           (setq sgmls-source-file sgml-parent-document)))
+        (t (setq sgmls-source-file
+                 (file-name-nondirectory (buffer-file-name)))))
+  sgmls-source-file)
+
+;;
+;; Return the name of a spec to use.
+;; +(defun sgmls::generate-spec (flag) + (let ((buffer-file-name nil)) + (if sgmls-spec-directory + (setq sgmls-spec-directory + (file-name-as-directory sgmls-spec-directory))) + (if (or flag (null sgmls-spec)) + (setq sgmls-spec + (read-file-name + "SGMLS spec: " + (if sgmls-spec + (file-name-directory sgmls-spec) + sgmls-spec-directory) + nil + 1 + (if sgmls-spec + (file-name-nondirectory sgmls-spec))))) + (if (and (file-readable-p sgmls-spec) + (not (file-directory-p sgmls-spec))) + sgmls-spec + (if (and (file-readable-p + (expand-file-name sgmls-spec sgmls-spec-directory)) + (not (file-directory-p + (expand-file-name sgmls-spec sgmls-spec-directory)))) + (setq sgmls-spec (expand-file-name sgmls-spec sgmls-spec-directory)) + (error "SGMLS spec \"%s\" is not readable or is a directory." + sgmls-spec))))) + +;; +;; Return the name of a temporary file to use for recording errors +;; from the parser or the postprocessor. +;; +(defun sgmls::generate-parser-error-file () + (setq sgmls::parser-error-file + (make-temp-name "/tmp/sgmlspa"))) + +;; +;; Return the name of a file for SGMLS postprocessor output. +;; +(defun sgmls::generate-output-file (flag) + + ;; Try to set up default values. + (if (and (not sgmls-output-file) + sgmls-output-file-ext + (buffer-file-name) + (or (string-match "^\\(.*\\)\\(\\.[^.]*\\)$" (buffer-file-name)) + (string-match "^\\(.+\\)$" (buffer-file-name)))) + (setq sgmls-output-file + (expand-file-name + (concat (substring (buffer-file-name) + (match-beginning 1) + (match-end 1)) + "." + sgmls-output-file-ext)))) + + ;; Prompt if necessary. 
+ (setq sgmls-output-file + (read-file-name "SGMLS output file: " + nil + sgmls-output-file + nil + (if sgmls-output-file + (file-name-nondirectory + sgmls-output-file)))) + + (if (string= (expand-file-name sgmls-source-file) + (expand-file-name sgmls-output-file)) + (progn + (setq sgmls-output-file nil) + (error "SGMLS: source file and output file are the same."))) + + sgmls-output-file) + +;; +;; Generate the name of a temporary file to use for postprocessor errors. +;; +(defun sgmls::generate-postprocessor-error-file () + (setq sgmls::postprocessor-error-file + (make-temp-name (concat "/tmp/sgmlspp")))) + +(provide 'sgmls) diff --git a/packaging/perl-SGMLSpm.changes b/packaging/perl-SGMLSpm.changes new file mode 100644 index 0000000..76da9e0 --- /dev/null +++ b/packaging/perl-SGMLSpm.changes @@ -0,0 +1,75 @@ +* Fri Aug 31 19:28:21 UTC 2012 - tracy.graydon@intel.com +- TIVI-153: add as dependency for iputils. Imported from MeeGo trunk + +* Wed Feb 27 2008 Tom "spot" Callaway - 1.03ii-18 +- Rebuild for perl 5.10 (again) + +* Fri Jan 25 2008 Tom "spot" Callaway - 1.03ii-17 +- rebuild for new perl + +* Thu Oct 26 2007 Ondrej Vasik - 1.03ii-16.4 +- added base documentation +- fixed indents + +* Mon Oct 22 2007 Ondrej Vasik - 1.03ii-16.3 +- added dist tag +- License to GPL+ +- spec file cleanup (all things from merge review by pnemade #226278) + +* Wed Jul 12 2006 Jesse Keating - 1.03ii-16.2.1 +- rebuild + +* Fri Feb 03 2006 Jason Vas Dias - 1.03ii-16.2 +- rebuild for new perl-5.8.8 + +* Fri Dec 16 2005 Jesse Keating +- rebuilt for new gcc + +* Fri Dec 16 2005 Jesse Keating +- rebuilt for new gcj + +* Sat Apr 30 2005 Jose Pedro Oliveira - 1.03ii-15 +- Specfile cleanup. (#156483) + +* Wed Sep 22 2004 Than Ngo +- rebuilt + +* Fri Feb 13 2004 Elliot Lee +- rebuilt + +* Wed May 28 2003 Tim Waugh 1.03ii-12 +- Use vendorlib not sitelib (bug #73493). +- Own %%{perldir}/SGMLS (bug #73922). + +* Tue Jan 28 2003 Tim Waugh 1.03ii-11 +- Rebuilt. 
+ +* Wed Jan 22 2003 Tim Powers +- rebuilt + +* Sat Dec 14 2002 Tim Powers +- don't use rpms internal dep generator + +* Mon Nov 25 2002 Tim Waugh 1.03ii-9 +- Fix URL (bug #71895). + +* Mon Nov 25 2002 Tim Waugh 1.03ii-8 +- Rebuild to get automatic provides: right. + +* Wed Nov 20 2002 Tim Powers 1.03ii-7 +- rebuild in current collinst + +* Mon Jun 17 2002 Tim Waugh 1.03ii-6 +- Rebuild in new environment. + +* Sun Jan 14 2001 Tim Waugh 1.03ii-5 +- Add defattr to files section. + +* Mon Jan 08 2001 Tim Waugh +- Change group. +- rm before install. +- Change Copyright: to License:. +- Remove Packager: line. + +* Mon Jan 08 2001 Tim Waugh +- Based on Eric Bischoff's new-trials packages. diff --git a/packaging/perl-SGMLSpm.spec b/packaging/perl-SGMLSpm.spec new file mode 100644 index 0000000..f9d4203 --- /dev/null +++ b/packaging/perl-SGMLSpm.spec @@ -0,0 +1,46 @@ +#specfile originally created for Fedora, modified for Moblin Linux +Name: perl-SGMLSpm +Version: 1.03ii +Release: 18 +Summary: Perl library for parsing the output of nsgmls + +Group: Development/Libraries +License: GPLv2+ +URL: http://search.cpan.org/dist/SGMLSpm/ +Source0: http://www.cpan.org/authors/id/D/DM/DMEGG/SGMLSpm-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) + +BuildArch: noarch +BuildRequires: perl +Requires: perl(:MODULE_COMPAT_%(eval "`%{__perl} -V:version`"; echo $version)) +Requires: openjade + +%description +Perl programs can use the SGMLSpm module to help convert SGML, HTML or XML +documents into new formats. 
+ + +%prep +%setup -q -n SGMLSpm + +%build + +%install +rm -rf $RPM_BUILD_ROOT +install -d -m 755 $RPM_BUILD_ROOT{%{_bindir},%{perl_vendorlib}} +make install_system \ + BINDIR=$RPM_BUILD_ROOT%{_bindir} \ + PERL5DIR=$RPM_BUILD_ROOT%{perl_vendorlib} + +%clean +rm -rf $RPM_BUILD_ROOT + + +%files +%defattr(-,root,root,-) +%doc README COPYING +%{_bindir}/sgmlspl +%{perl_vendorlib}/SGMLS* +%{perl_vendorlib}/skel.pl + + diff --git a/sgmlspl.pl b/sgmlspl.pl new file mode 100755 index 0000000..c91a6cb --- /dev/null +++ b/sgmlspl.pl @@ -0,0 +1,317 @@ +#!/usr/bin/perl +######################################################################## +# sgmlspl: a simple SGML postprocesser for the SGMLS and NSGMLS +# parsers (requires SGMLS.pm library). +# +# Copyright (c) 1995 by David Megginson +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +# $Log: sgmlspl.pl,v $ +# Revision 1.8 1995/12/03 21:46:31 david +# Eliminated all use of the SGMLS_Event::key method. +# +# Revision 1.7 1995/11/15 20:22:24 david +# Changed "use Output" to "use SGMLS::Output". Qualified the STDIN +# filehandle for the SGMLS object with the main:: package name. +# +# Revision 1.6 1995/08/12 16:15:14 david +# Revised version for 1.01 distribution. 
+#
+# Revision 1.5 1995/04/27 11:52:25 david
+# Changed 'print' to 'main::output' for re handler; empty string
+# translates into an empty sub {} rather than a sub printing an empty
+# string; instead of evaluating every argument as a perl script, take
+# only the first as a perl script and the rest as its arguments; allow
+# empty scripts or scripts which do not end with '1;'; pass the event
+# itself as the second argument to each handler, after the event data.
+#
+# Revision 1.4 1995/04/23 14:44:58 david
+# Use the Output package. Fixed the $version variable.
+#
+# Revision 1.3 1995/04/22 21:02:49 david
+# Added some missing 'last SWITCH;' statements in the sgmls function.
+#
+# Revision 1.2 1995/04/22 20:58:48 david
+# Added $SGMLS_PL::version variable and changed SDATA notation from
+# [SDATA] to |SDATA|.
+#
+# Revision 1.1 1995/04/22 14:40:50 david
+# Initial revision
+#
+########################################################################
+
+use SGMLS::Output;
+
+package SGMLS_PL;
+use SGMLS;
+
+$version = '$Id: sgmlspl.pl,v 1.8 1995/12/03 21:46:31 david Exp $';
+
+#
+# Handler defaults.  Hashes are keyed by name ('' is the generic fallback).
+#
+$start_document_handler = sub {};
+$end_document_handler = sub {};
+$start_element_handlers = { '' => sub {} };
+$end_element_handlers = { '' => sub {} };
+$cdata_handler = sub { main::output($_[0]); };
+$sdata_handlers = { '' => sub { main::output($_[0]);} };
+$re_handler = sub { main::output("\n"); };
+$pi_handler = sub {};
+$entity_handlers = { '' => sub {} };
+$start_subdoc_handlers = { '' => sub {} };
+$end_subdoc_handlers = { '' => sub {} };
+$conforming_handler = sub {};
+
+#
+# Main access point: declare handlers for different SGML events.
+#
+# Usage: sgml(event, handler);
+#
+# The event may be one of the following strings, or a special pattern.
+# The generic events are as follows:
+#
+#       'start'                 The beginning of the document.
+#       'end'                   The end of the document.
+#       'start_element'         The beginning of an element.
+#       'end_element'           The end of an element.
+#       'cdata'                 Regular character data.
+#       'sdata'                 Special system-specific data.
+#       're'                    A record-end.
+#       'pi'                    A processing instruction.
+#       'entity'                An external-entity reference.
+#       'start_subdoc'          The beginning of a subdocument entity.
+#       'end_subdoc'            The end of a subdocument entity.
+#       'conforming'            The document is conforming.
+#
+# In addition to these generic events, it is possible to declare handlers
+# for certain specific, named events, as follows:
+#
+#       '<GI>'                  The beginning of element GI.
+#       '</GI>'                 The end of element GI.
+#       '|SDATA|'               The system-specific data SDATA.
+#       '&ENAME;'               A reference to the external entity ENAME.
+#       '{ENAME}'               The beginning of the subdocument-entity ENAME.
+#       '{/ENAME}'              The end of the subdocument-entity ENAME.
+#
+#
+# The handler may be a string, which will simply be printed when the
+# event occurs (this is usually useful only for the specific, named
+# events), or a reference to an anonymous subroutine, which will
+# receive two arguments: the event data and the event itself.  For
+# example,
+#
+#       sgml('<FOO>', "\n\\begin{foo}\n");
+#
+# and
+#
+#       sgml('<FOO>', sub { output("\n\\begin{foo}\n"); });
+#
+# will have identical results.
+#
+sub main::sgml {
+    my ($spec,$handler) = (@_);
+    if (ref($handler) ne 'CODE') {
+        # A string handler prints itself; a closure needs no quoting or eval.
+        my $text = defined($handler) ? $handler : '';
+        if ($text eq '') {
+            $handler = sub {};
+        } else {
+            $handler = sub { main::output($text); };
+        }
+    }
+  SWITCH: {
+                                # start-document handler
+    $spec eq 'start' && do {
+        $start_document_handler = $handler;
+        last SWITCH;
+    };
+                                # end-document handler
+    $spec eq 'end' && do {
+        $end_document_handler = $handler;
+        last SWITCH;
+    };
+                                # start-element handler
+    $spec =~ /^<([^\/].*|)>$/ && do {
+        $start_element_handlers->{$1} = $handler;
+        last SWITCH;
+    };
+                                # generic start-element handler
+    $spec eq 'start_element' && do {
+        $start_element_handlers->{''} = $handler;
+        last SWITCH;
+    };
+                                # end-element handler
+    $spec =~ /^<\/(.*)>$/ && do {
+        $end_element_handlers->{$1} = $handler;
+        last SWITCH;
+    };
+                                # generic end-element handler (exact match)
+    $spec eq 'end_element' && do {
+        $end_element_handlers->{''} = $handler;
+        last SWITCH;
+    };
+                                # cdata handler
+    $spec eq 'cdata' && do {
+        $cdata_handler = $handler;
+        last SWITCH;
+    };
+                                # sdata handler
+    $spec =~ /^\|(.*)\|$/ && do {
+        $sdata_handlers->{$1} = $handler;
+        last SWITCH;
+    };
+                                # generic sdata handler
+    $spec eq 'sdata' && do {
+        $sdata_handlers->{''} = $handler;
+        last SWITCH;
+    };
+                                # record-end handler
+    $spec eq 're' && do {
+        $re_handler = $handler;
+        last SWITCH;
+    };
+                                # processing-instruction handler
+    $spec eq 'pi' && do {
+        $pi_handler = $handler;
+        last SWITCH;
+    };
+                                # entity-reference handler
+    $spec =~ /^\&(.*);$/ && do {
+        $entity_handlers->{$1} = $handler;
+        last SWITCH;
+    };
+                                # generic entity-reference handler
+    $spec eq 'entity' && do {
+        $entity_handlers->{''} = $handler;
+        last SWITCH;
+    };
+                                # start-subdoc handler
+    $spec =~ /^\{([^\/].*|)\}$/ && do {
+        $start_subdoc_handlers->{$1} = $handler;
+        last SWITCH;
+    };
+                                # generic start-subdoc handler
+    $spec eq 'start_subdoc' && do {
+        $start_subdoc_handlers->{''} = $handler;
+        last SWITCH;
+    };
+                                # end-subdoc handler
+    $spec =~ /^\{\/(.*)\}$/ && do {
+        $end_subdoc_handlers->{$1} = $handler;
+        last SWITCH;
+    };
+                                # generic end-subdoc handler
+    $spec eq 'end_subdoc' && do {
+        $end_subdoc_handlers->{''} = $handler;
+        last SWITCH;
+    };
+                                # conforming handler
+    $spec eq 'conforming' && do {
+        $conforming_handler = $handler;
+        last SWITCH;
+    };
+
+    die "Bad SGML handler pattern: $spec\n";
+  }
+}
+
+
+#
+# The first argument on the command line is a perl module which will be
+# read here and evaluated in the 'main' package -- everything else will
+# be an argument to it.
+#
+package main;
+
+$ARGV = shift;
+unless ($ARGV eq '' || do $ARGV) {
+    if (!-e $ARGV) {
+        die "FATAL: $ARGV does not exist.\n";
+    } elsif (!-r $ARGV) {
+        die "FATAL: $ARGV exists but is read-protected.\n";
+    } elsif ($@) {
+        die "FATAL: $@\n";
+    }
+}
+
+
+#
+# Do the actual work, using the SGMLS package.
+#
+package SGMLS_PL;
+
+$parse = new SGMLS(main::STDIN); # a new parse object
+
+&{$start_document_handler}();   # run the start handler.
+ + # run the appropriate handler for each + # event +while ($event = $parse->next_event) { + my $type = $event->type; + SWITCH: { + $type eq 'start_element' && do { + &{($start_element_handlers->{$event->data->name}|| + $start_element_handlers->{''} || sub {})}($event->data,$event); + last SWITCH; + }; + $type eq 'end_element' && do { + &{($end_element_handlers->{$event->data->name}|| + $end_element_handlers->{''} || sub {})}($event->data,$event); + last SWITCH; + }; + $type eq 'cdata' && do { + &{$cdata_handler}($event->data,$event); + last SWITCH; + }; + $type eq 'sdata' && do { + &{($sdata_handlers->{$event->data}|| + $sdata_handlers->{''} || sub {})}($event->data,$event); + last SWITCH; + }; + $type eq 're' && do { + &{$re_handler}($event->data,$event); + last SWITCH; + }; + $type eq 'pi' && do { + &{$pi_handler}($event->data,$event); + last SWITCH; + }; + $type eq 'entity' && do { + &{($entity_handlers->{$event->data->name}|| + $entity_handlers->{''} || sub {})}($event->data,$event); + last SWITCH; + }; + $type eq 'start_subdoc' && do { + &{($start_subdoc_handlers->{$event->data->name}|| + $start_subdoc_handlers->{''} || sub {})}($event->data,$event); + last SWITCH; + }; + $type eq 'end_subdoc' && do { + &{($end_subdoc_handlers->{$event->data->name}|| + $end_subdoc_handlers->{''} || sub {})}($event->data,$event); + last SWITCH; + }; + $type eq 'conforming' && do { + &{$conforming_handler}($event->data,$event); + last SWITCH; + }; + + die "Unknown SGML event type: $type\n"; + } +} + +&{$end_document_handler}(); # run the end handler diff --git a/skel.pl b/skel.pl new file mode 100644 index 0000000..f10c1ff --- /dev/null +++ b/skel.pl @@ -0,0 +1,178 @@ +######################################################################## +# skel.pl: an SGMLSPL script for producing scripts (!!). 
+# +# Copyright (c) 1995 by David Megginson +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +# +# Changes: +# +# $Log: skel.pl,v $ +# Revision 1.4 1995/11/15 20:21:07 david +# Changed "use Output" to "use SGMLS::Output". +# +# Revision 1.3 1995/08/24 15:04:38 david +# Fixed commented-out 're' handler. +# +# Revision 1.2 1995/08/12 16:16:42 david +# Revised version for 1.01 distribution. +# +# Revision 1.1 1995/04/23 14:49:35 david +# Initial revision +# +######################################################################## + +use SGMLS; +use SGMLS::Output; + +$version = '$Id: skel.pl,v 1.4 1995/11/15 20:21:07 david Exp $'; + +%subdocs = (); # Subdocument entities seen so far. +%entities = (); # External data entities seen so far. +%sdata = (); # SDATA strings seen so far. +%elements = (); # Elements seen so far. +$pi = 0; # Any processing instructions? + +$intro = 0; # Have we printed the banner yet? + +$| = 1; + +sgml('end_element', ''); # Ignore the ends of elements. +sgml('end_subdoc', ''); # Ignore the ends of subdocument entities. +sgml('cdata', ''); # Ignore CDATA. +sgml('re', ''); # Ignore Record Ends. + + # Note any processing instructions. +sgml('pi', sub { $pi = 1; }); + + # Keep track of all subdocument entities. 
+sgml('start_subdoc', sub {
+    my $entity = shift;
+    $subdocs{$entity->name} = 1;
+});
+                                # Keep track of all external data entities.
+sgml('entity', sub {
+    my $entity = shift;
+    $entities{$entity->name} = 1;
+});
+                                # Keep track of all SDATA strings
+sgml('sdata', sub {
+    my $sdata = shift;
+    $sdata{$sdata} = 1;
+});
+
+                                # Display element handlers as they appear.
+sgml('start_element', sub {
+    my $element = shift;
+    unless ($intro) {
+        $intro = 1;
+        do_intro($element->name);
+    }
+    if (!$elements{$element->name}) {
+        output "# Element: " . $element->name . "\n";
+        output "sgml('<" . $element->name . ">', \"\");\n";
+        output "sgml('</" . $element->name . ">', \"\");\n\n";
+        $elements{$element->name} = 1;
+    }
+});
+
+sgml('end', sub {
+                                # generate subdoc handlers
+    my @subdoc_names = keys(%subdocs);
+    if (@subdoc_names) {
+        output "#\n# Subdocument Entity Handlers\n#\n\n";
+        foreach (@subdoc_names) {
+            output "# Subdocument Entity: $_\n";
+            output "sgml('{" . $_ . "}', \"\");\n";
+            output "sgml('{/" . $_ . "}', \"\");\n\n";
+        }
+    }
+                                # generate entity handlers
+    my @entity_names = keys(%entities);
+    if (@entity_names) {
+        output "#\n# External Data Entity Handlers\n#\n\n";
+        foreach (@entity_names) {
+            output "sgml('&" . $_ . ";', \"\");\n";
+        }
+    }
+                                # generate sdata handlers
+    my @sdata_names = keys(%sdata);
+    if (@sdata_names) {
+        output "#\n# SDATA Handlers\n#\n\n";
+        foreach (@sdata_names) {
+            output "sgml('|" . $_ . "|', \"\");\n";
+        }
+    }
+
+    if ($pi) {
+        output "#\n# Processing-Instruction Handler\n#\n";
+        output "sgml('pi', sub {});\n\n";
+    }
+
+    # NOTE(review): heredoc opening and banner were lost in extraction and
+    # are reconstructed here -- confirm the wording against upstream skel.pl.
+    output <<END;
+#
+# Default handlers (uncomment these if needed).  Right now, these are set
+# up to gag on any unrecognised elements, sdata, processing-instructions,
+# or entities.
+#
+# sgml('start_element',sub { die "Unknown element: " . \$_[0]->name; });
+# sgml('end_element','');
+# sgml('cdata',sub { output \$_[0]; });
+# sgml('sdata',sub { die "Unknown SDATA: " . \$_[0]; });
+# sgml('re',"\\n");
+# sgml('pi',sub { die "Unknown processing instruction: " . \$_[0]; });
+# sgml('entity',sub { die "Unknown external entity: " . \$_[0]->name; });
+# sgml('start_subdoc',sub { die "Unknown subdoc entity: " . 
\$_[0]->name; }); +# sgml('end_subdoc',''); +# sgml('conforming',''); + +1; +END +}); + + + + # Function to print the banner. +sub do_intro { + my $doctype = shift; + output <next_event) { + print "Event type: " . $event->type; + print "Data: " . $event->data; + print "File: " . $event->file || "[unavailable]"; + print "Line: " . $event->line || "[unavailable]"; + print ""; +} -- 2.7.4