Imported Upstream version 2.9.4
[platform/upstream/libxml2.git] / os400 / libxmlrpg / HTMLparser.rpgle
1       * Summary: interface for an HTML 4.0 non-verifying parser
2       * Description: this module implements an HTML 4.0 non-verifying parser
3       *              with API compatible with the XML parser ones. It should
4       *              be able to parse "real world" HTML, even if severely
5       *              broken from a specification point of view.
6       *
7       * Copy: See Copyright for the status of this software.
8       *
9       * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A.
10
11       /if not defined(HTML_PARSER_H__)
12       /define HTML_PARSER_H__
13
14       /include "libxmlrpg/xmlversion"
15
16       /if defined(LIBXML_HTML_ENABLED)
17
18       /include "libxmlrpg/xmlTypesC"
19       /include "libxmlrpg/parser"
20
21       * Most of the back-end structures from XML and HTML are shared.
22
23      D htmlParserCtxtPtr...
24      D                 S                   Based(######typedef######)           used as typedef
25      D                                     Like(xmlParserCtxtPtr)
26
27      D htmlParserCtxt  DS                  Based(htmlParserCtxtPtr)
28      D                                     LikeDS(xmlParserCtxt)                same layout as XML
29
30      D htmlParserNodeInfoPtr...
31      D                 S                   Based(######typedef######)           used as typedef
32      D                                     Like(xmlParserNodeInfoPtr)
33
34      D htmlParserNodeInfo...
35      D                 DS                  Based(htmlParserNodeInfoPtr)
36      D                                     LikeDS(xmlParserNodeInfo)            same layout as XML
37
38      D htmlSAXHandlerPtr...
39      D                 S                   Based(######typedef######)           used as typedef
40      D                                     Like(xmlSAXHandlerPtr)
41
42      D htmlSAXHandler  DS                  Based(htmlSAXHandlerPtr)
43      D                                     LikeDS(xmlSAXHandler)                same layout as XML
44
45      D htmlParserInputPtr...
46      D                 S                   Based(######typedef######)           used as typedef
47      D                                     Like(xmlParserInputPtr)
48
49      D htmlParserInput...
50      D                 DS                  Based(htmlParserInputPtr)
51      D                                     LikeDS(xmlParserInput)               same layout as XML
52
53      D htmlDocPtr      S                   Based(######typedef######)           used as typedef
54      D                                     Like(xmlDocPtr)
55
56      D htmlNodePtr     S                   Based(######typedef######)           used as typedef
57      D                                     Like(xmlNodePtr)
58
59       * Internal description of an HTML element, representing HTML 4.01
60       * and XHTML 1.0 (which share the same structure).
61
62      D htmlElemDescPtr...
63      D                 S               *   Based(######typedef######)           used as typedef
64
65      D htmlElemDesc    DS                  Based(htmlElemDescPtr)
66      D                                     Align Qualified
67      D  name                           *                                        const char *
68      D  startTag                           Like(xmlCchar)                       Start tag implied ?
69      D  endTag                             Like(xmlCchar)                       End tag implied ?
70      D  saveEndTag                         Like(xmlCchar)                       Save end tag ?
71      D  empty                              Like(xmlCchar)                       Empty element ?
72      D  depr                               Like(xmlCchar)                       Deprecated element ?
73      D  dtd                                Like(xmlCchar)                       Loose DTD/Frameset
74      D  isinline                           Like(xmlCchar)                       Block 0/inline elem?
75      D  desc                           *                                        const char *
76       *
77       * New fields encapsulating HTML structure
78       *
79       * Bugs:
80       *      This is a very limited representation.  It fails to tell us when
81       *      an element *requires* subelements (we only have whether they're
82       *      allowed or not), and it doesn't tell us where CDATA and PCDATA
83       *      are allowed.  Some element relationships are not fully represented:
84       *      these are flagged with the word MODIFIER
85       *
86      D  subelts                        *                                        const char * *
87      D  defaultsubelt                  *                                        const char *
88      D  attrs_opt                      *                                        const char * *
89      D  attrs_depr                     *                                        const char * *
90      D  attrs_req                      *                                        const char * *
91
92       * Internal description of an HTML entity.
93
94      D htmlEntityDescPtr...
95      D                 S               *   Based(######typedef######)           used as typedef
96
97      D htmlEntityDesc...
98      D                 DS                  Based(htmlEntityDescPtr)
99      D                                     Align Qualified
100      D  value                              Like(xmlCuint)
101      D  name                           *                                        const char *
102      D  desc                           *                                        const char *
103
104       * There are only a few public functions.
105
106      D htmlTagLookup   PR                  ExtProc('htmlTagLookup')
107      D                                     Like(htmlElemDescPtr)                const htmlElemDesc *
108      D  tag                            *   Value Options(*String)               const xmlChar *
109
110      D htmlEntityLookup...
111      D                 PR                  ExtProc('htmlEntityLookup')
112      D                                     Like(htmlEntityDescPtr)              const
113      D  name                           *   Value Options(*String)               const xmlChar *
114
115      D htmlEntityValueLookup...
116      D                 PR                  ExtProc('htmlEntityValueLookup')
117      D                                     Like(htmlEntityDescPtr)              const
118      D  value                              Value Like(xmlCuint)
119
120      D htmlIsAutoClosed...
121      D                 PR                  ExtProc('htmlIsAutoClosed')
122      D                                     Like(xmlCint)                        return type
123      D  doc                                Value Like(htmlDocPtr)
124      D  elem                               Value Like(htmlNodePtr)
125
126      D htmlAutoCloseTag...
127      D                 PR                  ExtProc('htmlAutoCloseTag')
128      D                                     Like(xmlCint)                        return type
129      D  doc                                Value Like(htmlDocPtr)
130      D  name                           *   Value Options(*String)               const xmlChar *
131      D  elem                               Value Like(htmlNodePtr)
132
133      D htmlParseEntityRef...
134      D                 PR                  ExtProc('htmlParseEntityRef')
135      D                                     Like(htmlEntityDescPtr)              const
136      D  ctxt                               Value Like(htmlParserCtxtPtr)
137      D  str                            *                                        const xmlChar *(*)
138
139      D htmlParseCharRef...
140      D                 PR                  ExtProc('htmlParseCharRef')
141      D                                     Like(xmlCint)                        return type
142      D  ctxt                               Value Like(htmlParserCtxtPtr)
143
144      D htmlParseElement...
145      D                 PR                  ExtProc('htmlParseElement')          no return value
146      D  ctxt                               Value Like(htmlParserCtxtPtr)
147
148      D htmlNewParserCtxt...
149      D                 PR                  ExtProc('htmlNewParserCtxt')
150      D                                     Like(htmlParserCtxtPtr)              return type
151
152      D htmlCreateMemoryParserCtxt...
153      D                 PR                  ExtProc('htmlCreateMemoryParserCtxt')
154      D                                     Like(htmlParserCtxtPtr)              return type
155      D  buffer                         *   Value Options(*String)               const char *
156      D  size                               Value Like(xmlCint)
157
158      D htmlParseDocument...
159      D                 PR                  ExtProc('htmlParseDocument')
160      D                                     Like(xmlCint)                        return type
161      D  ctxt                               Value Like(htmlParserCtxtPtr)
162
163      D htmlSAXParseDoc...
164      D                 PR                  ExtProc('htmlSAXParseDoc')
165      D                                     Like(htmlDocPtr)                     return type
166      D  cur                            *   Value Options(*String)               xmlChar *
167      D  encoding                       *   Value Options(*String)               const char *
168      D  sax                                Value Like(htmlSAXHandlerPtr)
169      D  userData                       *   Value                                void *
170
171      D htmlParseDoc    PR                  ExtProc('htmlParseDoc')
172      D                                     Like(htmlDocPtr)                     return type
173      D  cur                            *   Value Options(*String)               xmlChar *
174      D  encoding                       *   Value Options(*String)               const char *
175
176      D htmlSAXParseFile...
177      D                 PR                  ExtProc('htmlSAXParseFile')
178      D                                     Like(htmlDocPtr)                     return type
179      D  filename                       *   Value Options(*String)               const char *
180      D  encoding                       *   Value Options(*String)               const char *
181      D  sax                                Value Like(htmlSAXHandlerPtr)
182      D  userData                       *   Value                                void *
183
184      D htmlParseFile   PR                  ExtProc('htmlParseFile')
185      D                                     Like(htmlDocPtr)                     return type
186      D  filename                       *   Value Options(*String)               const char *
187      D  encoding                       *   Value Options(*String)               const char *
188
189      D UTF8ToHtml      PR                  ExtProc('UTF8ToHtml')
190      D                                     Like(xmlCint)                        return type
191      D  out                       65535    Options(*VarSize)                    unsigned char []
192      D  outlen                             Like(xmlCint)                        by reference
193      D  in                             *   Value Options(*String)               const unsigned char*
194      D  inlen                              Like(xmlCint)                        by reference
195
196      D htmlEncodeEntities...
197      D                 PR                  ExtProc('htmlEncodeEntities')
198      D                                     Like(xmlCint)                        return type
199      D  out                       65535    Options(*VarSize)                    unsigned char []
200      D  outlen                             Like(xmlCint)                        by reference
201      D  in                             *   Value Options(*String)               const unsigned char*
202      D  inlen                              Like(xmlCint)                        by reference
203      D  quoteChar                          Value Like(xmlCint)
204
205      D htmlIsScriptAttribute...
206      D                 PR                  ExtProc('htmlIsScriptAttribute')
207      D                                     Like(xmlCint)                        return type
208      D  name                           *   Value Options(*String)               const xmlChar *
209
210      D htmlHandleOmittedElem...
211      D                 PR                  ExtProc('htmlHandleOmittedElem')
212      D                                     Like(xmlCint)                        return type
213      D  val                                Value Like(xmlCint)
214
215       /if defined(LIBXML_PUSH_ENABLED)
216
217       * Interfaces for the Push mode.
218
219      D htmlCreatePushParserCtxt...
220      D                 PR                  ExtProc('htmlCreatePushParserCtxt')
221      D                                     Like(htmlParserCtxtPtr)              return type
222      D  sax                                Value Like(htmlSAXHandlerPtr)
223      D  user_data                      *   Value                                void *
224      D  chunk                          *   Value Options(*String)               const char *
225      D  size                               Value Like(xmlCint)
226      D  filename                       *   Value Options(*String)               const char *
227      D  enc                                Value Like(xmlCharEncoding)
228
229      D htmlParseChunk  PR                  ExtProc('htmlParseChunk')
230      D                                     Like(xmlCint)                        return type
231      D  ctxt                               Value Like(htmlParserCtxtPtr)
232      D  chunk                          *   Value Options(*String)               const char *
233      D  size                               Value Like(xmlCint)
234      D  terminate                          Value Like(xmlCint)
235       /endif                                                                    LIBXML_PUSH_ENABLED
236
237      D htmlFreeParserCtxt...
238      D                 PR                  ExtProc('htmlFreeParserCtxt')        no return value
239      D  ctxt                               Value Like(htmlParserCtxtPtr)
240
241       * New set of simpler/more flexible APIs
242
243       * htmlParserOption:
244       *
245       * This is the set of HTML parser options that can be passed down
246       * to the htmlReadDoc() and similar calls.
247
248      D htmlParserOption...
249      D                 S                   Based(######typedef######)           used as typedef
250      D                                     Like(xmlCenum)
251      D  HTML_PARSE_RECOVER...                                                   Relaxed parsing
252      D                 C                   X'00000001'
253      D  HTML_PARSE_NODEFDTD...                                                  No default doctype
254      D                 C                   X'00000004'
255      D  HTML_PARSE_NOERROR...                                                   No error reports
256      D                 C                   X'00000020'
257      D  HTML_PARSE_NOWARNING...                                                 No warning reports
258      D                 C                   X'00000040'
259      D  HTML_PARSE_PEDANTIC...                                                  Pedantic err reports
260      D                 C                   X'00000080'
261      D  HTML_PARSE_NOBLANKS...                                                  Remove blank nodes
262      D                 C                   X'00000100'
263      D  HTML_PARSE_NONET...                                                     Forbid net access
264      D                 C                   X'00000800'
265      D  HTML_PARSE_NOIMPLIED...                                                 No implied html/body
266      D                 C                   X'00002000'
267      D  HTML_PARSE_COMPACT...                                                   compact small txtnod
268      D                 C                   X'00010000'
269      D  HTML_PARSE_IGNORE_ENC...                                                Ignore encoding hint
270      D                 C                   X'00200000'
271
272      D htmlCtxtReset   PR                  ExtProc('htmlCtxtReset')             no return value
273      D  ctxt                               Value Like(htmlParserCtxtPtr)
274
275      D htmlCtxtUseOptions...
276      D                 PR                  ExtProc('htmlCtxtUseOptions')
277      D                                     Like(xmlCint)                        return type
278      D  ctxt                               Value Like(htmlParserCtxtPtr)
279      D  options                            Value Like(xmlCint)
280
281      D htmlReadDoc     PR                  ExtProc('htmlReadDoc')
282      D                                     Like(htmlDocPtr)                     return type
283      D  cur                            *   Value Options(*String)               const xmlChar *
284      D  URL                            *   Value Options(*String)               const char *
285      D  encoding                       *   Value Options(*String)               const char *
286      D  options                            Value Like(xmlCint)
287
288      D htmlReadFile    PR                  ExtProc('htmlReadFile')
289      D                                     Like(htmlDocPtr)                     return type
290      D  URL                            *   Value Options(*String)               const char *
291      D  encoding                       *   Value Options(*String)               const char *
292      D  options                            Value Like(xmlCint)
293
294      D htmlReadMemory  PR                  ExtProc('htmlReadMemory')
295      D                                     Like(htmlDocPtr)                     return type
296      D  buffer                         *   Value Options(*String)               const char *
297      D  size                               Value Like(xmlCint)
298      D  URL                            *   Value Options(*String)               const char *
299      D  encoding                       *   Value Options(*String)               const char *
300      D  options                            Value Like(xmlCint)
301
302      D htmlReadFd      PR                  ExtProc('htmlReadFd')
303      D                                     Like(htmlDocPtr)                     return type
304      D  fd                                 Value Like(xmlCint)
305      D  URL                            *   Value Options(*String)               const char *
306      D  encoding                       *   Value Options(*String)               const char *
307      D  options                            Value Like(xmlCint)
308
309      D htmlReadIO      PR                  ExtProc('htmlReadIO')
310      D                                     Like(htmlDocPtr)                     return type
311      D  ioread                             Value Like(xmlInputReadCallback)
312      D  ioclose                            Value Like(xmlInputCloseCallback)
313      D  ioctx                          *   Value                                void *
314      D  URL                            *   Value Options(*String)               const char *
315      D  encoding                       *   Value Options(*String)               const char *
316      D  options                            Value Like(xmlCint)
317
318      D htmlCtxtReadDoc...
319      D                 PR                  ExtProc('htmlCtxtReadDoc')
320      D                                     Like(htmlDocPtr)                     return type
321      D  ctxt                               Value Like(xmlParserCtxtPtr)
322      D  cur                            *   Value Options(*String)               const xmlChar *
323      D  URL                            *   Value Options(*String)               const char *
324      D  encoding                       *   Value Options(*String)               const char *
325      D  options                            Value Like(xmlCint)
326
327      D htmlCtxtReadFile...
328      D                 PR                  ExtProc('htmlCtxtReadFile')
329      D                                     Like(htmlDocPtr)                     return type
330      D  ctxt                               Value Like(xmlParserCtxtPtr)
331      D  filename                       *   Value Options(*String)               const char *
332      D  encoding                       *   Value Options(*String)               const char *
333      D  options                            Value Like(xmlCint)
334
335      D htmlCtxtReadMemory...
336      D                 PR                  ExtProc('htmlCtxtReadMemory')
337      D                                     Like(htmlDocPtr)                     return type
338      D  ctxt                               Value Like(xmlParserCtxtPtr)
339      D  buffer                         *   Value Options(*String)               const char *
340      D  size                               Value Like(xmlCint)
341      D  URL                            *   Value Options(*String)               const char *
342      D  encoding                       *   Value Options(*String)               const char *
343      D  options                            Value Like(xmlCint)
344
345      D htmlCtxtReadFd  PR                  ExtProc('htmlCtxtReadFd')
346      D                                     Like(htmlDocPtr)                     return type
347      D  ctxt                               Value Like(xmlParserCtxtPtr)
348      D  fd                                 Value Like(xmlCint)
349      D  URL                            *   Value Options(*String)               const char *
350      D  encoding                       *   Value Options(*String)               const char *
351      D  options                            Value Like(xmlCint)
352
353      D htmlCtxtReadIO  PR                  ExtProc('htmlCtxtReadIO')
354      D                                     Like(htmlDocPtr)                     return type
355      D  ctxt                               Value Like(xmlParserCtxtPtr)
356      D  ioread                             Value Like(xmlInputReadCallback)
357      D  ioclose                            Value Like(xmlInputCloseCallback)
358      D  ioctx                          *   Value                                void *
359      D  URL                            *   Value Options(*String)               const char *
360      D  encoding                       *   Value Options(*String)               const char *
361      D  options                            Value Like(xmlCint)
362
363       * Further knowledge of HTML structure
364
365      D htmlStatus      S                   Based(######typedef######)           used as typedef
366      D                                     Like(xmlCenum)
367      D  HTML_NA        C                   X'0000'                              No check at all
368      D  HTML_INVALID   C                   X'0001'
369      D  HTML_DEPRECATED...
370      D                 C                   X'0002'
371      D  HTML_VALID     C                   X'0004'
372      D  HTML_REQUIRED  C                   X'000C'                              HTML_VALID ored-in
373
374       * Using htmlElemDesc rather than name here, to emphasise the fact
375       *  that otherwise there's a lookup overhead
376
377      D htmlAttrAllowed...
378      D                 PR                  ExtProc('htmlAttrAllowed')
379      D                                     Like(htmlStatus)                     return type
380      D  #param1                            Value Like(htmlElemDescPtr)          const htmlElemDesc *
381      D  #param2                        *   Value Options(*String)               const xmlChar *
382      D  #param3                            Value Like(xmlCint)
383
384      D htmlElementAllowedHere...
385      D                 PR                  ExtProc('htmlElementAllowedHere')
386      D                                     Like(xmlCint)                        return type
387      D  #param1                            Value Like(htmlElemDescPtr)          const htmlElemDesc *
388      D  #param2                        *   Value Options(*String)               const xmlChar *
389
390      D htmlElementStatusHere...
391      D                 PR                  ExtProc('htmlElementStatusHere')
392      D                                     Like(htmlStatus)                     return type
393      D  #param1                            Value Like(htmlElemDescPtr)          const htmlElemDesc *
394      D  #param2                            Value Like(htmlElemDescPtr)          const htmlElemDesc *
395
396      D htmlNodeStatus  PR                  ExtProc('htmlNodeStatus')
397      D                                     Like(htmlStatus)                     return type
398      D  #param1                            Value Like(htmlNodePtr)
399      D  #param2                            Value Like(xmlCint)
400
401       * C macros implemented as procedures for ILE/RPG support.
402
403      D htmlDefaultSubelement...
404      D                 PR              *   ExtProc('__htmlDefaultSubelement')   const char *
405      D  elt                            *   Value                                const htmlElemDesc *
406
407      D htmlElementAllowedHereDesc...
408      D                 PR                  ExtProc(
409      D                                     '__htmlElementAllowedHereDesc')
410      D                                     Like(xmlCint)                        return type
411      D  parent                         *   Value                                const htmlElemDesc *
412      D  elt                            *   Value                                const htmlElemDesc *
413
414      D htmlRequiredAttrs...
415      D                 PR              *   ExtProc('__htmlRequiredAttrs')        const char * *
416      D  elt                            *   Value                                const htmlElemDesc *
417
418       /endif                                                                    LIBXML_HTML_ENABLED
419       /endif                                                                    HTML_PARSER_H__