7b4a6269cadbaabd7100f91c169fed7e86e8d507
[platform/upstream/libxml2.git] / os400 / libxmlrpg / HTMLparser.rpgle
1       * Summary: interface for an HTML 4.0 non-verifying parser
2       * Description: this module implements an HTML 4.0 non-verifying parser
3       *              with API compatible with the XML parser ones. It should
4       *              be able to parse "real world" HTML, even if severely
5       *              broken from a specification point of view.
6       *
7       * Copy: See Copyright for the status of this software.
8       *
9       * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A.
10
11       /if not defined(HTML_PARSER_H__)
12       /define HTML_PARSER_H__
13
14       /include "libxmlrpg/xmlversion"
15       /include "libxmlrpg/parser"
16
17       /if defined(LIBXML_HTML_ENABLED)
18
19       * Most of the back-end structures from XML and HTML are shared.
20
         * HTML parser context. Both the pointer type and the structure
         * layout are taken from the XML parser (LIKE / LIKEDS below).
         * The pointer is based on the ######typedef###### placeholder,
         * presumably so it only serves as a type template - TODO confirm.
21      d htmlParserCtxtPtr...
22      d                 s                   based(######typedef######)
23      d                                     like(xmlParserCtxtPtr)
24
25      d htmlParserCtxt  ds                  based(htmlParserCtxtPtr)
26      d                                     likeds(xmlParserCtxt)
27
         * HTML parser node information: pointer type defined like
         * xmlParserNodeInfoPtr, structure laid out like xmlParserNodeInfo
         * and based on that pointer type.
28      d htmlParserNodeInfoPtr...
29      d                 s                   based(######typedef######)
30      d                                     like(xmlParserNodeInfoPtr)
31
32      d htmlParserNodeInfo...
33      d                 ds                  based(htmlParserNodeInfoPtr)
34      d                                     likeds(xmlParserNodeInfo)
35
         * HTML SAX handler: pointer type defined like xmlSAXHandlerPtr,
         * structure laid out like xmlSAXHandler and based on that
         * pointer type.
36      d htmlSAXHandlerPtr...
37      d                 s                   based(######typedef######)
38      d                                     like(xmlSAXHandlerPtr)
39
40      d htmlSAXHandler  ds                  based(htmlSAXHandlerPtr)
41      d                                     likeds(xmlSAXHandler)
42
         * HTML parser input: pointer type defined like xmlParserInputPtr,
         * structure laid out like xmlParserInput and based on that
         * pointer type.
43      d htmlParserInputPtr...
44      d                 s                   based(######typedef######)
45      d                                     like(xmlParserInputPtr)
46
47      d htmlParserInput...
48      d                 ds                  based(htmlParserInputPtr)
49      d                                     likeds(xmlParserInput)
50
         * HTML document and node pointer types. They are defined like
         * xmlDocPtr and xmlNodePtr; no HTML-specific structure is
         * declared for them in this copybook.
51      d htmlDocPtr      s                   based(######typedef######)
52      d                                     like(xmlDocPtr)
53
54      d htmlNodePtr     s                   based(######typedef######)
55      d                                     like(xmlNodePtr)
56
57       * Internal description of an HTML element, representing HTML 4.01
58       * and XHTML 1.0 (which share the same structure).
59
         * htmlElemDescPtr is a plain pointer type; htmlElemDesc is the
         * structure it addresses. The structure is QUALIFIED, so subfields
         * are referenced as htmlElemDesc.name etc., and ALIGN requests
         * natural alignment of the subfields.
         * The seven flag subfields are 1-byte unsigned integers (3u 0);
         * all other subfields are pointers. The right-margin notes give
         * the corresponding C types.
60      d htmlElemDescPtr...
61      d                 s               *   based(######typedef######)
62
63      d htmlElemDesc    ds                  based(htmlElemDescPtr)
64      d                                     align qualified
65      d  name                           *                                        const char *
66      d  startTag                      3u 0                                      Start tag implied ?
67      d  endTag                        3u 0                                      End tag implied ?
68      d  saveEndTag                    3u 0                                      Save end tag ?
69      d  empty                         3u 0                                      Empty element ?
70      d  depr                          3u 0                                      Deprecated element ?
71      d  dtd                           3u 0                                      Loose DTD/Frameset
72      d  isinline                      3u 0                                      Block 0/inline elem?
73      d  desc                           *                                        const char *
74       *
75       * New fields encapsulating HTML structure
76       *
77       * Bugs:
78       *      This is a very limited representation.  It fails to tell us when
79       *      an element *requires* subelements (we only have whether they're
80       *      allowed or not), and it doesn't tell us where CDATA and PCDATA
81       *      are allowed.  Some element relationships are not fully represented:
82       *      these are flagged with the word MODIFIER
83       *
84      d  subelts                        *                                        const char * *
85      d  defaultsubelt                  *                                        const char *
86      d  attrs_opt                      *                                        const char * *
87      d  attrs_depr                     *                                        const char * *
88      d  attrs_req                      *                                        const char * *
89
90       * Internal description of an HTML entity.
91
         * htmlEntityDescPtr is a plain pointer type; htmlEntityDesc is the
         * QUALIFIED, ALIGNed structure it addresses: a 10-digit unsigned
         * integer value followed by two pointers (C strings according to
         * the right-margin notes).
92      d htmlEntityDescPtr...
93      d                 s               *   based(######typedef######)
94
95      d htmlEntityDesc...
96      d                 ds                  based(htmlEntityDescPtr)
97      d                                     align qualified
98      d  value                        10u 0                                      Unicode char value
99      d  name                           *                                        const char *
100      d  desc                           *                                        const char *
101
102       * There are only a few public functions.
103
          * Prototype for the external procedure 'htmlTagLookup'.
          * Returns a value defined like htmlElemDescPtr.
          * tag: pointer passed by value; OPTIONS(*STRING) also lets the
          *      caller supply a character expression instead of a pointer.
104      d htmlTagLookup   pr                  extproc('htmlTagLookup')
105      d                                     like(htmlElemDescPtr)                const
106      d  tag                            *   value options(*string)               const xmlChar *
107
          * Prototype for the external procedure 'htmlEntityLookup'.
          * Returns a value defined like htmlEntityDescPtr.
          * name: pointer passed by value; OPTIONS(*STRING) also accepts a
          *       character expression.
108      d htmlEntityLookup...
109      d                 pr                  extproc('htmlEntityLookup')
110      d                                     like(htmlEntityDescPtr)              const
111      d  name                           *   value options(*string)               const xmlChar *
112
          * Prototype for the external procedure 'htmlEntityValueLookup'.
          * Returns a value defined like htmlEntityDescPtr.
          * value: 10-digit unsigned integer passed by value.
113      d htmlEntityValueLookup...
114      d                 pr                  extproc('htmlEntityValueLookup')
115      d                                     like(htmlEntityDescPtr)              const
116      d  value                        10u 0 value
117
          * Prototype for the external procedure 'htmlIsAutoClosed'.
          * Returns a 10-digit signed integer.
          * doc:  passed by value, defined like htmlDocPtr.
          * elem: passed by value, defined like htmlNodePtr.
118      d htmlIsAutoClosed...
119      d                 pr            10i 0 extproc('htmlIsAutoClosed')
120      d  doc                                value like(htmlDocPtr)
121      d  elem                               value like(htmlNodePtr)
122
          * Prototype for the external procedure 'htmlAutoCloseTag'.
          * Returns a 10-digit signed integer.
          * doc:  passed by value, defined like htmlDocPtr.
          * name: pointer passed by value; OPTIONS(*STRING) also accepts a
          *       character expression.
          * elem: passed by value, defined like htmlNodePtr.
123      d htmlAutoCloseTag...
124      d                 pr            10i 0 extproc('htmlAutoCloseTag')
125      d  doc                                value like(htmlDocPtr)
126      d  name                           *   value options(*string)               const xmlChar *
127      d  elem                               value like(htmlNodePtr)
128
          * Prototype for the external procedure 'htmlParseEntityRef'.
          * Returns a value defined like htmlEntityDescPtr.
          * ctxt: passed by value, defined like htmlParserCtxtPtr.
          * str:  pointer passed by reference (no VALUE keyword), i.e. the
          *       callee receives the address of the caller's pointer.
129      d htmlParseEntityRef...
130      d                 pr                  extproc('htmlParseEntityRef')
131      d                                     like(htmlEntityDescPtr)              const
132      d  ctxt                               value like(htmlParserCtxtPtr)
133      d  str                            *                                        const xmlChar *(*)
134
          * Prototype for the external procedure 'htmlParseCharRef'.
          * Returns a 10-digit signed integer.
          * ctxt: passed by value, defined like htmlParserCtxtPtr.
135      d htmlParseCharRef...
136      d                 pr            10i 0 extproc('htmlParseCharRef')
137      d  ctxt                               value like(htmlParserCtxtPtr)
138
          * Prototype for the external procedure 'htmlParseElement'.
          * No return value is declared.
          * ctxt: passed by value, defined like htmlParserCtxtPtr.
139      d htmlParseElement...
140      d                 pr                  extproc('htmlParseElement')
141      d  ctxt                               value like(htmlParserCtxtPtr)
142
          * Prototype for the external procedure 'htmlNewParserCtxt'.
          * Takes no parameters and returns a value defined like
          * htmlParserCtxtPtr.
143      d htmlNewParserCtxt...
144      d                 pr                  extproc('htmlNewParserCtxt')
145      d                                     like(htmlParserCtxtPtr)
146
          * Prototype for the external procedure
          * 'htmlCreateMemoryParserCtxt'.
          * Returns a value defined like htmlParserCtxtPtr.
          * buffer: pointer passed by value; OPTIONS(*STRING) also accepts
          *         a character expression.
          * size:   10-digit signed integer passed by value.
147      d htmlCreateMemoryParserCtxt...
148      d                 pr                  extproc('htmlCreateMemoryParserCtxt')
149      d                                     like(htmlParserCtxtPtr)
150      d  buffer                         *   value options(*string)               const char *
151      d  size                         10i 0 value
152
          * Prototype for the external procedure 'htmlParseDocument'.
          * Returns a 10-digit signed integer.
          * ctxt: passed by value, defined like htmlParserCtxtPtr.
153      d htmlParseDocument...
154      d                 pr            10i 0 extproc('htmlParseDocument')
155      d  ctxt                               value like(htmlParserCtxtPtr)
156
          * Prototype for the external procedure 'htmlSAXParseDoc'.
          * Returns a value defined like htmlDocPtr.
          * cur, encoding: pointers passed by value; OPTIONS(*STRING) also
          *                accepts character expressions.
          * sax:           passed by value, defined like htmlSAXHandlerPtr.
          * userData:      untyped pointer passed by value.
157      d htmlSAXParseDoc...
158      d                 pr                  extproc('htmlSAXParseDoc')
159      d                                     like(htmlDocPtr)
160      d  cur                            *   value options(*string)               xmlChar *
161      d  encoding                       *   value options(*string)               const char *
162      d  sax                                value like(htmlSAXHandlerPtr)
163      d  userData                       *   value                                void *
164
          * Prototype for the external procedure 'htmlParseDoc'.
          * Returns a value defined like htmlDocPtr.
          * cur, encoding: pointers passed by value; OPTIONS(*STRING) also
          *                accepts character expressions.
165      d htmlParseDoc    pr                  extproc('htmlParseDoc')
166      d                                     like(htmlDocPtr)
167      d  cur                            *   value options(*string)               xmlChar *
168      d  encoding                       *   value options(*string)               const char *
169
          * Prototype for the external procedure 'htmlSAXParseFile'.
          * Returns a value defined like htmlDocPtr.
          * filename, encoding: pointers passed by value; OPTIONS(*STRING)
          *                     also accepts character expressions.
          * sax:      passed by value, defined like htmlSAXHandlerPtr.
          * userData: untyped pointer passed by value.
170      d htmlSAXParseFile...
171      d                 pr                  extproc('htmlSAXParseFile')
172      d                                     like(htmlDocPtr)
173      d  filename                       *   value options(*string)               const char *
174      d  encoding                       *   value options(*string)               const char *
175      d  sax                                value like(htmlSAXHandlerPtr)
176      d  userData                       *   value                                void *
177
          * Prototype for the external procedure 'htmlParseFile'.
          * Returns a value defined like htmlDocPtr.
          * filename, encoding: pointers passed by value; OPTIONS(*STRING)
          *                     also accepts character expressions.
178      d htmlParseFile   pr                  extproc('htmlParseFile')
179      d                                     like(htmlDocPtr)
180      d  filename                       *   value options(*string)               const char *
181      d  encoding                       *   value options(*string)               const char *
182
          * Prototype for the external procedure 'UTF8ToHtml'.
          * Returns a 10-digit signed integer.
          * out:    character field passed by reference, declared as 65535
          *         bytes; OPTIONS(*VARSIZE) lets callers pass a shorter
          *         field.
          * outlen: 10-digit signed integer passed by reference.
          * in:     pointer passed by value; OPTIONS(*STRING) also accepts
          *         a character expression.
          * inlen:  10-digit signed integer passed by reference.
183      d UTF8ToHtml      pr            10i 0 extproc('UTF8ToHtml')
184      d  out                       65535    options(*varsize)                    unsigned char []
185      d  outlen                       10i 0
186      d  in                             *   value options(*string)               const unsigned char*
187      d  inlen                        10i 0
188
          * Prototype for the external procedure 'htmlEncodeEntities'.
          * Returns a 10-digit signed integer.
          * out:       character field passed by reference, declared as
          *            65535 bytes; OPTIONS(*VARSIZE) lets callers pass a
          *            shorter field.
          * outlen:    10-digit signed integer passed by reference.
          * in:        pointer passed by value; OPTIONS(*STRING) also
          *            accepts a character expression.
          * inlen:     10-digit signed integer passed by reference.
          * quoteChar: 10-digit signed integer passed by value.
189      d htmlEncodeEntities...
190      d                 pr            10i 0 extproc('htmlEncodeEntities')
191      d  out                       65535    options(*varsize)                    unsigned char []
192      d  outlen                       10i 0
193      d  in                             *   value options(*string)               const unsigned char*
194      d  inlen                        10i 0
195      d  quoteChar                    10i 0 value
196
          * Prototype for the external procedure 'htmlIsScriptAttribute'.
          * Returns a 10-digit signed integer.
          * name: pointer passed by value; OPTIONS(*STRING) also accepts a
          *       character expression.
197      d htmlIsScriptAttribute...
198      d                 pr            10i 0 extproc('htmlIsScriptAttribute')
199      d  name                           *   value options(*string)               const xmlChar *
200
          * Prototype for the external procedure 'htmlHandleOmittedElem'.
          * Takes a 10-digit signed integer by value and returns a 10-digit
          * signed integer.
201      d htmlHandleOmittedElem...
202      d                 pr            10i 0 extproc('htmlHandleOmittedElem')
203      d  val                          10i 0 value
204
205       /if defined(LIBXML_PUSH_ENABLED)
206
207       * Interfaces for the Push mode.
208
          * Prototype for the external procedure
          * 'htmlCreatePushParserCtxt'; only compiled when
          * LIBXML_PUSH_ENABLED is defined.
          * Returns a value defined like htmlParserCtxtPtr.
          * sax:       passed by value, defined like htmlSAXHandlerPtr.
          * user_data: untyped pointer passed by value.
          * chunk, filename: pointers passed by value; OPTIONS(*STRING)
          *            also accepts character expressions.
          * size:      10-digit signed integer passed by value.
          * enc:       passed by value, defined like xmlCharEncoding.
209      d htmlCreatePushParserCtxt...
210      d                 pr                  extproc('htmlCreatePushParserCtxt')
211      d                                     like(htmlParserCtxtPtr)
212      d  sax                                value like(htmlSAXHandlerPtr)
213      d  user_data                      *   value                                void *
214      d  chunk                          *   value options(*string)               const char *
215      d  size                         10i 0 value
216      d  filename                       *   value options(*string)               const char *
217      d  enc                                value like(xmlCharEncoding)
218
          * Prototype for the external procedure 'htmlParseChunk'; only
          * compiled when LIBXML_PUSH_ENABLED is defined.
          * Returns a 10-digit signed integer.
          * ctxt:      passed by value, defined like htmlParserCtxtPtr.
          * chunk:     pointer passed by value; OPTIONS(*STRING) also
          *            accepts a character expression.
          * size, terminate: 10-digit signed integers passed by value.
219      d htmlParseChunk  pr            10i 0 extproc('htmlParseChunk')
220      d  ctxt                               value like(htmlParserCtxtPtr)
221      d  chunk                          *   value options(*string)               const char *
222      d  size                         10i 0 value
223      d  terminate                    10i 0 value
224       /endif                                                                    LIBXML_PUSH_ENABLED
225
          * Prototype for the external procedure 'htmlFreeParserCtxt'.
          * No return value is declared.
          * ctxt: passed by value, defined like htmlParserCtxtPtr.
226      d htmlFreeParserCtxt...
227      d                 pr                  extproc('htmlFreeParserCtxt')
228      d  ctxt                               value like(htmlParserCtxtPtr)
229
230       * New set of simpler/more flexible APIs
231
232       * htmlParserOption:
233       *
234       * This is the set of HTML parser options that can be passed down
235       * to the htmlReadDoc() and similar calls.
236
          * htmlParserOption is a 10-digit signed integer type template
          * (the right margin marks it as a C enum). The HTML_PARSE_* names
          * that follow are independent named constants, not subfields.
          * Every value has exactly one bit set, so the constants can be
          * combined into one integer for the 'options' parameters
          * declared in this copybook.
237      d htmlParserOption...
238      d                 s             10i 0 based(######typedef######)           enum
239      d  HTML_PARSE_RECOVER...                                                   Relaxed parsing
240      d                 c                   X'00000001'
241      d  HTML_PARSE_NODEFDTD...                                                  No default doctype
242      d                 c                   X'00000004'
243      d  HTML_PARSE_NOERROR...                                                   No error reports
244      d                 c                   X'00000020'
245      d  HTML_PARSE_NOWARNING...                                                 No warning reports
246      d                 c                   X'00000040'
247      d  HTML_PARSE_PEDANTIC...                                                  Pedantic err reports
248      d                 c                   X'00000080'
249      d  HTML_PARSE_NOBLANKS...                                                  Remove blank nodes
250      d                 c                   X'00000100'
251      d  HTML_PARSE_NONET...                                                     Forbid net access
252      d                 c                   X'00000800'
253      d  HTML_PARSE_NOIMPLIED...                                                 No implied html/body
254      d                 c                   X'00002000'
255      d  HTML_PARSE_COMPACT...                                                   compact small txtnod
256      d                 c                   X'00010000'
257      d  HTML_PARSE_IGNORE_ENC...                                                Ignore encoding hint
258      d                 c                   X'00200000'
259
          * Prototype for the external procedure 'htmlCtxtReset'.
          * No return value is declared.
          * ctxt: passed by value, defined like htmlParserCtxtPtr.
260      d htmlCtxtReset   pr                  extproc('htmlCtxtReset')
261      d  ctxt                               value like(htmlParserCtxtPtr)
262
          * Prototype for the external procedure 'htmlCtxtUseOptions'.
          * Returns a 10-digit signed integer.
          * ctxt:    passed by value, defined like htmlParserCtxtPtr.
          * options: 10-digit signed integer passed by value.
263      d htmlCtxtUseOptions...
264      d                 pr            10i 0 extproc('htmlCtxtUseOptions')
265      d  ctxt                               value like(htmlParserCtxtPtr)
266      d  options                      10i 0 value
267
          * Prototype for the external procedure 'htmlReadDoc'.
          * Returns a value defined like htmlDocPtr.
          * cur, URL, encoding: pointers passed by value; OPTIONS(*STRING)
          *          also accepts character expressions.
          * options: 10-digit signed integer passed by value.
268      d htmlReadDoc     pr                  extproc('htmlReadDoc')
269      d                                     like(htmlDocPtr)
270      d  cur                            *   value options(*string)               const xmlChar *
271      d  URL                            *   value options(*string)               const char *
272      d  encoding                       *   value options(*string)               const char *
273      d  options                      10i 0 value
274
          * Prototype for the external procedure 'htmlReadFile'.
          * Returns a value defined like htmlDocPtr.
          * URL, encoding: pointers passed by value; OPTIONS(*STRING) also
          *          accepts character expressions.
          * options: 10-digit signed integer passed by value.
275      d htmlReadFile    pr                  extproc('htmlReadFile')
276      d                                     like(htmlDocPtr)
277      d  URL                            *   value options(*string)               const char *
278      d  encoding                       *   value options(*string)               const char *
279      d  options                      10i 0 value
280
          * Prototype for the external procedure 'htmlReadMemory'.
          * Returns a value defined like htmlDocPtr.
          * buffer, URL, encoding: pointers passed by value;
          *          OPTIONS(*STRING) also accepts character expressions.
          * size, options: 10-digit signed integers passed by value.
281      d htmlReadMemory  pr                  extproc('htmlReadMemory')
282      d                                     like(htmlDocPtr)
283      d  buffer                         *   value options(*string)               const char *
284      d  size                         10i 0 value
285      d  URL                            *   value options(*string)               const char *
286      d  encoding                       *   value options(*string)               const char *
287      d  options                      10i 0 value
288
          * Prototype for the external procedure 'htmlReadFd'.
          * Returns a value defined like htmlDocPtr.
          * fd, options: 10-digit signed integers passed by value.
          * URL, encoding: pointers passed by value; OPTIONS(*STRING) also
          *          accepts character expressions.
289      d htmlReadFd      pr                  extproc('htmlReadFd')
290      d                                     like(htmlDocPtr)
291      d  fd                           10i 0 value
292      d  URL                            *   value options(*string)               const char *
293      d  encoding                       *   value options(*string)               const char *
294      d  options                      10i 0 value
295
          * Prototype for the external procedure 'htmlReadIO'.
          * Returns a value defined like htmlDocPtr.
          * ioread:  passed by value, defined like xmlInputReadCallback.
          * ioclose: passed by value, defined like xmlInputCloseCallback.
          * ioctx:   untyped pointer passed by value.
          * URL, encoding: pointers passed by value; OPTIONS(*STRING) also
          *          accepts character expressions.
          * options: 10-digit signed integer passed by value.
296      d htmlReadIO      pr                  extproc('htmlReadIO')
297      d                                     like(htmlDocPtr)
298      d  ioread                             value like(xmlInputReadCallback)
299      d  ioclose                            value like(xmlInputCloseCallback)
300      d  ioctx                          *   value                                void *
301      d  URL                            *   value options(*string)               const char *
302      d  encoding                       *   value options(*string)               const char *
303      d  options                      10i 0 value
304
          * Prototype for the external procedure 'htmlCtxtReadDoc'.
          * Returns a value defined like htmlDocPtr.
          * ctxt:    passed by value, declared like xmlParserCtxtPtr; this
          *          copybook defines htmlParserCtxtPtr like that same type.
          * cur, URL, encoding: pointers passed by value; OPTIONS(*STRING)
          *          also accepts character expressions.
          * options: 10-digit signed integer passed by value.
305      d htmlCtxtReadDoc...
306      d                 pr                  extproc('htmlCtxtReadDoc')
307      d                                     like(htmlDocPtr)
308      d  ctxt                               value like(xmlParserCtxtPtr)
309      d  cur                            *   value options(*string)               const xmlChar *
310      d  URL                            *   value options(*string)               const char *
311      d  encoding                       *   value options(*string)               const char *
312      d  options                      10i 0 value
313
          * Prototype for the external procedure 'htmlCtxtReadFile'.
          * Returns a value defined like htmlDocPtr.
          * ctxt:    passed by value, declared like xmlParserCtxtPtr; this
          *          copybook defines htmlParserCtxtPtr like that same type.
          * filename, encoding: pointers passed by value; OPTIONS(*STRING)
          *          also accepts character expressions.
          * options: 10-digit signed integer passed by value.
314      d htmlCtxtReadFile...
315      d                 pr                  extproc('htmlCtxtReadFile')
316      d                                     like(htmlDocPtr)
317      d  ctxt                               value like(xmlParserCtxtPtr)
318      d  filename                       *   value options(*string)               const char *
319      d  encoding                       *   value options(*string)               const char *
320      d  options                      10i 0 value
321
          * Prototype for the external procedure 'htmlCtxtReadMemory'.
          * Returns a value defined like htmlDocPtr.
          * ctxt:    passed by value, declared like xmlParserCtxtPtr; this
          *          copybook defines htmlParserCtxtPtr like that same type.
          * buffer, URL, encoding: pointers passed by value;
          *          OPTIONS(*STRING) also accepts character expressions.
          * size, options: 10-digit signed integers passed by value.
322      d htmlCtxtReadMemory...
323      d                 pr                  extproc('htmlCtxtReadMemory')
324      d                                     like(htmlDocPtr)
325      d  ctxt                               value like(xmlParserCtxtPtr)
326      d  buffer                         *   value options(*string)               const char *
327      d  size                         10i 0 value
328      d  URL                            *   value options(*string)               const char *
329      d  encoding                       *   value options(*string)               const char *
330      d  options                      10i 0 value
331
          * Prototype for the external procedure 'htmlCtxtReadFd'.
          * Returns a value defined like htmlDocPtr.
          * ctxt:    passed by value, declared like xmlParserCtxtPtr; this
          *          copybook defines htmlParserCtxtPtr like that same type.
          * fd, options: 10-digit signed integers passed by value.
          * URL, encoding: pointers passed by value; OPTIONS(*STRING) also
          *          accepts character expressions.
332      d htmlCtxtReadFd  pr                  extproc('htmlCtxtReadFd')
333      d                                     like(htmlDocPtr)
334      d  ctxt                               value like(xmlParserCtxtPtr)
335      d  fd                           10i 0 value
336      d  URL                            *   value options(*string)               const char *
337      d  encoding                       *   value options(*string)               const char *
338      d  options                      10i 0 value
339
          * Prototype for the external procedure 'htmlCtxtReadIO'.
          * Returns a value defined like htmlDocPtr.
          * ctxt:    passed by value, declared like xmlParserCtxtPtr; this
          *          copybook defines htmlParserCtxtPtr like that same type.
          * ioread:  passed by value, defined like xmlInputReadCallback.
          * ioclose: passed by value, defined like xmlInputCloseCallback.
          * ioctx:   untyped pointer passed by value.
          * URL, encoding: pointers passed by value; OPTIONS(*STRING) also
          *          accepts character expressions.
          * options: 10-digit signed integer passed by value.
340      d htmlCtxtReadIO  pr                  extproc('htmlCtxtReadIO')
341      d                                     like(htmlDocPtr)
342      d  ctxt                               value like(xmlParserCtxtPtr)
343      d  ioread                             value like(xmlInputReadCallback)
344      d  ioclose                            value like(xmlInputCloseCallback)
345      d  ioctx                          *   value                                void *
346      d  URL                            *   value options(*string)               const char *
347      d  encoding                       *   value options(*string)               const char *
348      d  options                      10i 0 value
349
350       * Further knowledge of HTML structure
351
          * htmlStatus is a 10-digit signed integer type template (the
          * right margin marks it as a C enum); the HTML_* names below are
          * independent named constants.
          * HTML_REQUIRED (X'000C') contains the HTML_VALID bit (X'0004')
          * plus X'0008', so testing a status against HTML_VALID also
          * matches HTML_REQUIRED.
352      d htmlStatus      s             10i 0 based(######typedef######)           enum
353      d  HTML_NA        c                   X'0000'                              No check at all
354      d  HTML_INVALID   c                   X'0001'
355      d  HTML_DEPRECATED...
356      d                 c                   X'0002'
357      d  HTML_VALID     c                   X'0004'
358      d  HTML_REQUIRED  c                   X'000C'                              HTML_VALID ored-in
359
360       * Using htmlElemDesc rather than name here, to emphasise the fact
361       *  that otherwise there's a lookup overhead
362
          * Prototype for the external procedure 'htmlAttrAllowed'.
          * Returns a value defined like htmlStatus.
          * Parameters carry placeholder names only:
          * #param1: passed by value, defined like htmlElemDescPtr.
          * #param2: pointer passed by value; OPTIONS(*STRING) also accepts
          *          a character expression.
          * #param3: 10-digit signed integer passed by value.
363      d htmlAttrAllowed...
364      d                 pr                  extproc('htmlAttrAllowed')
365      d                                     like(htmlStatus)
366      d  #param1                            value like(htmlElemDescPtr)          const
367      d  #param2                        *   value options(*string)               const xmlChar *
368      d  #param3                      10i 0 value
369
          * Prototype for the external procedure 'htmlElementAllowedHere'.
          * Returns a 10-digit signed integer.
          * #param1: passed by value, defined like htmlElemDescPtr.
          * #param2: pointer passed by value; OPTIONS(*STRING) also accepts
          *          a character expression.
370      d htmlElementAllowedHere...
371      d                 pr            10i 0 extproc('htmlElementAllowedHere')
372      d  #param1                            value like(htmlElemDescPtr)          const
373      d  #param2                        *   value options(*string)               const xmlChar *
374
          * Prototype for the external procedure 'htmlElementStatusHere'.
          * Returns a value defined like htmlStatus.
          * #param1, #param2: passed by value, both defined like
          *                   htmlElemDescPtr.
375      d htmlElementStatusHere...
376      d                 pr                  extproc('htmlElementStatusHere')
377      d                                     like(htmlStatus)
378      d  #param1                            value like(htmlElemDescPtr)          const
379      d  #param2                            value like(htmlElemDescPtr)          const
380
          * Prototype for the external procedure 'htmlNodeStatus'.
          * Returns a value defined like htmlStatus.
          * #param1: passed by value, defined like htmlNodePtr.
          * #param2: 10-digit signed integer passed by value.
381      d htmlNodeStatus  pr                  extproc('htmlNodeStatus')
382      d                                     like(htmlStatus)
383      d  #param1                            value like(htmlNodePtr)
384      d  #param2                      10i 0 value
385
386       * C macros implemented as procedures for ILE/RPG support.
387
          * Prototype bound to the external procedure
          * '__htmlDefaultSubelement'; note that the external name carries
          * a '__' prefix that the RPG name does not.
          * Returns a pointer (const char * per the right margin).
          * elt: pointer passed by value (const htmlElemDesc * per the
          *      right margin).
388      d htmlDefaultSubelement...
389      d                 pr              *   extproc('__htmlDefaultSubelement')   const char *
390      d  elt                            *   value                                const htmlElemDesc *
391
          * Prototype bound to the external procedure
          * '__htmlElementAllowedHereDesc'; the external name carries a
          * '__' prefix that the RPG name does not.
          * Returns a 10-digit signed integer.
          * parent, elt: pointers passed by value (const htmlElemDesc *
          *              per the right margin).
392      d htmlElementAllowedHereDesc...
393      d                 pr            10i 0 extproc(
394      d                                     '__htmlElementAllowedHereDesc')
395      d  parent                         *   value                                const htmlElemDesc *
396      d  elt                            *   value                                const htmlElemDesc *
397
          * Prototype bound to the external procedure
          * '__htmlRequiredAttrs'; the external name carries a '__' prefix
          * that the RPG name does not.
          * Returns a pointer (const char * * per the right margin).
          * elt: pointer passed by value (const htmlElemDesc * per the
          *      right margin).
398      d htmlRequiredAttrs...
399      d                 pr              *   extproc('__htmlRequiredAttrs')        const char * *
400      d  elt                            *   value                                const htmlElemDesc *
401
402       /endif                                                                    LIBXML_HTML_ENABLED
403       /endif                                                                    HTML_PARSER_H__