1 * Summary: interface for the encoding conversion functions
2 * Description: interface for the encoding conversion functions needed for
3 * XML basic encoding and iconv() support.
6 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * Copy: See Copyright for the status of this software.
18 * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A.
20 /if not defined(XML_CHAR_ENCODING_H__)
21 /define XML_CHAR_ENCODING_H__
23 /include "libxmlrpg/xmlversion"
24 /include "libxmlrpg/xmlTypesC"
28 * Predefined values for some standard encodings.
29 * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
30 * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
32 * Anything else would have to be translated to UTF8 before being
33 * given to the parser itself. The BOM for UTF16 and the encoding
34 * declaration are looked at and a converter is looked for at that
35 * point. If not found the parser stops here as asked by the XML REC. A
36 * converter can be registered by the user using
37 * xmlRegisterCharEncodingHandler but the current form doesn't allow
38 * stateful transcoding (a serious problem agreed !). If iconv has been
39 * found it will be used automatically and allow stateful transcoding,
40 * the simplest is then to be sure to enable iconv and to provide iconv
41 * libs for the encoding support needed.
43 * Note that the generic "UTF-16" is not a predefined value. Instead, only
44 * the specific UTF-16LE and UTF-16BE are present.
47 d s based(######typedef######)
49 d XML_CHAR_ENCODING_ERROR... No encoding detected
51 d XML_CHAR_ENCODING_NONE... No encoding detected
53 d XML_CHAR_ENCODING_UTF8... UTF-8
55 d XML_CHAR_ENCODING_UTF16LE... UTF-16 little endian
57 d XML_CHAR_ENCODING_UTF16BE... UTF-16 big endian
59 d XML_CHAR_ENCODING_UCS4LE... UCS-4 little endian
61 d XML_CHAR_ENCODING_UCS4BE... UCS-4 big endian
63 d XML_CHAR_ENCODING_EBCDIC... EBCDIC uh!
65 d XML_CHAR_ENCODING_UCS4_2143... UCS-4 unusual order
67 d XML_CHAR_ENCODING_UCS4_3412... UCS-4 unusual order
69 d XML_CHAR_ENCODING_UCS2... UCS-2
71 d XML_CHAR_ENCODING_8859_1... ISO-8859-1 ISOLatin1
73 d XML_CHAR_ENCODING_8859_2... ISO-8859-2 ISOLatin2
75 d XML_CHAR_ENCODING_8859_3... ISO-8859-3
77 d XML_CHAR_ENCODING_8859_4... ISO-8859-4
79 d XML_CHAR_ENCODING_8859_5... ISO-8859-5
81 d XML_CHAR_ENCODING_8859_6... ISO-8859-6
83 d XML_CHAR_ENCODING_8859_7... ISO-8859-7
85 d XML_CHAR_ENCODING_8859_8... ISO-8859-8
87 d XML_CHAR_ENCODING_8859_9... ISO-8859-9
89 d XML_CHAR_ENCODING_2022_JP... ISO-2022-JP
91 d XML_CHAR_ENCODING_SHIFT_JIS... Shift_JIS
93 d XML_CHAR_ENCODING_EUC_JP... EUC-JP
95 d XML_CHAR_ENCODING_ASCII... Pure ASCII
98 * xmlCharEncodingInputFunc:
99 * @out: a pointer to an array of bytes to store the UTF-8 result
100 * @outlen: the length of @out
101 * @in: a pointer to an array of chars in the original encoding
102 * @inlen: the length of @in
104 * Take a block of chars in the original encoding and try to convert
105 * it to an UTF-8 block of chars out.
107 * Returns the number of bytes written, -1 if lack of space, or -2
108 * if the transcoding failed.
109 * The value of @inlen after return is the number of octets consumed
110 * if the return value is positive, else unpredictable.
111 * The value of @outlen after return is the number of octets produced.
113 d xmlCharEncodingInputFunc...
114 d s * based(######typedef######)
117 * xmlCharEncodingOutputFunc:
118 * @out: a pointer to an array of bytes to store the result
119 * @outlen: the length of @out
120 * @in: a pointer to an array of UTF-8 chars
121 * @inlen: the length of @in
123 * Take a block of UTF-8 chars in and try to convert it to another encoding.
125 * Note: a first call designed to produce heading info is called with
126 * in = NULL. If stateful this should also initialize the encoder state.
128 * Returns the number of bytes written, -1 if lack of space, or -2
129 * if the transcoding failed.
130 * The value of @inlen after return is the number of octets consumed
131 * if the return value is positive, else unpredictable.
132 * The value of @outlen after return is the number of octets produced.
134 d xmlCharEncodingOutputFunc...
135 d s * based(######typedef######)
138 * Block defining the handlers for non UTF-8 encodings.
139 * If iconv is supported, there are two extra fields; ICU support adds its own pair of converter fields as well.
141 /if defined(LIBXML_ICU_ENABLED)
142 d uconv_t ds based(######typedef######)
144 d uconv * UConverter *
145 d utf8 * UConverter *
* Pointer type for encoding handlers. It is the basing pointer of the
* xmlCharEncodingHandler data structure and is referenced with like()
* by the handler prototypes in this member.
148 d xmlCharEncodingHandlerPtr...
149 d s * based(######typedef######)
151 d xmlCharEncodingHandler...
152 d ds based(xmlCharEncodingHandlerPtr)
155 d input like(xmlCharEncodingInputFunc)
156 d output like(xmlCharEncodingOutputFunc)
158 /if defined(LIBXML_ICONV_ENABLED)
160 d iconv_out * iconv_t
161 /endif LIBXML_ICONV_ENABLED
163 /if defined(LIBXML_ICU_ENABLED)
164 d uconv_in * uconv_t *
165 d uconv_out * uconv_t *
166 /endif LIBXML_ICU_ENABLED
168 /include "libxmlrpg/tree"
170 * Interfaces for encoding handlers.
172 d xmlInitCharEncodingHandlers...
174 d 'xmlInitCharEncodingHandlers')
176 d xmlCleanupCharEncodingHandlers...
178 d 'xmlCleanupCharEncodingHandlers')
180 d xmlRegisterCharEncodingHandler...
182 d 'xmlRegisterCharEncodingHandler')
183 d handler value like(xmlCharEncodingHandlerPtr)
* Prototype bound to the external procedure 'xmlGetCharEncodingHandler'.
* Takes an xmlCharEncoding value (enc, passed by value) and returns an
* xmlCharEncodingHandlerPtr.
185 d xmlGetCharEncodingHandler...
186 d pr extproc('xmlGetCharEncodingHandler')
187 d like(xmlCharEncodingHandlerPtr)
188 d enc value like(xmlCharEncoding)
* Prototype bound to the external procedure 'xmlFindCharEncodingHandler'.
* name is a pointer passed by value; options(*string) lets the caller
* supply a character expression, which is handed over as a
* null-terminated string. Returns an xmlCharEncodingHandlerPtr.
190 d xmlFindCharEncodingHandler...
191 d pr extproc('xmlFindCharEncodingHandler')
192 d like(xmlCharEncodingHandlerPtr)
193 d name * value options(*string) const char *
* Prototype bound to the external procedure 'xmlNewCharEncodingHandler'.
* name is a null-terminated string pointer (options(*string)); input and
* output are conversion callbacks of the xmlCharEncodingInputFunc and
* xmlCharEncodingOutputFunc types, all passed by value.
* Returns an xmlCharEncodingHandlerPtr.
195 d xmlNewCharEncodingHandler...
196 d pr extproc('xmlNewCharEncodingHandler')
197 d like(xmlCharEncodingHandlerPtr)
198 d name * value options(*string) const char *
199 d input value like(xmlCharEncodingInputFunc)
200 d output value like(xmlCharEncodingOutputFunc)
202 * Interfaces for encoding names and aliases.
204 d xmlAddEncodingAlias...
205 d pr extproc('xmlAddEncodingAlias')
207 d name * value options(*string) const char *
208 d alias * value options(*string) const char *
210 d xmlDelEncodingAlias...
211 d pr extproc('xmlDelEncodingAlias')
213 d alias * value options(*string) const char *
* Prototype bound to the external procedure 'xmlGetEncodingAlias'.
* alias is a null-terminated string pointer passed by value
* (options(*string)). Returns a pointer (const char * on the C side);
* NOTE(review): the result is a C string, not an RPG character value -
* confirm callers convert it (e.g. with %str) before use.
215 d xmlGetEncodingAlias...
216 d pr * extproc('xmlGetEncodingAlias') const char *
217 d alias * value options(*string) const char *
* Prototype bound to the external procedure 'xmlCleanupEncodingAliases'.
* Declared with no parameters and no return value.
219 d xmlCleanupEncodingAliases...
220 d pr extproc('xmlCleanupEncodingAliases')
* Prototype bound to the external procedure 'xmlParseCharEncoding'.
* name is a null-terminated string pointer passed by value
* (options(*string)). Returns an xmlCharEncoding value.
222 d xmlParseCharEncoding...
223 d pr extproc('xmlParseCharEncoding')
224 d like(xmlCharEncoding)
225 d name * value options(*string) const char *
* Prototype bound to the external procedure 'xmlGetCharEncodingName'.
* Takes an xmlCharEncoding value (enc, passed by value) and returns a
* pointer (const char * on the C side).
227 d xmlGetCharEncodingName...
228 d pr * extproc('xmlGetCharEncodingName') const char *
229 d enc value like(xmlCharEncoding)
231 * Interfaces directly used by the parsers.
* Prototype bound to the external procedure 'xmlDetectCharEncoding'.
* in is a pointer passed by value (const unsigned char * on the C side)
* and len is an xmlCint passed by value. Returns an xmlCharEncoding value.
* NOTE(review): in is declared options(*string), so a character
* expression argument is passed as a null-terminated copy - confirm this
* suits callers that hand in raw byte buffers.
233 d xmlDetectCharEncoding...
234 d pr extproc('xmlDetectCharEncoding')
235 d like(xmlCharEncoding)
236 d in * value options(*string) const unsigned char*
237 d len value like(xmlCint)
239 d xmlCharEncOutFunc...
240 d pr extproc('xmlCharEncOutFunc')
242 d handler likeds(xmlCharEncodingHandler)
243 d out value like(xmlBufferPtr)
244 d in value like(xmlBufferPtr)
246 d xmlCharEncInFunc...
247 d pr extproc('xmlCharEncInFunc')
249 d handler likeds(xmlCharEncodingHandler)
250 d out value like(xmlBufferPtr)
251 d in value like(xmlBufferPtr)
253 d xmlCharEncFirstLine...
254 d pr extproc('xmlCharEncFirstLine')
256 d handler likeds(xmlCharEncodingHandler)
257 d out value like(xmlBufferPtr)
258 d in value like(xmlBufferPtr)
260 d xmlCharEncCloseFunc...
261 d pr extproc('xmlCharEncCloseFunc')
263 d handler likeds(xmlCharEncodingHandler)
265 * Export a few useful functions
267 /if defined(LIBXML_OUTPUT_ENABLED)
268 d UTF8Toisolat1 pr extproc('UTF8Toisolat1')
270 d out 65535 options(*varsize) unsigned char (*)
271 d outlen like(xmlCint)
272 d in * value options(*string) const unsigned char*
273 d inlen like(xmlCint)
275 /endif LIBXML_OUTPUT_ENABLD
277 d isolat1ToUTF8 pr extproc('isolat1ToUTF8')
279 d out 65535 options(*varsize) unsigned char (*)
280 d outlen like(xmlCint)
281 d in * value options(*string) const unsigned char*
282 d inlen like(xmlCint)
284 /endif XML_CHAR_ENCODING_H