1 * Summary: interface for the encoding conversion functions
2 * Description: interface for the encoding conversion functions needed for
3 * XML basic encoding and iconv() support.
6 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * Copy: See Copyright for the status of this software.
18 * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A.
20 /if not defined(XML_CHAR_ENCODING_H__)
21 /define XML_CHAR_ENCODING_H__
23 /include "libxmlrpg/xmlversion"
27 * Predefined values for some standard encodings.
28 * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
29 * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
31 * Anything else would have to be translated to UTF8 before being
32 * given to the parser itself. The BOM for UTF16 and the encoding
33 * declaration are looked at and a converter is looked for at that
34 * point. If not found the parser stops here as asked by the XML REC. A
35 * converter can be registered by the user using
36 * xmlRegisterCharEncodingHandler but the current form doesn't allow
37 * stateful transcoding (a serious problem, agreed!). If iconv has been
38 * found it will be used automatically and allow stateful transcoding,
39 * the simplest is then to be sure to enable iconv and to provide iconv
40 * libs for the encoding support needed.
42 * Note that the generic "UTF-16" is not a predefined value. Instead, only
43 * the specific UTF-16LE and UTF-16BE are present.
* Character encoding enumeration: an integer-typed (10i 0) typedef-style
* definition followed by one named constant per predefined encoding.
* The trailing remark on each constant line names the encoding it denotes.
* NOTE(review): XML_CHAR_ENCODING_ERROR and XML_CHAR_ENCODING_NONE carry the
* same remark; ERROR presumably reports a failure rather than "nothing
* detected" -- confirm against the C header.
46 d s 10i 0 based(######typedef######) enum
47 d XML_CHAR_ENCODING_ERROR... No encoding detected
49 d XML_CHAR_ENCODING_NONE... No encoding detected
51 d XML_CHAR_ENCODING_UTF8... UTF-8
53 d XML_CHAR_ENCODING_UTF16LE... UTF-16 little endian
55 d XML_CHAR_ENCODING_UTF16BE... UTF-16 big endian
57 d XML_CHAR_ENCODING_UCS4LE... UCS-4 little endian
59 d XML_CHAR_ENCODING_UCS4BE... UCS-4 big endian
61 d XML_CHAR_ENCODING_EBCDIC... EBCDIC uh!
63 d XML_CHAR_ENCODING_UCS4_2143... UCS-4 unusual order
65 d XML_CHAR_ENCODING_UCS4_3412... UCS-4 unusual order
67 d XML_CHAR_ENCODING_UCS2... UCS-2
69 d XML_CHAR_ENCODING_8859_1... ISO-8859-1 ISOLatin1
71 d XML_CHAR_ENCODING_8859_2... ISO-8859-2 ISOLatin2
73 d XML_CHAR_ENCODING_8859_3... ISO-8859-3
75 d XML_CHAR_ENCODING_8859_4... ISO-8859-4
77 d XML_CHAR_ENCODING_8859_5... ISO-8859-5
79 d XML_CHAR_ENCODING_8859_6... ISO-8859-6
81 d XML_CHAR_ENCODING_8859_7... ISO-8859-7
83 d XML_CHAR_ENCODING_8859_8... ISO-8859-8
85 d XML_CHAR_ENCODING_8859_9... ISO-8859-9
87 d XML_CHAR_ENCODING_2022_JP... ISO-2022-JP
89 d XML_CHAR_ENCODING_SHIFT_JIS... Shift_JIS
91 d XML_CHAR_ENCODING_EUC_JP... EUC-JP
93 d XML_CHAR_ENCODING_ASCII... Pure ASCII
96 * xmlCharEncodingInputFunc:
97 * @out: a pointer to an array of bytes to store the UTF-8 result
98 * @outlen: the length of @out
99 * @in: a pointer to an array of chars in the original encoding
100 * @inlen: the length of @in
102 * Take a block of chars in the original encoding and try to convert
103 * it to a UTF-8 block of chars out.
105 * Returns the number of bytes written, -1 if lack of space, or -2
106 * if the transcoding failed.
107 * The value of @inlen after return is the number of octets consumed
108 * if the return value is positive, else unpredictable.
109 * The value of @outlen after return is the number of octets produced.
* The RPG definition below is pointer-typed (*).
111 d xmlCharEncodingInputFunc...
112 d s * based(######typedef######)
115 * xmlCharEncodingOutputFunc:
116 * @out: a pointer to an array of bytes to store the result
117 * @outlen: the length of @out
118 * @in: a pointer to an array of UTF-8 chars
119 * @inlen: the length of @in
121 * Take a block of UTF-8 chars in and try to convert it to another
* encoding.
123 * Note: a first call designed to produce heading info is called with
124 * in = NULL. If stateful this should also initialize the encoder state.
126 * Returns the number of bytes written, -1 if lack of space, or -2
127 * if the transcoding failed.
128 * The value of @inlen after return is the number of octets consumed
129 * if the return value is positive, else unpredictable.
130 * The value of @outlen after return is the number of octets produced.
* The RPG definition below is pointer-typed (*).
132 d xmlCharEncodingOutputFunc...
133 d s * based(######typedef######)
136 * Block defining the handlers for non UTF-8 encodings.
137 * If iconv is supported, there are two extra fields.
* uconv_t: data structure holding two pointer fields, uconv and utf8, each
* annotated as a UConverter *. It is only declared when
* LIBXML_ICU_ENABLED is defined.
139 /if defined(LIBXML_ICU_ENABLED)
140 d uconv_t ds based(######typedef######)
142 d uconv * UConverter *
143 d utf8 * UConverter *
* xmlCharEncodingHandlerPtr: pointer type; it is the basing pointer named
* by the xmlCharEncodingHandler data structure and is the type used for
* handler arguments and results in the prototypes of this member.
146 d xmlCharEncodingHandlerPtr...
147 d s * based(######typedef######)
* xmlCharEncodingHandler: data structure based on
* xmlCharEncodingHandlerPtr. It holds the input and output conversion
* function fields; iconv handle field(s) are added when
* LIBXML_ICONV_ENABLED is defined, and the uconv_in / uconv_out converter
* pointers are added when LIBXML_ICU_ENABLED is defined.
149 d xmlCharEncodingHandler...
150 d ds based(xmlCharEncodingHandlerPtr)
153 d input like(xmlCharEncodingInputFunc)
154 d output like(xmlCharEncodingOutputFunc)
156 /if defined(LIBXML_ICONV_ENABLED)
158 d iconv_out * iconv_t
159 /endif LIBXML_ICONV_ENABLED
161 /if defined(LIBXML_ICU_ENABLED)
162 d uconv_in * uconv_t *
163 d uconv_out * uconv_t *
164 /endif LIBXML_ICU_ENABLED
166 /include "libxmlrpg/tree"
168 * Interfaces for encoding handlers.
* xmlInitCharEncodingHandlers: bound to the external name
* 'xmlInitCharEncodingHandlers'; no parameters are declared.
170 d xmlInitCharEncodingHandlers...
172 d 'xmlInitCharEncodingHandlers')
* xmlCleanupCharEncodingHandlers: bound to the external name
* 'xmlCleanupCharEncodingHandlers'; no parameters are declared.
174 d xmlCleanupCharEncodingHandlers...
176 d 'xmlCleanupCharEncodingHandlers')
* xmlRegisterCharEncodingHandler: bound to the external name
* 'xmlRegisterCharEncodingHandler'; takes one handler pointer
* (xmlCharEncodingHandlerPtr) passed by value.
178 d xmlRegisterCharEncodingHandler...
180 d 'xmlRegisterCharEncodingHandler')
181 d handler value like(xmlCharEncodingHandlerPtr)
* xmlGetCharEncodingHandler: prototype for the external procedure
* 'xmlGetCharEncodingHandler'. Takes an xmlCharEncoding value (enc) and
* returns an xmlCharEncodingHandlerPtr.
183 d xmlGetCharEncodingHandler...
184 d pr extproc('xmlGetCharEncodingHandler')
185 d like(xmlCharEncodingHandlerPtr)
186 d enc value like(xmlCharEncoding)
* xmlFindCharEncodingHandler: prototype for the external procedure
* 'xmlFindCharEncodingHandler'. Takes an encoding name as a
* null-terminated string (pointer by value, options(*string)) and
* returns an xmlCharEncodingHandlerPtr.
188 d xmlFindCharEncodingHandler...
189 d pr extproc('xmlFindCharEncodingHandler')
190 d like(xmlCharEncodingHandlerPtr)
191 d name * value options(*string) const char *
* xmlNewCharEncodingHandler: prototype for the external procedure
* 'xmlNewCharEncodingHandler'. Takes a null-terminated name string plus an
* input and an output conversion function (both by value) and returns an
* xmlCharEncodingHandlerPtr.
193 d xmlNewCharEncodingHandler...
194 d pr extproc('xmlNewCharEncodingHandler')
195 d like(xmlCharEncodingHandlerPtr)
196 d name * value options(*string) const char *
197 d input value like(xmlCharEncodingInputFunc)
198 d output value like(xmlCharEncodingOutputFunc)
200 * Interfaces for encoding names and aliases.
* xmlAddEncodingAlias: prototype for the external procedure
* 'xmlAddEncodingAlias'. Takes two null-terminated strings, name and alias
* (pointers by value, options(*string)), and returns an integer (10i 0).
202 d xmlAddEncodingAlias...
203 d pr 10i 0 extproc('xmlAddEncodingAlias')
204 d name * value options(*string) const char *
205 d alias * value options(*string) const char *
* xmlDelEncodingAlias: prototype for the external procedure
* 'xmlDelEncodingAlias'. Takes the alias as a null-terminated string
* and returns an integer (10i 0).
207 d xmlDelEncodingAlias...
208 d pr 10i 0 extproc('xmlDelEncodingAlias')
209 d alias * value options(*string) const char *
* xmlGetEncodingAlias: prototype for the external procedure
* 'xmlGetEncodingAlias'. Takes the alias as a null-terminated string
* and returns a pointer annotated as const char *.
211 d xmlGetEncodingAlias...
212 d pr * extproc('xmlGetEncodingAlias') const char *
213 d alias * value options(*string) const char *
* xmlCleanupEncodingAliases: prototype for the external procedure
* 'xmlCleanupEncodingAliases'; no parameters and no return value are
* declared.
215 d xmlCleanupEncodingAliases...
216 d pr extproc('xmlCleanupEncodingAliases')
* xmlParseCharEncoding: prototype for the external procedure
* 'xmlParseCharEncoding'. Takes an encoding name as a null-terminated
* string and returns an xmlCharEncoding value.
218 d xmlParseCharEncoding...
219 d pr extproc('xmlParseCharEncoding')
220 d like(xmlCharEncoding)
221 d name * value options(*string) const char *
* xmlGetCharEncodingName: prototype for the external procedure
* 'xmlGetCharEncodingName'. Takes an xmlCharEncoding value (enc) and
* returns a pointer annotated as const char *.
223 d xmlGetCharEncodingName...
224 d pr * extproc('xmlGetCharEncodingName') const char *
225 d enc value like(xmlCharEncoding)
227 * Interfaces directly used by the parsers.
* xmlDetectCharEncoding: prototype for the external procedure
* 'xmlDetectCharEncoding'. The visible parameter, in, is a pointer by
* value with options(*string), annotated as const unsigned char *; the
* result is an xmlCharEncoding value.
229 d xmlDetectCharEncoding...
230 d pr extproc('xmlDetectCharEncoding')
231 d like(xmlCharEncoding)
232 d in * value options(*string) const unsigned char*
* xmlCharEncOutFunc: prototype for the external procedure
* 'xmlCharEncOutFunc'. handler is an xmlCharEncodingHandler structure
* passed by reference (no value keyword); out and in are xmlBufferPtr
* values. Returns an integer (10i 0).
235 d xmlCharEncOutFunc...
236 d pr 10i 0 extproc('xmlCharEncOutFunc')
237 d handler like(xmlCharEncodingHandler)
238 d out value like(xmlBufferPtr)
239 d in value like(xmlBufferPtr)
* xmlCharEncInFunc: prototype for the external procedure
* 'xmlCharEncInFunc'. handler is an xmlCharEncodingHandler structure
* passed by reference (no value keyword); out and in are xmlBufferPtr
* values. Returns an integer (10i 0).
241 d xmlCharEncInFunc...
242 d pr 10i 0 extproc('xmlCharEncInFunc')
243 d handler like(xmlCharEncodingHandler)
244 d out value like(xmlBufferPtr)
245 d in value like(xmlBufferPtr)
* xmlCharEncFirstLine: prototype for the external procedure
* 'xmlCharEncFirstLine'. handler is an xmlCharEncodingHandler structure
* passed by reference (no value keyword); out and in are xmlBufferPtr
* values. Returns an integer (10i 0).
247 d xmlCharEncFirstLine...
248 d pr 10i 0 extproc('xmlCharEncFirstLine')
249 d handler like(xmlCharEncodingHandler)
250 d out value like(xmlBufferPtr)
251 d in value like(xmlBufferPtr)
* xmlCharEncCloseFunc: prototype for the external procedure
* 'xmlCharEncCloseFunc'. handler is an xmlCharEncodingHandler structure
* passed by reference (no value keyword). Returns an integer (10i 0).
253 d xmlCharEncCloseFunc...
254 d pr 10i 0 extproc('xmlCharEncCloseFunc')
255 d handler like(xmlCharEncodingHandler)
257 * Export a few useful functions
* UTF8Toisolat1: prototype for the external procedure 'UTF8Toisolat1',
* declared only when LIBXML_OUTPUT_ENABLED is defined. out is a
* 65535-byte field with options(*varsize), so the actual buffer may have a
* different length; in is a pointer by value with options(*string).
* Returns an integer (10i 0).
259 /if defined(LIBXML_OUTPUT_ENABLED)
260 d UTF8Toisolat1 pr 10i 0 extproc('UTF8Toisolat1')
261 d out 65535 options(*varsize) unsigned char (*)
263 d in * value options(*string) const unsigned char*
266 /endif LIBXML_OUTPUT_ENABLED
* isolat1ToUTF8: prototype for the external procedure 'isolat1ToUTF8'.
* out is a 65535-byte field with options(*varsize), so the actual buffer
* may have a different length; in is a pointer by value with
* options(*string). Returns an integer (10i 0).
268 d isolat1ToUTF8 pr 10i 0 extproc('isolat1ToUTF8')
269 d out 65535 options(*varsize) unsigned char (*)
271 d in * value options(*string) const unsigned char*
274 /endif XML_CHAR_ENCODING_H