Bump to 1.1
[platform/upstream/libunistring.git] / doc / libunistring_13.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html401/loose.dtd">
2 <html>
3 <!-- Created on October, 16 2022 by texi2html 1.78a -->
4 <!--
5 Written by: Lionel Cons <Lionel.Cons@cern.ch> (original author)
6             Karl Berry  <karl@freefriends.org>
7             Olaf Bachmann <obachman@mathematik.uni-kl.de>
8             and many others.
9 Maintained by: Many creative people.
10 Send bugs and suggestions to <texi2html-bug@nongnu.org>
11
12 -->
13 <head>
14 <title>GNU libunistring: 13. Normalization forms (composition and decomposition) &lt;uninorm.h&gt;</title>
15
16 <meta name="description" content="GNU libunistring: 13. Normalization forms (composition and decomposition) &lt;uninorm.h&gt;">
17 <meta name="keywords" content="GNU libunistring: 13. Normalization forms (composition and decomposition) &lt;uninorm.h&gt;">
18 <meta name="resource-type" content="document">
19 <meta name="distribution" content="global">
20 <meta name="Generator" content="texi2html 1.78a">
21 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
22 <style type="text/css">
23 <!--
24 a.summary-letter {text-decoration: none}
25 pre.display {font-family: serif}
26 pre.format {font-family: serif}
27 pre.menu-comment {font-family: serif}
28 pre.menu-preformatted {font-family: serif}
29 pre.smalldisplay {font-family: serif; font-size: smaller}
30 pre.smallexample {font-size: smaller}
31 pre.smallformat {font-family: serif; font-size: smaller}
32 pre.smalllisp {font-size: smaller}
33 span.roman {font-family:serif; font-weight:normal;}
34 span.sansserif {font-family:sans-serif; font-weight:normal;}
35 ul.toc {list-style: none}
36 -->
37 </style>
38
39
40 </head>
41
42 <body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
43
44 <table cellpadding="1" cellspacing="1" border="0">
45 <tr><td valign="middle" align="left">[<a href="libunistring_12.html#SEC60" title="Beginning of this chapter or previous chapter"> &lt;&lt; </a>]</td>
46 <td valign="middle" align="left">[<a href="libunistring_14.html#SEC67" title="Next chapter"> &gt;&gt; </a>]</td>
47 <td valign="middle" align="left"> &nbsp; </td>
48 <td valign="middle" align="left"> &nbsp; </td>
49 <td valign="middle" align="left"> &nbsp; </td>
50 <td valign="middle" align="left"> &nbsp; </td>
51 <td valign="middle" align="left"> &nbsp; </td>
52 <td valign="middle" align="left">[<a href="libunistring_toc.html#SEC_Top" title="Cover (top) of document">Top</a>]</td>
53 <td valign="middle" align="left">[<a href="libunistring_toc.html#SEC_Contents" title="Table of contents">Contents</a>]</td>
54 <td valign="middle" align="left">[<a href="libunistring_21.html#SEC92" title="Index">Index</a>]</td>
55 <td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td>
56 </tr></table>
57
58 <hr size="2">
59 <a name="uninorm_002eh"></a>
60 <a name="SEC61"></a>
61 <h1 class="chapter"> <a href="libunistring_toc.html#TOC61">13. Normalization forms (composition and decomposition) <code>&lt;uninorm.h&gt;</code></a> </h1>
62
63 <p>This include file defines functions for transforming Unicode strings to one
64 of the four normal forms, known as NFC, NFD, NFKC, NFKD.  These
65 transformations involve decomposition and &mdash; for NFC and NFKC &mdash; composition
66 of Unicode characters.
67 </p>
68
69 <hr size="6">
70 <a name="Decomposition-of-characters"></a>
71 <a name="SEC62"></a>
72 <h2 class="section"> <a href="libunistring_toc.html#TOC62">13.1 Decomposition of Unicode characters</a> </h2>
73
74 <p>The following enumerated values are the possible types of decomposition of a
75 Unicode character.
76 </p>
77 <dl>
78 <dt><u>Constant:</u> int <b>UC_DECOMP_CANONICAL</b>
79 <a name="IDX841"></a>
80 </dt>
81 <dd><p>Denotes canonical decomposition.
82 </p></dd></dl>
83
84 <dl>
85 <dt><u>Constant:</u> int <b>UC_DECOMP_FONT</b>
86 <a name="IDX842"></a>
87 </dt>
88 <dd><p>UCD marker: <code>&lt;font&gt;</code>.  Denotes a font variant (e.g. a blackletter form).
89 </p></dd></dl>
90
91 <dl>
92 <dt><u>Constant:</u> int <b>UC_DECOMP_NOBREAK</b>
93 <a name="IDX843"></a>
94 </dt>
95 <dd><p>UCD marker: <code>&lt;noBreak&gt;</code>.
96 Denotes a no-break version of a space or hyphen.
97 </p></dd></dl>
98
99 <dl>
100 <dt><u>Constant:</u> int <b>UC_DECOMP_INITIAL</b>
101 <a name="IDX844"></a>
102 </dt>
103 <dd><p>UCD marker: <code>&lt;initial&gt;</code>.
104 Denotes an initial presentation form (Arabic).
105 </p></dd></dl>
106
107 <dl>
108 <dt><u>Constant:</u> int <b>UC_DECOMP_MEDIAL</b>
109 <a name="IDX845"></a>
110 </dt>
111 <dd><p>UCD marker: <code>&lt;medial&gt;</code>.
112 Denotes a medial presentation form (Arabic).
113 </p></dd></dl>
114
115 <dl>
116 <dt><u>Constant:</u> int <b>UC_DECOMP_FINAL</b>
117 <a name="IDX846"></a>
118 </dt>
119 <dd><p>UCD marker: <code>&lt;final&gt;</code>.
120 Denotes a final presentation form (Arabic).
121 </p></dd></dl>
122
123 <dl>
124 <dt><u>Constant:</u> int <b>UC_DECOMP_ISOLATED</b>
125 <a name="IDX847"></a>
126 </dt>
127 <dd><p>UCD marker: <code>&lt;isolated&gt;</code>.
128 Denotes an isolated presentation form (Arabic).
129 </p></dd></dl>
130
131 <dl>
132 <dt><u>Constant:</u> int <b>UC_DECOMP_CIRCLE</b>
133 <a name="IDX848"></a>
134 </dt>
135 <dd><p>UCD marker: <code>&lt;circle&gt;</code>.
136 Denotes an encircled form.
137 </p></dd></dl>
138
139 <dl>
140 <dt><u>Constant:</u> int <b>UC_DECOMP_SUPER</b>
141 <a name="IDX849"></a>
142 </dt>
143 <dd><p>UCD marker: <code>&lt;super&gt;</code>.
144 Denotes a superscript form.
145 </p></dd></dl>
146
147 <dl>
148 <dt><u>Constant:</u> int <b>UC_DECOMP_SUB</b>
149 <a name="IDX850"></a>
150 </dt>
151 <dd><p>UCD marker: <code>&lt;sub&gt;</code>.
152 Denotes a subscript form.
153 </p></dd></dl>
154
155 <dl>
156 <dt><u>Constant:</u> int <b>UC_DECOMP_VERTICAL</b>
157 <a name="IDX851"></a>
158 </dt>
159 <dd><p>UCD marker: <code>&lt;vertical&gt;</code>.
160 Denotes a vertical layout presentation form.
161 </p></dd></dl>
162
163 <dl>
164 <dt><u>Constant:</u> int <b>UC_DECOMP_WIDE</b>
165 <a name="IDX852"></a>
166 </dt>
167 <dd><p>UCD marker: <code>&lt;wide&gt;</code>.
168 Denotes a wide (or zenkaku) compatibility character.
169 </p></dd></dl>
170
171 <dl>
172 <dt><u>Constant:</u> int <b>UC_DECOMP_NARROW</b>
173 <a name="IDX853"></a>
174 </dt>
175 <dd><p>UCD marker: <code>&lt;narrow&gt;</code>.
176 Denotes a narrow (or hankaku) compatibility character.
177 </p></dd></dl>
178
179 <dl>
180 <dt><u>Constant:</u> int <b>UC_DECOMP_SMALL</b>
181 <a name="IDX854"></a>
182 </dt>
183 <dd><p>UCD marker: <code>&lt;small&gt;</code>.
184 Denotes a small variant form (CNS compatibility).
185 </p></dd></dl>
186
187 <dl>
188 <dt><u>Constant:</u> int <b>UC_DECOMP_SQUARE</b>
189 <a name="IDX855"></a>
190 </dt>
191 <dd><p>UCD marker: <code>&lt;square&gt;</code>.
192 Denotes a CJK squared font variant.
193 </p></dd></dl>
194
195 <dl>
196 <dt><u>Constant:</u> int <b>UC_DECOMP_FRACTION</b>
197 <a name="IDX856"></a>
198 </dt>
199 <dd><p>UCD marker: <code>&lt;fraction&gt;</code>.
200 Denotes a vulgar fraction form.
201 </p></dd></dl>
202
203 <dl>
204 <dt><u>Constant:</u> int <b>UC_DECOMP_COMPAT</b>
205 <a name="IDX857"></a>
206 </dt>
207 <dd><p>UCD marker: <code>&lt;compat&gt;</code>.
208 Denotes an otherwise unspecified compatibility character.
209 </p></dd></dl>
210
211 <p>The following constant denotes the maximum size of decomposition of a single
212 Unicode character.
213 </p>
214 <dl>
215 <dt><u>Macro:</u> unsigned int <b>UC_DECOMPOSITION_MAX_LENGTH</b>
216 <a name="IDX858"></a>
217 </dt>
218 <dd><p>This macro expands to a constant that is the required size of buffer passed to
219 the <code>uc_decomposition</code> and <code>uc_canonical_decomposition</code> functions.
220 </p></dd></dl>
221
222 <p>The following functions decompose a Unicode character.
223 </p>
224 <dl>
225 <dt><u>Function:</u> int <b>uc_decomposition</b><i> (ucs4_t&nbsp;<var>uc</var>, int&nbsp;*<var>decomp_tag</var>, ucs4_t&nbsp;*<var>decomposition</var>)</i>
226 <a name="IDX859"></a>
227 </dt>
228 <dd><p>Returns the character decomposition mapping of the Unicode character <var>uc</var>.
229 <var>decomposition</var> must point to an array of at least
230 <code>UC_DECOMPOSITION_MAX_LENGTH</code> <code>ucs4_t</code> elements.
231 </p>
232 <p>When a decomposition exists, <code><var>decomposition</var>[0..<var>n</var>-1]</code> and
233 <code>*<var>decomp_tag</var></code> are filled and <var>n</var> is returned.  Otherwise -1 is
234 returned.
235 </p></dd></dl>
236
237 <dl>
238 <dt><u>Function:</u> int <b>uc_canonical_decomposition</b><i> (ucs4_t&nbsp;<var>uc</var>, ucs4_t&nbsp;*<var>decomposition</var>)</i>
239 <a name="IDX860"></a>
240 </dt>
241 <dd><p>Returns the canonical character decomposition mapping of the Unicode character
242 <var>uc</var>.  <var>decomposition</var> must point to an array of at least
243 <code>UC_DECOMPOSITION_MAX_LENGTH</code> <code>ucs4_t</code> elements.
244 </p>
245 <p>When a decomposition exists, <code><var>decomposition</var>[0..<var>n</var>-1]</code> is filled
246 and <var>n</var> is returned.  Otherwise -1 is returned.
247 </p>
248 <p>Note: This function returns the (simple) &ldquo;canonical decomposition&rdquo; of
249 <var>uc</var>.  If you want the &ldquo;full canonical decomposition&rdquo; of <var>uc</var>,
250 that is, the recursive application of &ldquo;canonical decomposition&rdquo;, use the
251 function <code>u*_normalize</code> with argument <code>UNINORM_NFD</code> instead.
252 </p></dd></dl>
253
254 <hr size="6">
255 <a name="Composition-of-characters"></a>
256 <a name="SEC63"></a>
257 <h2 class="section"> <a href="libunistring_toc.html#TOC63">13.2 Composition of Unicode characters</a> </h2>
258
259 <p>The following function composes a Unicode character from two Unicode
260 characters.
261 </p>
262 <dl>
263 <dt><u>Function:</u> ucs4_t <b>uc_composition</b><i> (ucs4_t&nbsp;<var>uc1</var>, ucs4_t&nbsp;<var>uc2</var>)</i>
264 <a name="IDX861"></a>
265 </dt>
266 <dd><p>Attempts to combine the Unicode characters <var>uc1</var>, <var>uc2</var>.
267 <var>uc1</var> is known to have canonical combining class 0.
268 </p>
269 <p>Returns the combination of <var>uc1</var> and <var>uc2</var>, if it exists.
270 Returns 0 otherwise.
271 </p>
272 <p>Not all decompositions can be recombined using this function.  See the Unicode
273 file &lsquo;<tt>CompositionExclusions.txt</tt>&rsquo; for details.
274 </p></dd></dl>
275
276 <hr size="6">
277 <a name="Normalization-of-strings"></a>
278 <a name="SEC64"></a>
279 <h2 class="section"> <a href="libunistring_toc.html#TOC64">13.3 Normalization of strings</a> </h2>
280
281 <p>The Unicode standard defines four normalization forms for Unicode strings.
282 The following type is used to denote a normalization form.
283 </p>
284 <dl>
285 <dt><u>Type:</u> <b>uninorm_t</b>
286 <a name="IDX862"></a>
287 </dt>
288 <dd><p>An object of type <code>uninorm_t</code> denotes a Unicode normalization form.
289 This is a scalar type; its values can be compared with <code>==</code>.
290 </p></dd></dl>
291
292 <p>The following constants denote the four normalization forms.
293 </p>
294 <dl>
295 <dt><u>Macro:</u> uninorm_t <b>UNINORM_NFD</b>
296 <a name="IDX863"></a>
297 </dt>
298 <dd><p>Denotes Normalization form D: canonical decomposition.
299 </p></dd></dl>
300
301 <dl>
302 <dt><u>Macro:</u> uninorm_t <b>UNINORM_NFC</b>
303 <a name="IDX864"></a>
304 </dt>
305 <dd><p>Normalization form C: canonical decomposition, then canonical composition.
306 </p></dd></dl>
307
308 <dl>
309 <dt><u>Macro:</u> uninorm_t <b>UNINORM_NFKD</b>
310 <a name="IDX865"></a>
311 </dt>
312 <dd><p>Normalization form KD: compatibility decomposition.
313 </p></dd></dl>
314
315 <dl>
316 <dt><u>Macro:</u> uninorm_t <b>UNINORM_NFKC</b>
317 <a name="IDX866"></a>
318 </dt>
319 <dd><p>Normalization form KC: compatibility decomposition, then canonical composition.
320 </p></dd></dl>
321
322 <p>The following functions operate on <code>uninorm_t</code> objects.
323 </p>
324 <dl>
325 <dt><u>Function:</u> bool <b>uninorm_is_compat_decomposing</b><i> (uninorm_t&nbsp;<var>nf</var>)</i>
326 <a name="IDX867"></a>
327 </dt>
328 <dd><p>Tests whether the normalization form <var>nf</var> does compatibility decomposition.
329 </p></dd></dl>
330
331 <dl>
332 <dt><u>Function:</u> bool <b>uninorm_is_composing</b><i> (uninorm_t&nbsp;<var>nf</var>)</i>
333 <a name="IDX868"></a>
334 </dt>
335 <dd><p>Tests whether the normalization form <var>nf</var> includes canonical composition.
336 </p></dd></dl>
337
338 <dl>
339 <dt><u>Function:</u> uninorm_t <b>uninorm_decomposing_form</b><i> (uninorm_t&nbsp;<var>nf</var>)</i>
340 <a name="IDX869"></a>
341 </dt>
342 <dd><p>Returns the decomposing variant of the normalization form <var>nf</var>.
343 This maps NFC,NFD → NFD and NFKC,NFKD → NFKD.
344 </p></dd></dl>
345
346 <p>The following functions apply a Unicode normalization form to a Unicode string.
347 </p>
348 <dl>
349 <dt><u>Function:</u> uint8_t * <b>u8_normalize</b><i> (uninorm_t&nbsp;<var>nf</var>, const&nbsp;uint8_t&nbsp;*<var>s</var>, size_t&nbsp;<var>n</var>, uint8_t&nbsp;*<var>resultbuf</var>, size_t&nbsp;*<var>lengthp</var>)</i>
350 <a name="IDX870"></a>
351 </dt>
352 <dt><u>Function:</u> uint16_t * <b>u16_normalize</b><i> (uninorm_t&nbsp;<var>nf</var>, const&nbsp;uint16_t&nbsp;*<var>s</var>, size_t&nbsp;<var>n</var>, uint16_t&nbsp;*<var>resultbuf</var>, size_t&nbsp;*<var>lengthp</var>)</i>
353 <a name="IDX871"></a>
354 </dt>
355 <dt><u>Function:</u> uint32_t * <b>u32_normalize</b><i> (uninorm_t&nbsp;<var>nf</var>, const&nbsp;uint32_t&nbsp;*<var>s</var>, size_t&nbsp;<var>n</var>, uint32_t&nbsp;*<var>resultbuf</var>, size_t&nbsp;*<var>lengthp</var>)</i>
356 <a name="IDX872"></a>
357 </dt>
358 <dd><p>Returns the specified normalization form of a string.
359 </p>
360 <p>The <var>resultbuf</var> and <var>lengthp</var> arguments are as described in
361 chapter <a href="libunistring_2.html#SEC8">Conventions</a>.
362 </p></dd></dl>
363
364 <hr size="6">
365 <a name="Normalizing-comparisons"></a>
366 <a name="SEC65"></a>
367 <h2 class="section"> <a href="libunistring_toc.html#TOC65">13.4 Normalizing comparisons</a> </h2>
368
369 <p>The following functions compare Unicode strings, ignoring differences in
370 normalization.
371 </p>
372 <dl>
373 <dt><u>Function:</u> int <b>u8_normcmp</b><i> (const&nbsp;uint8_t&nbsp;*<var>s1</var>, size_t&nbsp;<var>n1</var>, const&nbsp;uint8_t&nbsp;*<var>s2</var>, size_t&nbsp;<var>n2</var>, uninorm_t&nbsp;<var>nf</var>, int&nbsp;*<var>resultp</var>)</i>
374 <a name="IDX873"></a>
375 </dt>
376 <dt><u>Function:</u> int <b>u16_normcmp</b><i> (const&nbsp;uint16_t&nbsp;*<var>s1</var>, size_t&nbsp;<var>n1</var>, const&nbsp;uint16_t&nbsp;*<var>s2</var>, size_t&nbsp;<var>n2</var>, uninorm_t&nbsp;<var>nf</var>, int&nbsp;*<var>resultp</var>)</i>
377 <a name="IDX874"></a>
378 </dt>
379 <dt><u>Function:</u> int <b>u32_normcmp</b><i> (const&nbsp;uint32_t&nbsp;*<var>s1</var>, size_t&nbsp;<var>n1</var>, const&nbsp;uint32_t&nbsp;*<var>s2</var>, size_t&nbsp;<var>n2</var>, uninorm_t&nbsp;<var>nf</var>, int&nbsp;*<var>resultp</var>)</i>
380 <a name="IDX875"></a>
381 </dt>
382 <dd><p>Compares <var>s1</var> and <var>s2</var>, ignoring differences in normalization.
383 </p>
384 <p><var>nf</var> must be either <code>UNINORM_NFD</code> or <code>UNINORM_NFKD</code>.
385 </p>
386 <p>If successful, sets <code>*<var>resultp</var></code> to -1 if <var>s1</var> &lt; <var>s2</var>,
387 0 if <var>s1</var> = <var>s2</var>, 1 if <var>s1</var> &gt; <var>s2</var>, and returns 0.
388 Upon failure, returns -1 with <code>errno</code> set.
389 </p></dd></dl>
390
391 <a name="IDX876"></a>
392 <a name="IDX877"></a>
393 <dl>
394 <dt><u>Function:</u> char * <b>u8_normxfrm</b><i> (const&nbsp;uint8_t&nbsp;*<var>s</var>, size_t&nbsp;<var>n</var>, uninorm_t&nbsp;<var>nf</var>, char&nbsp;*<var>resultbuf</var>, size_t&nbsp;*<var>lengthp</var>)</i>
395 <a name="IDX878"></a>
396 </dt>
397 <dt><u>Function:</u> char * <b>u16_normxfrm</b><i> (const&nbsp;uint16_t&nbsp;*<var>s</var>, size_t&nbsp;<var>n</var>, uninorm_t&nbsp;<var>nf</var>, char&nbsp;*<var>resultbuf</var>, size_t&nbsp;*<var>lengthp</var>)</i>
398 <a name="IDX879"></a>
399 </dt>
400 <dt><u>Function:</u> char * <b>u32_normxfrm</b><i> (const&nbsp;uint32_t&nbsp;*<var>s</var>, size_t&nbsp;<var>n</var>, uninorm_t&nbsp;<var>nf</var>, char&nbsp;*<var>resultbuf</var>, size_t&nbsp;*<var>lengthp</var>)</i>
401 <a name="IDX880"></a>
402 </dt>
403 <dd><p>Converts the string <var>s</var> of length <var>n</var> to a NUL-terminated byte
404 sequence, in such a way that comparing <code>u8_normxfrm (<var>s1</var>)</code> and
405 <code>u8_normxfrm (<var>s2</var>)</code> with the <code>u8_cmp2</code> function is equivalent to
406 comparing <var>s1</var> and <var>s2</var> with the <code>u8_normcoll</code> function.
407 </p>
408 <p><var>nf</var> must be either <code>UNINORM_NFC</code> or <code>UNINORM_NFKC</code>.
409 </p>
410 <p>The <var>resultbuf</var> and <var>lengthp</var> arguments are as described in
411 chapter <a href="libunistring_2.html#SEC8">Conventions</a>.
412 </p></dd></dl>
413
414 <dl>
415 <dt><u>Function:</u> int <b>u8_normcoll</b><i> (const&nbsp;uint8_t&nbsp;*<var>s1</var>, size_t&nbsp;<var>n1</var>, const&nbsp;uint8_t&nbsp;*<var>s2</var>, size_t&nbsp;<var>n2</var>, uninorm_t&nbsp;<var>nf</var>, int&nbsp;*<var>resultp</var>)</i>
416 <a name="IDX881"></a>
417 </dt>
418 <dt><u>Function:</u> int <b>u16_normcoll</b><i> (const&nbsp;uint16_t&nbsp;*<var>s1</var>, size_t&nbsp;<var>n1</var>, const&nbsp;uint16_t&nbsp;*<var>s2</var>, size_t&nbsp;<var>n2</var>, uninorm_t&nbsp;<var>nf</var>, int&nbsp;*<var>resultp</var>)</i>
419 <a name="IDX882"></a>
420 </dt>
421 <dt><u>Function:</u> int <b>u32_normcoll</b><i> (const&nbsp;uint32_t&nbsp;*<var>s1</var>, size_t&nbsp;<var>n1</var>, const&nbsp;uint32_t&nbsp;*<var>s2</var>, size_t&nbsp;<var>n2</var>, uninorm_t&nbsp;<var>nf</var>, int&nbsp;*<var>resultp</var>)</i>
422 <a name="IDX883"></a>
423 </dt>
424 <dd><p>Compares <var>s1</var> and <var>s2</var>, ignoring differences in normalization, using
425 the collation rules of the current locale.
426 </p>
427 <p><var>nf</var> must be either <code>UNINORM_NFC</code> or <code>UNINORM_NFKC</code>.
428 </p>
429 <p>If successful, sets <code>*<var>resultp</var></code> to -1 if <var>s1</var> &lt; <var>s2</var>,
430 0 if <var>s1</var> = <var>s2</var>, 1 if <var>s1</var> &gt; <var>s2</var>, and returns 0.
431 Upon failure, returns -1 with <code>errno</code> set.
432 </p></dd></dl>
433
434 <hr size="6">
435 <a name="Normalization-of-streams"></a>
436 <a name="SEC66"></a>
437 <h2 class="section"> <a href="libunistring_toc.html#TOC66">13.5 Normalization of streams of Unicode characters</a> </h2>
438
439 <p>A &ldquo;stream of Unicode characters&rdquo; is essentially a function that accepts an
440 <code>ucs4_t</code> argument repeatedly, optionally combined with a function that
441 &ldquo;flushes&rdquo; the stream.
442 </p>
443 <dl>
444 <dt><u>Type:</u> <b>struct uninorm_filter</b>
445 <a name="IDX884"></a>
446 </dt>
447 <dd><p>This is the data type of a stream of Unicode characters that normalizes its
448 input according to a given normalization form and passes the normalized
449 character sequence to the encapsulated stream of Unicode characters.
450 </p></dd></dl>
451
452 <dl>
453 <dt><u>Function:</u> struct uninorm_filter * <b>uninorm_filter_create</b><i> (uninorm_t&nbsp;<var>nf</var>, int&nbsp;(*<var>stream_func</var>)&nbsp;(void&nbsp;*<var>stream_data</var>, ucs4_t&nbsp;<var>uc</var>), void&nbsp;*<var>stream_data</var>)</i>
454 <a name="IDX885"></a>
455 </dt>
456 <dd><p>Creates and returns a normalization filter for Unicode characters.
457 </p>
458 <p>The pair (<var>stream_func</var>, <var>stream_data</var>) is the encapsulated stream.
459 <code><var>stream_func</var> (<var>stream_data</var>, <var>uc</var>)</code> receives the Unicode
460 character <var>uc</var> and returns 0 if successful, or -1 with <code>errno</code> set
461 upon failure.
462 </p>
463 <p>Returns the new filter, or NULL with <code>errno</code> set upon failure.
464 </p></dd></dl>
465
466 <dl>
467 <dt><u>Function:</u> int <b>uninorm_filter_write</b><i> (struct&nbsp;uninorm_filter&nbsp;*<var>filter</var>, ucs4_t&nbsp;<var>uc</var>)</i>
468 <a name="IDX886"></a>
469 </dt>
470 <dd><p>Stuffs a Unicode character into a normalizing filter.
471 Returns 0 if successful, or -1 with <code>errno</code> set upon failure.
472 </p></dd></dl>
473
474 <dl>
475 <dt><u>Function:</u> int <b>uninorm_filter_flush</b><i> (struct&nbsp;uninorm_filter&nbsp;*<var>filter</var>)</i>
476 <a name="IDX887"></a>
477 </dt>
478 <dd><p>Brings data buffered in the filter to its destination, the encapsulated stream.
479 </p>
480 <p>Returns 0 if successful, or -1 with <code>errno</code> set upon failure.
481 </p>
482 <p>Note! If after calling this function, additional characters are written
483 into the filter, the resulting character sequence in the encapsulated stream
484 will not necessarily be normalized.
485 </p></dd></dl>
486
487 <dl>
488 <dt><u>Function:</u> int <b>uninorm_filter_free</b><i> (struct&nbsp;uninorm_filter&nbsp;*<var>filter</var>)</i>
489 <a name="IDX888"></a>
490 </dt>
491 <dd><p>Brings data buffered in the filter to its destination, the encapsulated stream,
492 then closes and frees the filter.
493 </p>
494 <p>Returns 0 if successful, or -1 with <code>errno</code> set upon failure.
495 </p></dd></dl>
496 <hr size="6">
497 <table cellpadding="1" cellspacing="1" border="0">
498 <tr><td valign="middle" align="left">[<a href="#SEC61" title="Beginning of this chapter or previous chapter"> &lt;&lt; </a>]</td>
499 <td valign="middle" align="left">[<a href="libunistring_14.html#SEC67" title="Next chapter"> &gt;&gt; </a>]</td>
500 <td valign="middle" align="left"> &nbsp; </td>
501 <td valign="middle" align="left"> &nbsp; </td>
502 <td valign="middle" align="left"> &nbsp; </td>
503 <td valign="middle" align="left"> &nbsp; </td>
504 <td valign="middle" align="left"> &nbsp; </td>
505 <td valign="middle" align="left">[<a href="libunistring_toc.html#SEC_Top" title="Cover (top) of document">Top</a>]</td>
506 <td valign="middle" align="left">[<a href="libunistring_toc.html#SEC_Contents" title="Table of contents">Contents</a>]</td>
507 <td valign="middle" align="left">[<a href="libunistring_21.html#SEC92" title="Index">Index</a>]</td>
508 <td valign="middle" align="left">[<a href="libunistring_abt.html#SEC_About" title="About (help)"> ? </a>]</td>
509 </tr></table>
510 <p>
511  <font size="-1">
512   This document was generated by <em>Bruno Haible</em> on <em>October, 16 2022</em> using <a href="https://www.nongnu.org/texi2html/"><em>texi2html 1.78a</em></a>.
513  </font>
514  <br>
515
516 </p>
517 </body>
518 </html>