2 * pango-script.c: Script tag handling
4 * Copyright (C) 2002 Red Hat Software
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
21 * Implementation of pango_script_iter is derived from ICU:
23 * icu/sources/common/usc_impl.c
25 **********************************************************************
26 * Copyright (C) 1999-2002, International Business Machines
27 * Corporation and others. All Rights Reserved.
28 **********************************************************************
30 * Permission is hereby granted, free of charge, to any person obtaining a
31 * copy of this software and associated documentation files (the
32 * "Software"), to deal in the Software without restriction, including
33 * without limitation the rights to use, copy, modify, merge, publish,
34 * distribute, and/or sell copies of the Software, and to permit persons
35 * to whom the Software is furnished to do so, provided that the above
36 * copyright notice(s) and this permission notice appear in all copies of
37 * the Software and that both the above copyright notice(s) and this
38 * permission notice appear in supporting documentation.
40 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
41 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
42 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
43 * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
44 * HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
45 * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
46 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
47 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
48 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
50 * Except as contained in this notice, the name of a copyright holder
51 * shall not be used in advertising or otherwise to promote the sale, use
52 * or other dealings in this Software without prior written authorization
53 * of the copyright holder.
60 #include "pango-script.h"
61 #include "pango-script-private.h"
64 * pango_script_for_unichar:
65 * @ch: a Unicode character
67 * Looks up the #PangoScript for a particular character (as defined by
68 * Unicode Standard Annex #24). No check is made for @ch being a
69 * valid Unicode character; if you pass in invalid character, the
70 * result is undefined.
72 * As of Pango 1.18, this function simply returns the return value of
73 * g_unichar_get_script().
75 * Return value: the #PangoScript for the character.
80 pango_script_for_unichar (gunichar ch)
82 return g_unichar_get_script (ch);
85 /**********************************************************************/
88 _pango_script_iter_init (PangoScriptIter *iter,
92 iter->text_start = text;
94 iter->text_end = text + length;
96 iter->text_end = text + strlen (text);
98 iter->script_start = text;
99 iter->script_end = text;
100 iter->script_code = PANGO_SCRIPT_COMMON;
104 pango_script_iter_next (iter);
110 * pango_script_iter_new:
111 * @text: a UTF-8 string
112 * @length: length of @text, or -1 if @text is nul-terminated.
114 * Create a new #PangoScriptIter, used to break a string of
115 * Unicode into runs by text. No copy is made of @text, so
116 * the caller needs to make sure it remains valid until
117 * the iterator is freed with pango_script_iter_free().
119 * Return value: the new script iterator, initialized
120 * to point at the first range in the text, which should be
121 * freed with pango_script_iter_free(). If the string is
122 * empty, it will point at an empty range.
127 pango_script_iter_new (const char *text,
130 return _pango_script_iter_init (g_slice_new (PangoScriptIter), text, length);
134 _pango_script_iter_fini (PangoScriptIter *iter)
139 * pango_script_iter_free:
140 * @iter: a #PangoScriptIter
142 * Frees a #PangoScriptIter created with pango_script_iter_new().
147 pango_script_iter_free (PangoScriptIter *iter)
149 _pango_script_iter_fini (iter);
150 g_slice_free (PangoScriptIter, iter);
154 * pango_script_iter_get_range:
155 * @iter: a #PangoScriptIter
156 * @start: location to store start position of the range, or %NULL
157 * @end: location to store end position of the range, or %NULL
158 * @script: location to store script for range, or %NULL
160 * Gets information about the range to which @iter currently points.
161 * The range is the set of locations p where *start <= p < *end.
162 * (That is, it doesn't include the character stored at *end)
167 pango_script_iter_get_range (PangoScriptIter *iter,
168 G_CONST_RETURN char **start,
169 G_CONST_RETURN char **end,
173 *start = iter->script_start;
175 *end = iter->script_end;
177 *script = iter->script_code;
180 static const gunichar paired_chars[] = {
181 0x0028, 0x0029, /* ascii paired punctuation */
185 0x00ab, 0x00bb, /* guillemets */
186 0x2018, 0x2019, /* general punctuation */
189 0x3008, 0x3009, /* chinese paired punctuation */
201 get_pair_index (gunichar ch)
204 int upper = G_N_ELEMENTS (paired_chars) - 1;
206 while (lower <= upper)
208 int mid = (lower + upper) / 2;
210 if (ch < paired_chars[mid])
212 else if (ch > paired_chars[mid])
/* duplicated in pango-language.c */
/* True for a concrete script, i.e. one that compares greater than
 * PANGO_SCRIPT_INHERITED and is not PANGO_SCRIPT_UNKNOWN.
 */
#define REAL_SCRIPT(script) \
  ((script) > PANGO_SCRIPT_INHERITED && (script) != PANGO_SCRIPT_UNKNOWN)

/* Two scripts are compatible when either one is not a real script
 * or when both are equal. Arguments may be evaluated more than once.
 */
#define SAME_SCRIPT(script1, script2) \
  (!REAL_SCRIPT (script1) || !REAL_SCRIPT (script2) || (script1) == (script2))

/* Opening characters occupy the even slots of paired_chars. */
#define IS_OPEN(pair_index) (((pair_index) & 1) == 0)
231 * pango_script_iter_next:
232 * @iter: a #PangoScriptIter
234 * Advances a #PangoScriptIter to the next range. If @iter
235 * is already at the end, it is left unchanged and %FALSE
238 * Return value: %TRUE if @iter was successfully advanced.
243 pango_script_iter_next (PangoScriptIter *iter)
247 if (iter->script_end == iter->text_end)
250 start_sp = iter->paren_sp;
251 iter->script_code = PANGO_SCRIPT_COMMON;
252 iter->script_start = iter->script_end;
254 for (; iter->script_end < iter->text_end; iter->script_end = g_utf8_next_char (iter->script_end))
256 gunichar ch = g_utf8_get_char (iter->script_end);
260 sc = pango_script_for_unichar (ch);
261 if (sc != PANGO_SCRIPT_COMMON)
264 pair_index = get_pair_index (ch);
267 * Paired character handling:
269 * if it's an open character, push it onto the stack.
270 * if it's a close character, find the matching open on the
271 * stack, and use that script code. Any non-matching open
272 * characters above it on the stack will be poped.
276 if (IS_OPEN (pair_index))
279 * If the paren stack is full, empty it. This
280 * means that deeply nested paired punctuation
281 * characters will be ignored, but that's an unusual
282 * case, and it's better to ignore them than to
283 * write off the end of the stack...
285 if (++iter->paren_sp >= PAREN_STACK_DEPTH)
288 iter->paren_stack[iter->paren_sp].pair_index = pair_index;
289 iter->paren_stack[iter->paren_sp].script_code = iter->script_code;
291 else if (iter->paren_sp >= 0)
293 int pi = pair_index & ~1;
295 while (iter->paren_sp >= 0 && iter->paren_stack[iter->paren_sp].pair_index != pi)
298 if (iter->paren_sp < start_sp)
299 start_sp = iter->paren_sp;
301 if (iter->paren_sp >= 0)
302 sc = iter->paren_stack[iter->paren_sp].script_code;
306 if (SAME_SCRIPT (iter->script_code, sc))
308 if (!REAL_SCRIPT (iter->script_code) && REAL_SCRIPT (sc))
310 iter->script_code = sc;
313 * now that we have a final script code, fix any open
314 * characters we pushed before we knew the script code.
316 while (start_sp < iter->paren_sp)
317 iter->paren_stack[++start_sp].script_code = iter->script_code;
321 * if this character is a close paired character,
322 * pop it from the stack
324 if (pair_index >= 0 && !IS_OPEN (pair_index) && iter->paren_sp >= 0)
328 if (iter->paren_sp < start_sp)
329 start_sp = iter->paren_sp;
334 /* Different script, we're done */
342 /**********************************************************
343 * End of code from ICU
344 **********************************************************/